#include "embedsom_cuda.h"
#include "cooperative_groups.h"
#include "cooperative_groups/reduce.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

Include dependency graph for embedsom_cuda_projection.cu:

Classes
struct	EmbedSOMConstants< F >

struct	SharedNeighborStorage< F >
	Helper structure to transform neighbor distances to scores. More...

struct	NeighborScoreStorage< F >
	Helper structure to help transform neighbor distances to scores. More...

struct	RectangleIndexer
	Type tag for indexing by small rectangles. More...

Macros
#define	_CG_ABI_EXPERIMENTAL

#define	DOIT(X)

Functions
template<typename F >
__inline__ __device__ F	warpReduceSum (F val)
	Sum a value across the warp. More...

template<typename F , typename ArrayF >
__inline__ __device__ void	readAligned (F const __restrict__ dst, const F const __restrict__ src, const knn_entry< F > *const __restrict__ neighbors, const uint32_t n, const uint32_t dim, const uint32_t groupRank, const uint32_t groupSize)
	Read aligned data entries for EmbedSOM projection. More...

template<typename F >
__inline__ __device__ void	readAlignedGrid2D (F const __restrict__ dst, const F const __restrict__ src, const knn_entry< F > *const __restrict__ neighbors, const uint32_t n, const uint32_t groupRank, const uint32_t groupSize)
	Version of readAligned() for the 2D grid. More...

template<typename F >
__inline__ __device__ void	storeToCache (const uint32_t groupRank, const uint32_t groupSize, const F const __restrict__ points, F const __restrict__ pointsCache, const F const __restrict__ grid, F const __restrict__ gridCache, const F const __restrict__ grid2d, F const __restrict__ grid2dCache, const knn_entry< F > *const __restrict__ neighbors, const uint32_t dim, const uint32_t gridCacheLeadingDim, const uint32_t k)
	Load data into shared memory cache. More...

template<typename F >
__inline__ __device__ void	sortedDistsToScores (knn_entry< F > *const __restrict__ neighbors, const std::size_t adjustedK, const std::size_t k, const F boost)
	Compute scores for a vector of sorted distances. More...

template<typename F , class SCORE_STORAGE , class TILE >
__inline__ __device__ void	sortedDistsToScoresGroup (const TILE &tile, SCORE_STORAGE storage, const std::size_t adjustedK, const std::size_t k, const F boost)
	Convert the distances to scores using thread_block_tile and its built in functions for reduction and shuffle. More...

template<typename F >
__inline__ __device__ void	addGravity (const F score, const F const __restrict__ grid2DPoint, F const __restrict__ mtx)
	Add single-point approximation to the matrix. More...

template<typename F >
__inline__ __device__ void	addGravity2Wise (const F score, const F const __restrict__ grid2DPoint, F const __restrict__ mtx)
	Add single-point approximation to the matrix, on 2-vectors. More...

template<typename F >
__inline__ __device__ Vec< 2, F >::Type	euclideanProjection (const F const __restrict__ point, const F const __restrict__ gridPointI, const F *const __restrict__ gridPointJ, const uint32_t dim)
	Compute a projection of the point to a line defined by points I and J. More...

template<typename F >
__inline__ __device__ Vec< 2, F >::Type	euclideanProjection4Wise (const F const __restrict__ point, const F const __restrict__ gridPointI, const F *const __restrict__ gridPointJ, const uint32_t dim)
	Run the projection on 4-vectors. More...

template<typename F >
__inline__ __device__ void	addApproximation (const F scoreI, const F scoreJ, const F const __restrict__ grid2DPointI, const F const __restrict__ grid2DPointJ, const F adjust, const F scalarProjection, F *const __restrict__ mtx)
	Add the result of projections to the approximation matrix. More...

template<typename F >
__inline__ __device__ void	addApproximation2Wise (const F scoreI, const F scoreJ, const F const __restrict__ grid2DPointI, const F const __restrict__ grid2DPointJ, const F adjust, const F scalarProjection, F *const __restrict__ mtx)
	Same as addApproximation(), on 2-vectors. More...

template<typename INDEXER >
__inline__ __device__ uint2	getIndices (uint32_t plainIndex, uint32_t k)
	Function template for getting indexes. More...

template<>
__inline__ __device__ uint2	getIndices< RectangleIndexer > (uint32_t plainIndex, uint32_t k)
	Specialization of getIndices for the rectangle indexing. More...

template<typename F >
__global__ void	projectionBaseKernel (const F __restrict__ points, const F const __restrict__ grid, const F const __restrict__ grid2d, knn_entry< F > __restrict__ neighbors, F *__restrict__ projections, const uint32_t dim, const uint32_t n, const uint32_t gridSize, const uint32_t k, const F adjust, const F boost)
	Base projection kernel (each thread does everything for a single point). More...

template<typename F , typename INDEXER , size_t tileSize>
__global__ void	projectionAlignedShMemoryKernel (const F __restrict__ points, const F const __restrict__ grid, const F const __restrict__ grid2d, knn_entry< F > __restrict__ neighbors, F *__restrict__ projections, const uint32_t dim, const uint32_t n, const uint32_t gridSize, const uint32_t k, const F adjust, const F boost, const uint32_t groupSize, const uint32_t cacheLeadingDim)
	Aligned-shared-memory projection kernel. More...

Macro Definition Documentation

◆ _CG_ABI_EXPERIMENTAL

#define _CG_ABI_EXPERIMENTAL

Definition at line 32 of file embedsom_cuda_projection.cu.

◆ DOIT

#define DOIT ( X )

Value:

    tmp = tmpGridJ.X - tmpGridI.X;                                             \
    result.y += tmp * tmp;                                                     \
    result.x += tmp * (tmpPoint.X - tmpGridI.X)

Function Documentation

◆ addApproximation()

template<typename F >

__inline__ __device__ void addApproximation	(	const F	scoreI,
		const F	scoreJ,
		const F *const __restrict__	grid2DPointI,
		const F *const __restrict__	grid2DPointJ,
		const F	adjust,
		const F	scalarProjection,
		F *const __restrict__	mtx
	)

Add the result of projections to the approximation matrix.

Definition at line 379 of file embedsom_cuda_projection.cu.

Here is the caller graph for this function:

◆ addApproximation2Wise()

template<typename F >

__inline__ __device__ void addApproximation2Wise	(	const F	scoreI,
		const F	scoreJ,
		const F *const __restrict__	grid2DPointI,
		const F *const __restrict__	grid2DPointJ,
		const F	adjust,
		const F	scalarProjection,
		F *const __restrict__	mtx
	)

Same as addApproximation(), on 2-vectors.

Definition at line 415 of file embedsom_cuda_projection.cu.

Here is the caller graph for this function:

◆ addGravity()

template<typename F >

__inline__ __device__ void addGravity	(	const F	score,
		const F *const __restrict__	grid2DPoint,
		F *const __restrict__	mtx
	)

Add single-point approximation to the matrix.

Definition at line 285 of file embedsom_cuda_projection.cu.

Here is the caller graph for this function:

◆ addGravity2Wise()

template<typename F >

__inline__ __device__ void addGravity2Wise	(	const F	score,
		const F *const __restrict__	grid2DPoint,
		F *const __restrict__	mtx
	)

Add single-point approximation to the matrix, on 2-vectors.

Definition at line 300 of file embedsom_cuda_projection.cu.

Here is the caller graph for this function:

◆ euclideanProjection()

template<typename F >

__inline__ __device__ Vec< 2, F >::Type euclideanProjection	(	const F *const __restrict__	point,
		const F *const __restrict__	gridPointI,
		const F *const __restrict__	gridPointJ,
		const uint32_t	dim
	)

Compute a projection of the point to a line defined by points I and J.

Definition at line 318 of file embedsom_cuda_projection.cu.

◆ euclideanProjection4Wise()

template<typename F >

__inline__ __device__ Vec< 2, F >::Type euclideanProjection4Wise	(	const F *const __restrict__	point,
		const F *const __restrict__	gridPointI,
		const F *const __restrict__	gridPointJ,
		const uint32_t	dim
	)

Run the projection on 4-vectors.

Definition at line 335 of file embedsom_cuda_projection.cu.

◆ getIndices()

template<typename INDEXER >

__inline__ __device__ uint2 getIndices	(	uint32_t	plainIndex,
		uint32_t	k
	)

Function template for getting indexes.

Definition at line 469 of file embedsom_cuda_projection.cu.

◆ getIndices< RectangleIndexer >()

template<>

__inline__ __device__ uint2 getIndices< RectangleIndexer >	(	uint32_t	plainIndex,
		uint32_t	k
	)

Specialization of getIndices for the rectangle indexing.

Definition at line 475 of file embedsom_cuda_projection.cu.

◆ projectionAlignedShMemoryKernel()

template<typename F , typename INDEXER , size_t tileSize>

__global__ void projectionAlignedShMemoryKernel	(	const F *__restrict__	points,
		const F *const __restrict__	grid,
		const F *const __restrict__	grid2d,
		knn_entry< F > *__restrict__	neighbors,
		F *__restrict__	projections,
		const uint32_t	dim,
		const uint32_t	n,
		const uint32_t	gridSize,
		const uint32_t	k,
		const F	adjust,
		const F	boost,
		const uint32_t	groupSize,
		const uint32_t	cacheLeadingDim
	)

Aligned-shared-memory projection kernel.

One block computes embedding for one point, using CUB block reduce for matrix reduction.

Definition at line 552 of file embedsom_cuda_projection.cu.

Here is the call graph for this function:

◆ projectionBaseKernel()

template<typename F >

__global__ void projectionBaseKernel	(	const F *__restrict__	points,
		const F *const __restrict__	grid,
		const F *const __restrict__	grid2d,
		knn_entry< F > *__restrict__	neighbors,
		F *__restrict__	projections,
		const uint32_t	dim,
		const uint32_t	n,
		const uint32_t	gridSize,
		const uint32_t	k,
		const F	adjust,
		const F	boost
	)

Base projection kernel (each thread does everything for a single point).

Definition at line 490 of file embedsom_cuda_projection.cu.

Here is the call graph for this function:

◆ readAligned()

template<typename F , typename ArrayF >

__inline__ __device__ void readAligned	(	F *const __restrict__	dst,
		const F *const __restrict__	src,
		const knn_entry< F > *const __restrict__	neighbors,
		const uint32_t	n,
		const uint32_t	dim,
		const uint32_t	groupRank,
		const uint32_t	groupSize
	)

Read aligned data entries for EmbedSOM projection.

Definition at line 105 of file embedsom_cuda_projection.cu.

◆ readAlignedGrid2D()

template<typename F >

__inline__ __device__ void readAlignedGrid2D	(	F *const __restrict__	dst,
		const F *const __restrict__	src,
		const knn_entry< F > *const __restrict__	neighbors,
		const uint32_t	n,
		const uint32_t	groupRank,
		const uint32_t	groupSize
	)

Version of readAligned() for the 2D grid.

Definition at line 132 of file embedsom_cuda_projection.cu.

◆ sortedDistsToScores()

template<typename F >

__inline__ __device__ void sortedDistsToScores	(	knn_entry< F > *const __restrict__	neighbors,
		const std::size_t	adjustedK,
		const std::size_t	k,
		const F	boost
	)

Compute scores for a vector of sorted distances.

Definition at line 184 of file embedsom_cuda_projection.cu.

Here is the call graph for this function:

◆ sortedDistsToScoresGroup()

template<typename F , class SCORE_STORAGE , class TILE >

__inline__ __device__ void sortedDistsToScoresGroup	(	const TILE &	tile,
		SCORE_STORAGE	storage,
		const std::size_t	adjustedK,
		const std::size_t	k,
		const F	boost
	)

Convert the distances to scores using thread_block_tile and its built in functions for reduction and shuffle.

Definition at line 222 of file embedsom_cuda_projection.cu.

◆ storeToCache()

template<typename F >

__inline__ __device__ void storeToCache	(	const uint32_t	groupRank,
		const uint32_t	groupSize,
		const F *const __restrict__	points,
		F *const __restrict__	pointsCache,
		const F *const __restrict__	grid,
		F *const __restrict__	gridCache,
		const F *const __restrict__	grid2d,
		F *const __restrict__	grid2dCache,
		const knn_entry< F > *const __restrict__	neighbors,
		const uint32_t	dim,
		const uint32_t	gridCacheLeadingDim,
		const uint32_t	k
	)

Load data into shared memory cache.

Definition at line 151 of file embedsom_cuda_projection.cu.

Here is the caller graph for this function:

◆ warpReduceSum()

template<typename F >

__inline__ __device__ F warpReduceSum ( F val )

Sum a value across the warp.

Definition at line 95 of file embedsom_cuda_projection.cu.

Classes

Macros

Functions

Macro Definition Documentation

◆ _CG_ABI_EXPERIMENTAL

◆ DOIT

Function Documentation

◆ addApproximation()

◆ addApproximation2Wise()

◆ addGravity()

◆ addGravity2Wise()

◆ euclideanProjection()

◆ euclideanProjection4Wise()

◆ getIndices()

◆ getIndices< RectangleIndexer >()

◆ projectionAlignedShMemoryKernel()

◆ projectionBaseKernel()

◆ readAligned()

◆ readAlignedGrid2D()

◆ sortedDistsToScores()

◆ sortedDistsToScoresGroup()

◆ storeToCache()

◆ warpReduceSum()