BlosSOM
Interactive dimensionality reduction on large datasets (EmbedSOM and FLOWER combined)
Classes | Macros | Functions
embedsom_cuda_projection.cu File Reference
#include "embedsom_cuda.h"
#include "cooperative_groups.h"
#include "cooperative_groups/reduce.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
Include dependency graph for embedsom_cuda_projection.cu:

Go to the source code of this file.

Classes

struct  EmbedSOMConstants< F >
 
struct  SharedNeighborStorage< F >
 Helper structure to transform neighbor distances to scores. More...
 
struct  NeighborScoreStorage< F >
 Helper structure to help transform neighbor distances to scores. More...
 
struct  RectangleIndexer
 Type tag for indexing by small rectangles. More...
 

Macros

#define _CG_ABI_EXPERIMENTAL
 
#define DOIT(X)
 

Functions

template<typename F >
__inline__ __device__ F warpReduceSum (F val)
 Sum a value across the warp. More...
 
template<typename F , typename ArrayF >
__inline__ __device__ void readAligned (F *const __restrict__ dst, const F *const __restrict__ src, const knn_entry< F > *const __restrict__ neighbors, const uint32_t n, const uint32_t dim, const uint32_t groupRank, const uint32_t groupSize)
 Read aligned data entries for EmbedSOM projection. More...
 
template<typename F >
__inline__ __device__ void readAlignedGrid2D (F *const __restrict__ dst, const F *const __restrict__ src, const knn_entry< F > *const __restrict__ neighbors, const uint32_t n, const uint32_t groupRank, const uint32_t groupSize)
 Version of readAligned() for the 2D grid. More...
 
template<typename F >
__inline__ __device__ void storeToCache (const uint32_t groupRank, const uint32_t groupSize, const F *const __restrict__ points, F *const __restrict__ pointsCache, const F *const __restrict__ grid, F *const __restrict__ gridCache, const F *const __restrict__ grid2d, F *const __restrict__ grid2dCache, const knn_entry< F > *const __restrict__ neighbors, const uint32_t dim, const uint32_t gridCacheLeadingDim, const uint32_t k)
 Load data into shared memory cache. More...
 
template<typename F >
__inline__ __device__ void sortedDistsToScores (knn_entry< F > *const __restrict__ neighbors, const std::size_t adjustedK, const std::size_t k, const F boost)
 Compute scores for a vector of sorted distances. More...
 
template<typename F , class SCORE_STORAGE , class TILE >
__inline__ __device__ void sortedDistsToScoresGroup (const TILE &tile, SCORE_STORAGE storage, const std::size_t adjustedK, const std::size_t k, const F boost)
 Convert the distances to scores using thread_block_tile and its built in functions for reduction and shuffle. More...
 
template<typename F >
__inline__ __device__ void addGravity (const F score, const F *const __restrict__ grid2DPoint, F *const __restrict__ mtx)
 Add single-point approximation to the matrix. More...
 
template<typename F >
__inline__ __device__ void addGravity2Wise (const F score, const F *const __restrict__ grid2DPoint, F *const __restrict__ mtx)
 Add single-point approximation to the matrix, on 2-vectors. More...
 
template<typename F >
__inline__ __device__ Vec< 2, F >::Type euclideanProjection (const F *const __restrict__ point, const F *const __restrict__ gridPointI, const F *const __restrict__ gridPointJ, const uint32_t dim)
 Compute a projection of the point to a line defined by points I and J. More...
 
template<typename F >
__inline__ __device__ Vec< 2, F >::Type euclideanProjection4Wise (const F *const __restrict__ point, const F *const __restrict__ gridPointI, const F *const __restrict__ gridPointJ, const uint32_t dim)
 Run the projection on 4-vectors. More...
 
template<typename F >
__inline__ __device__ void addApproximation (const F scoreI, const F scoreJ, const F *const __restrict__ grid2DPointI, const F *const __restrict__ grid2DPointJ, const F adjust, const F scalarProjection, F *const __restrict__ mtx)
 Add the result of projections to the approximation matrix. More...
 
template<typename F >
__inline__ __device__ void addApproximation2Wise (const F scoreI, const F scoreJ, const F *const __restrict__ grid2DPointI, const F *const __restrict__ grid2DPointJ, const F adjust, const F scalarProjection, F *const __restrict__ mtx)
 Same as addApproximation(), on 2-vectors. More...
 
template<typename INDEXER >
__inline__ __device__ uint2 getIndices (uint32_t plainIndex, uint32_t k)
 Function template for getting indexes. More...
 
template<>
__inline__ __device__ uint2 getIndices< RectangleIndexer > (uint32_t plainIndex, uint32_t k)
 Specialization of getIndices for the rectangle indexing. More...
 
template<typename F >
__global__ void projectionBaseKernel (const F *__restrict__ points, const F *const __restrict__ grid, const F *const __restrict__ grid2d, knn_entry< F > *__restrict__ neighbors, F *__restrict__ projections, const uint32_t dim, const uint32_t n, const uint32_t gridSize, const uint32_t k, const F adjust, const F boost)
 Base projection kernel (each thread does everything for a single point). More...
 
template<typename F , typename INDEXER , size_t tileSize>
__global__ void projectionAlignedShMemoryKernel (const F *__restrict__ points, const F *const __restrict__ grid, const F *const __restrict__ grid2d, knn_entry< F > *__restrict__ neighbors, F *__restrict__ projections, const uint32_t dim, const uint32_t n, const uint32_t gridSize, const uint32_t k, const F adjust, const F boost, const uint32_t groupSize, const uint32_t cacheLeadingDim)
 Aligned-shared-memory projection kernel. More...
 

Macro Definition Documentation

◆ _CG_ABI_EXPERIMENTAL

#define _CG_ABI_EXPERIMENTAL

Definition at line 32 of file embedsom_cuda_projection.cu.

◆ DOIT

#define DOIT (   X)
Value:
tmp = tmpGridJ.X - tmpGridI.X; \
result.y += tmp * tmp; \
result.x += tmp * (tmpPoint.X - tmpGridI.X)

Function Documentation

◆ addApproximation()

template<typename F >
__inline__ __device__ void addApproximation ( const F  scoreI,
const F  scoreJ,
const F *const __restrict__  grid2DPointI,
const F *const __restrict__  grid2DPointJ,
const F  adjust,
const F  scalarProjection,
F *const __restrict__  mtx 
)

Add the result of projections to the approximation matrix.

Definition at line 379 of file embedsom_cuda_projection.cu.

Here is the caller graph for this function:

◆ addApproximation2Wise()

template<typename F >
__inline__ __device__ void addApproximation2Wise ( const F  scoreI,
const F  scoreJ,
const F *const __restrict__  grid2DPointI,
const F *const __restrict__  grid2DPointJ,
const F  adjust,
const F  scalarProjection,
F *const __restrict__  mtx 
)

Same as addApproximation(), on 2-vectors.

Definition at line 415 of file embedsom_cuda_projection.cu.

Here is the caller graph for this function:

◆ addGravity()

template<typename F >
__inline__ __device__ void addGravity ( const F  score,
const F *const __restrict__  grid2DPoint,
F *const __restrict__  mtx 
)

Add single-point approximation to the matrix.

Definition at line 285 of file embedsom_cuda_projection.cu.

Here is the caller graph for this function:

◆ addGravity2Wise()

template<typename F >
__inline__ __device__ void addGravity2Wise ( const F  score,
const F *const __restrict__  grid2DPoint,
F *const __restrict__  mtx 
)

Add single-point approximation to the matrix, on 2-vectors.

Definition at line 300 of file embedsom_cuda_projection.cu.

Here is the caller graph for this function:

◆ euclideanProjection()

template<typename F >
__inline__ __device__ Vec< 2, F >::Type euclideanProjection ( const F *const __restrict__  point,
const F *const __restrict__  gridPointI,
const F *const __restrict__  gridPointJ,
const uint32_t  dim 
)

Compute a projection of the point to a line defined by points I and J.

Definition at line 318 of file embedsom_cuda_projection.cu.

◆ euclideanProjection4Wise()

template<typename F >
__inline__ __device__ Vec< 2, F >::Type euclideanProjection4Wise ( const F *const __restrict__  point,
const F *const __restrict__  gridPointI,
const F *const __restrict__  gridPointJ,
const uint32_t  dim 
)

Run the projection on 4-vectors.

Definition at line 335 of file embedsom_cuda_projection.cu.

◆ getIndices()

template<typename INDEXER >
__inline__ __device__ uint2 getIndices ( uint32_t  plainIndex,
uint32_t  k 
)

Function template for getting indexes.

Definition at line 469 of file embedsom_cuda_projection.cu.

◆ getIndices< RectangleIndexer >()

template<>
__inline__ __device__ uint2 getIndices< RectangleIndexer > ( uint32_t  plainIndex,
uint32_t  k 
)

Specialization of getIndices for the rectangle indexing.

Definition at line 475 of file embedsom_cuda_projection.cu.

◆ projectionAlignedShMemoryKernel()

template<typename F , typename INDEXER , size_t tileSize>
__global__ void projectionAlignedShMemoryKernel ( const F *__restrict__  points,
const F *const __restrict__  grid,
const F *const __restrict__  grid2d,
knn_entry< F > *__restrict__  neighbors,
F *__restrict__  projections,
const uint32_t  dim,
const uint32_t  n,
const uint32_t  gridSize,
const uint32_t  k,
const F  adjust,
const F  boost,
const uint32_t  groupSize,
const uint32_t  cacheLeadingDim 
)

Aligned-shared-memory projection kernel.

One block computes embedding for one point, using CUB block reduce for matrix reduction.

Definition at line 552 of file embedsom_cuda_projection.cu.

Here is the call graph for this function:

◆ projectionBaseKernel()

template<typename F >
__global__ void projectionBaseKernel ( const F *__restrict__  points,
const F *const __restrict__  grid,
const F *const __restrict__  grid2d,
knn_entry< F > *__restrict__  neighbors,
F *__restrict__  projections,
const uint32_t  dim,
const uint32_t  n,
const uint32_t  gridSize,
const uint32_t  k,
const F  adjust,
const F  boost 
)

Base projection kernel (each thread does everything for a single point).

Definition at line 490 of file embedsom_cuda_projection.cu.

Here is the call graph for this function:

◆ readAligned()

template<typename F , typename ArrayF >
__inline__ __device__ void readAligned ( F *const __restrict__  dst,
const F *const __restrict__  src,
const knn_entry< F > *const __restrict__  neighbors,
const uint32_t  n,
const uint32_t  dim,
const uint32_t  groupRank,
const uint32_t  groupSize 
)

Read aligned data entries for EmbedSOM projection.

Definition at line 105 of file embedsom_cuda_projection.cu.

◆ readAlignedGrid2D()

template<typename F >
__inline__ __device__ void readAlignedGrid2D ( F *const __restrict__  dst,
const F *const __restrict__  src,
const knn_entry< F > *const __restrict__  neighbors,
const uint32_t  n,
const uint32_t  groupRank,
const uint32_t  groupSize 
)

Version of readAligned() for the 2D grid.

Definition at line 132 of file embedsom_cuda_projection.cu.

◆ sortedDistsToScores()

template<typename F >
__inline__ __device__ void sortedDistsToScores ( knn_entry< F > *const __restrict__  neighbors,
const std::size_t  adjustedK,
const std::size_t  k,
const F  boost 
)

Compute scores for a vector of sorted distances.

Definition at line 184 of file embedsom_cuda_projection.cu.

Here is the call graph for this function:

◆ sortedDistsToScoresGroup()

template<typename F , class SCORE_STORAGE , class TILE >
__inline__ __device__ void sortedDistsToScoresGroup ( const TILE &  tile,
SCORE_STORAGE  storage,
const std::size_t  adjustedK,
const std::size_t  k,
const F  boost 
)

Convert the distances to scores using thread_block_tile and its built in functions for reduction and shuffle.

Definition at line 222 of file embedsom_cuda_projection.cu.

◆ storeToCache()

template<typename F >
__inline__ __device__ void storeToCache ( const uint32_t  groupRank,
const uint32_t  groupSize,
const F *const __restrict__  points,
F *const __restrict__  pointsCache,
const F *const __restrict__  grid,
F *const __restrict__  gridCache,
const F *const __restrict__  grid2d,
F *const __restrict__  grid2dCache,
const knn_entry< F > *const __restrict__  neighbors,
const uint32_t  dim,
const uint32_t  gridCacheLeadingDim,
const uint32_t  k 
)

Load data into shared memory cache.

Definition at line 151 of file embedsom_cuda_projection.cu.

Here is the caller graph for this function:

◆ warpReduceSum()

template<typename F >
__inline__ __device__ F warpReduceSum ( val)

Sum a value across the warp.

Definition at line 95 of file embedsom_cuda_projection.cu.