BlosSOM
Interactive dimensionality reduction on large datasets (EmbedSOM and FLOWER combined)
|
#include "embedsom_cuda.h"
#include "cooperative_groups.h"
#include "cooperative_groups/reduce.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
Go to the source code of this file.
Classes | |
struct | EmbedSOMConstants< F > |
struct | SharedNeighborStorage< F > |
Helper structure to transform neighbor distances to scores. More... | |
struct | NeighborScoreStorage< F > |
Helper structure to help transform neighbor distances to scores. More... | |
struct | RectangleIndexer |
Type tag for indexing by small rectangles. More... | |
Macros | |
#define | _CG_ABI_EXPERIMENTAL |
#define | DOIT(X) |
Functions | |
template<typename F > | |
__inline__ __device__ F | warpReduceSum (F val) |
Sum a value across the warp. More... | |
template<typename F , typename ArrayF > | |
__inline__ __device__ void | readAligned (F *const __restrict__ dst, const F *const __restrict__ src, const knn_entry< F > *const __restrict__ neighbors, const uint32_t n, const uint32_t dim, const uint32_t groupRank, const uint32_t groupSize) |
Read aligned data entries for EmbedSOM projection. More... | |
template<typename F > | |
__inline__ __device__ void | readAlignedGrid2D (F *const __restrict__ dst, const F *const __restrict__ src, const knn_entry< F > *const __restrict__ neighbors, const uint32_t n, const uint32_t groupRank, const uint32_t groupSize) |
Version of readAligned() for the 2D grid. More... | |
template<typename F > | |
__inline__ __device__ void | storeToCache (const uint32_t groupRank, const uint32_t groupSize, const F *const __restrict__ points, F *const __restrict__ pointsCache, const F *const __restrict__ grid, F *const __restrict__ gridCache, const F *const __restrict__ grid2d, F *const __restrict__ grid2dCache, const knn_entry< F > *const __restrict__ neighbors, const uint32_t dim, const uint32_t gridCacheLeadingDim, const uint32_t k) |
Load data into shared memory cache. More... | |
template<typename F > | |
__inline__ __device__ void | sortedDistsToScores (knn_entry< F > *const __restrict__ neighbors, const std::size_t adjustedK, const std::size_t k, const F boost) |
Compute scores for a vector of sorted distances. More... | |
template<typename F , class SCORE_STORAGE , class TILE > | |
__inline__ __device__ void | sortedDistsToScoresGroup (const TILE &tile, SCORE_STORAGE storage, const std::size_t adjustedK, const std::size_t k, const F boost) |
Convert the distances to scores using thread_block_tile and its built in functions for reduction and shuffle. More... | |
template<typename F > | |
__inline__ __device__ void | addGravity (const F score, const F *const __restrict__ grid2DPoint, F *const __restrict__ mtx) |
Add single-point approximation to the matrix. More... | |
template<typename F > | |
__inline__ __device__ void | addGravity2Wise (const F score, const F *const __restrict__ grid2DPoint, F *const __restrict__ mtx) |
Add single-point approximation to the matrix, on 2-vectors. More... | |
template<typename F > | |
__inline__ __device__ Vec< 2, F >::Type | euclideanProjection (const F *const __restrict__ point, const F *const __restrict__ gridPointI, const F *const __restrict__ gridPointJ, const uint32_t dim) |
Compute a projection of the point to a line defined by points I and J. More... | |
template<typename F > | |
__inline__ __device__ Vec< 2, F >::Type | euclideanProjection4Wise (const F *const __restrict__ point, const F *const __restrict__ gridPointI, const F *const __restrict__ gridPointJ, const uint32_t dim) |
Run the projection on 4-vectors. More... | |
template<typename F > | |
__inline__ __device__ void | addApproximation (const F scoreI, const F scoreJ, const F *const __restrict__ grid2DPointI, const F *const __restrict__ grid2DPointJ, const F adjust, const F scalarProjection, F *const __restrict__ mtx) |
Add the result of projections to the approximation matrix. More... | |
template<typename F > | |
__inline__ __device__ void | addApproximation2Wise (const F scoreI, const F scoreJ, const F *const __restrict__ grid2DPointI, const F *const __restrict__ grid2DPointJ, const F adjust, const F scalarProjection, F *const __restrict__ mtx) |
Same as addApproximation(), on 2-vectors. More... | |
template<typename INDEXER > | |
__inline__ __device__ uint2 | getIndices (uint32_t plainIndex, uint32_t k) |
Function template for getting indexes. More... | |
template<> | |
__inline__ __device__ uint2 | getIndices< RectangleIndexer > (uint32_t plainIndex, uint32_t k) |
Specialization of getIndices for the rectangle indexing. More... | |
template<typename F > | |
__global__ void | projectionBaseKernel (const F *__restrict__ points, const F *const __restrict__ grid, const F *const __restrict__ grid2d, knn_entry< F > *__restrict__ neighbors, F *__restrict__ projections, const uint32_t dim, const uint32_t n, const uint32_t gridSize, const uint32_t k, const F adjust, const F boost) |
Base projection kernel (each thread does everything for a single point). More... | |
template<typename F , typename INDEXER , size_t tileSize> | |
__global__ void | projectionAlignedShMemoryKernel (const F *__restrict__ points, const F *const __restrict__ grid, const F *const __restrict__ grid2d, knn_entry< F > *__restrict__ neighbors, F *__restrict__ projections, const uint32_t dim, const uint32_t n, const uint32_t gridSize, const uint32_t k, const F adjust, const F boost, const uint32_t groupSize, const uint32_t cacheLeadingDim) |
Aligned-shared-memory projection kernel. More... | |
#define _CG_ABI_EXPERIMENTAL |
Definition at line 32 of file embedsom_cuda_projection.cu.
#define DOIT | ( | X | ) |
__inline__ __device__ void addApproximation | ( | const F | scoreI, |
const F | scoreJ, | ||
const F *const __restrict__ | grid2DPointI, | ||
const F *const __restrict__ | grid2DPointJ, | ||
const F | adjust, | ||
const F | scalarProjection, | ||
F *const __restrict__ | mtx | ||
) |
Add the result of projections to the approximation matrix.
Definition at line 379 of file embedsom_cuda_projection.cu.
__inline__ __device__ void addApproximation2Wise | ( | const F | scoreI, |
const F | scoreJ, | ||
const F *const __restrict__ | grid2DPointI, | ||
const F *const __restrict__ | grid2DPointJ, | ||
const F | adjust, | ||
const F | scalarProjection, | ||
F *const __restrict__ | mtx | ||
) |
Same as addApproximation(), on 2-vectors.
Definition at line 415 of file embedsom_cuda_projection.cu.
__inline__ __device__ void addGravity | ( | const F | score, |
const F *const __restrict__ | grid2DPoint, | ||
F *const __restrict__ | mtx | ||
) |
Add single-point approximation to the matrix.
Definition at line 285 of file embedsom_cuda_projection.cu.
__inline__ __device__ void addGravity2Wise | ( | const F | score, |
const F *const __restrict__ | grid2DPoint, | ||
F *const __restrict__ | mtx | ||
) |
Add single-point approximation to the matrix, on 2-vectors.
Definition at line 300 of file embedsom_cuda_projection.cu.
__inline__ __device__ Vec< 2, F >::Type euclideanProjection | ( | const F *const __restrict__ | point, |
const F *const __restrict__ | gridPointI, | ||
const F *const __restrict__ | gridPointJ, | ||
const uint32_t | dim | ||
) |
Compute a projection of the point to a line defined by points I and J.
Definition at line 318 of file embedsom_cuda_projection.cu.
__inline__ __device__ Vec< 2, F >::Type euclideanProjection4Wise | ( | const F *const __restrict__ | point, |
const F *const __restrict__ | gridPointI, | ||
const F *const __restrict__ | gridPointJ, | ||
const uint32_t | dim | ||
) |
Run the projection on 4-vectors.
Definition at line 335 of file embedsom_cuda_projection.cu.
__inline__ __device__ uint2 getIndices | ( | uint32_t | plainIndex, |
uint32_t | k | ||
) |
Function template for getting indexes.
Definition at line 469 of file embedsom_cuda_projection.cu.
__inline__ __device__ uint2 getIndices< RectangleIndexer > | ( | uint32_t | plainIndex, |
uint32_t | k | ||
) |
Specialization of getIndices for the rectangle indexing.
Definition at line 475 of file embedsom_cuda_projection.cu.
__global__ void projectionAlignedShMemoryKernel | ( | const F *__restrict__ | points, |
const F *const __restrict__ | grid, | ||
const F *const __restrict__ | grid2d, | ||
knn_entry< F > *__restrict__ | neighbors, | ||
F *__restrict__ | projections, | ||
const uint32_t | dim, | ||
const uint32_t | n, | ||
const uint32_t | gridSize, | ||
const uint32_t | k, | ||
const F | adjust, | ||
const F | boost, | ||
const uint32_t | groupSize, | ||
const uint32_t | cacheLeadingDim | ||
) |
Aligned-shared-memory projection kernel.
One block computes embedding for one point, using CUB block reduce for matrix reduction.
Definition at line 552 of file embedsom_cuda_projection.cu.
__global__ void projectionBaseKernel | ( | const F *__restrict__ | points, |
const F *const __restrict__ | grid, | ||
const F *const __restrict__ | grid2d, | ||
knn_entry< F > *__restrict__ | neighbors, | ||
F *__restrict__ | projections, | ||
const uint32_t | dim, | ||
const uint32_t | n, | ||
const uint32_t | gridSize, | ||
const uint32_t | k, | ||
const F | adjust, | ||
const F | boost | ||
) |
Base projection kernel (each thread does everything for a single point).
Definition at line 490 of file embedsom_cuda_projection.cu.
__inline__ __device__ void readAligned | ( | F *const __restrict__ | dst, |
const F *const __restrict__ | src, | ||
const knn_entry< F > *const __restrict__ | neighbors, | ||
const uint32_t | n, | ||
const uint32_t | dim, | ||
const uint32_t | groupRank, | ||
const uint32_t | groupSize | ||
) |
Read aligned data entries for EmbedSOM projection.
Definition at line 105 of file embedsom_cuda_projection.cu.
__inline__ __device__ void readAlignedGrid2D | ( | F *const __restrict__ | dst, |
const F *const __restrict__ | src, | ||
const knn_entry< F > *const __restrict__ | neighbors, | ||
const uint32_t | n, | ||
const uint32_t | groupRank, | ||
const uint32_t | groupSize | ||
) |
Version of readAligned() for the 2D grid.
Definition at line 132 of file embedsom_cuda_projection.cu.
__inline__ __device__ void sortedDistsToScores | ( | knn_entry< F > *const __restrict__ | neighbors, |
const std::size_t | adjustedK, | ||
const std::size_t | k, | ||
const F | boost | ||
) |
Compute scores for a vector of sorted distances.
Definition at line 184 of file embedsom_cuda_projection.cu.
__inline__ __device__ void sortedDistsToScoresGroup | ( | const TILE & | tile, |
SCORE_STORAGE | storage, | ||
const std::size_t | adjustedK, | ||
const std::size_t | k, | ||
const F | boost | ||
) |
Convert the distances to scores using thread_block_tile and its built in functions for reduction and shuffle.
Definition at line 222 of file embedsom_cuda_projection.cu.
__inline__ __device__ void storeToCache | ( | const uint32_t | groupRank, |
const uint32_t | groupSize, | ||
const F *const __restrict__ | points, | ||
F *const __restrict__ | pointsCache, | ||
const F *const __restrict__ | grid, | ||
F *const __restrict__ | gridCache, | ||
const F *const __restrict__ | grid2d, | ||
F *const __restrict__ | grid2dCache, | ||
const knn_entry< F > *const __restrict__ | neighbors, | ||
const uint32_t | dim, | ||
const uint32_t | gridCacheLeadingDim, | ||
const uint32_t | k | ||
) |
Load data into shared memory cache.
Definition at line 151 of file embedsom_cuda_projection.cu.
__inline__ __device__ F warpReduceSum | ( | F | val | ) |
Sum a value across the warp.
Definition at line 95 of file embedsom_cuda_projection.cu.