BlosSOM
Interactive dimensionality reduction on large datasets (EmbedSOM and FLOWER combined)
scatter_model.cpp
Go to the documentation of this file.
1/* This file is part of BlosSOM.
2 *
3 * Copyright (C) 2021 Mirek Kratochvil
4 * Sona Molnarova
5 *
6 * BlosSOM is free software: you can redistribute it and/or modify it under
7 * the terms of the GNU General Public License as published by the Free
8 * Software Foundation, either version 3 of the License, or (at your option)
9 * any later version.
10 *
11 * BlosSOM is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
14 * details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * BlosSOM. If not, see <https://www.gnu.org/licenses/>.
18 */
19
20#include "scatter_model.h"
21#include "embedsom.h"
22
23#ifdef ENABLE_CUDA
24#include "embedsom_cuda.h"
25#endif
26
// Per-frame update of the cached two-dimensional point coordinates: finds the
// dirty part of the data, limits it to a per-frame batch size and runs the
// EmbedSOM projection on that part, storing the timing in frame_stats.
// NOTE(review): the fused listing numbers jump 27->29, 37->39, 66->68 and
// 80->82, so the function name with its first parameter and a few statements
// seem to be missing from this extract -- confirm against the original file.
27void
29 const LandmarkModel &lm,
30 const TrainingConfig &tc,
31 FrameStats &frame_stats)
32{
// The data itself changed: resize the output to d.n points and mark
// everything for recomputation.
33 if (dirty(d)) {
34 points.resize(d.n);
35 refresh(d);
36 clean(d);
37 frame_stats.reset(frame_stats.embedsom_t);
39 }
40
// The landmarks changed: all points have to be recomputed as well.
41 if (lm_watch.dirty(lm)) {
42 refresh(d);
43 lm_watch.clean(lm);
44 }
45
46 // dirty_range() gives the beginning index ri of the data that should be
47 // processed and the number of elements rn that should be
48 // processed. The number of elements can be zero if nothing
49 // has to be recomputed.
50 auto [ri, rn] = dirty_range(d);
51 if (!rn) {
52 frame_stats.reset(frame_stats.embedsom_t);
53 return;
54 }
55
56 const size_t max_points = batch_size_gen.next(
57 frame_stats.embedsom_t, frame_stats.embedsom_duration);
58
59 // If the number of elements that need to be recomputed is larger
60 // than the maximum number of points that can be processed in this
61 // frame, the number of elements is lowered to that maximum.
62 if (rn > max_points)
63 rn = max_points;
64
// Landmark and data dimensions differ: nothing is computed in this call.
65 if (lm.d != d.dim()) {
66 frame_stats.reset(frame_stats.embedsom_t);
68 return;
69 }
70
71 // Mark rn elements of the cache as refreshed, where rn is the
72 // number of elements that will be recomputed below.
73 clean_range(d, rn);
74
// Helper that projects n points starting at index `from` and records the
// elapsed time into frame_stats.embedsom_t.
75 auto do_embedsom = [&](size_t from, size_t n) {
76 frame_stats.add_const_time();
77
// NOTE(review): the callee of the #else branch is not visible in this
// listing; presumably it is the CPU embedsom() function -- confirm.
78#ifdef ENABLE_CUDA
79 embedsom_cuda.run
80#else
82#endif
83 (n,
84 lm.n_landmarks(),
85 d.dim(),
86 tc.boost,
87 tc.topn,
88 tc.adjust,
89 d.data.data() + d.dim() * from,
90 lm.hidim_vertices.data(),
91 &lm.lodim_vertices[0][0],
92 &points[from][0]);
93
94 frame_stats.store_time(frame_stats.embedsom_t);
95 };
96
97 // If the range starting at index ri reaches or passes the end of the
98 // data, the range is cyclic and needs to continue from the
99 // beginning of the data.
100 if (ri + rn >= d.n) {
101 // So first the elements that remain up to the end of the data
102 // are processed.
103 size_t diff = d.n - ri;
104 do_embedsom(ri, diff);
105 // Then the index cycles back to the beginning.
106 ri = 0;
107 // And the number of elements that need to be processed
108 // is lowered by the number of already processed elements.
109 rn -= diff;
110 }
111
112 // Process the elements that are left to process.
113 if (rn)
114 do_embedsom(ri, rn);
115}
size_t next(float T, float t)
Computes size of the next batch.
void embedsom(size_t n, size_t n_landmarks, size_t dim, float boost, size_t topn, float adjust, const float *points, const float *hidim_lm, const float *lodim_lm, float *embedding)
Definition: embedsom.cpp:422
void clean(const Dirt &d)
Call this when the cache is refreshed.
Definition: dirty.h:67
bool dirty(const Dirt &d)
Returns true if the cache needs to be refreshed.
Definition: dirty.h:60
size_t n
Definition: dirty.h:83
void add_const_time()
Definition: frame_stats.h:73
void store_time(float &to)
Definition: frame_stats.h:79
void reset(float &t)
Definition: frame_stats.h:85
float embedsom_duration
Definition: frame_stats.h:41
float embedsom_t
Definition: frame_stats.h:31
Model of the high- and low-dimensional landmarks.
size_t n_landmarks() const
Returns number of the 2D landmarks.
std::vector< glm::vec2 > lodim_vertices
Array storing two-dimensional landmark coordinates.
std::vector< float > hidim_vertices
One-dimensional array storing d-dimensional landmark coordinates in row-major order.
size_t d
Dimension size.
Storage of the scaled data.
Definition: scaled_data.h:53
std::vector< float > data
Scaled data in the same format as DataModel::data.
Definition: scaled_data.h:55
size_t dim() const
Returns dimension of the scaled data.
Definition: scaled_data.h:66
std::vector< glm::vec2 > points
Coordinates of the two-dimensional data points.
Definition: scatter_model.h:44
BatchSizeGen batch_size_gen
Definition: scatter_model.h:52
void update(const ScaledData &d, const LandmarkModel &lm, const TrainingConfig &tc, FrameStats &frame_stats)
Recomputes the coordinates if any of the parameters of the embedsom algorithm has changed.
Cleaner lm_watch
Definition: scatter_model.h:50
std::tuple< size_t, size_t > dirty_range(const Dirts &d)
Find the range to refresh.
Definition: dirty.h:113
void clean_range(const Dirts &d, size_t n)
Clean a range of the cache.
Definition: dirty.h:130
void refresh(const Dirts &d)
Force-refresh the whole range.
Definition: dirty.h:105
Storage of the dynamic parameters of the algorithms that are set in the GUI by user.
float boost
Boost value for EmbedSOM algorithm.
float adjust
Adjust value for EmbedSOM algorithm.
int topn
Landmark neighborhood size value for EmbedSOM algorithm.