BlosSOM
Interactive dimensionality reduction on large datasets (EmbedSOM and FLOWER combined)
kmeans_landmark.cpp
Go to the documentation of this file.
1/* This file is part of BlosSOM.
2 *
3 * Copyright (C) 2021 Mirek Kratochvil
4 *
5 * BlosSOM is free software: you can redistribute it and/or modify it under
6 * the terms of the GNU General Public License as published by the Free
7 * Software Foundation, either version 3 of the License, or (at your option)
8 * any later version.
9 *
10 * BlosSOM is distributed in the hope that it will be useful, but WITHOUT ANY
11 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
13 * details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * BlosSOM. If not, see <https://www.gnu.org/licenses/>.
17 */
18
19#include "kmeans_landmark.h"
20
21#include <glm/glm.hpp>
22
23#include <limits>
24
25/** Helper functions for squaring floats.
26 *
27 * Name taken from Borland Pascal standard library.
28 */
29constexpr float
30sqr(float x)
31{
32 return x * x;
33}
34
35void
37 const ScaledData &model,
38 size_t iters,
39 float alpha,
40 float gravity,
41 LandmarkModel &lm)
42{
43 size_t n_means = lm.n_landmarks();
44 size_t d = lm.d;
45 auto &means = lm.hidim_vertices;
46
47 if (!(model.n && n_means))
48 return; // this might happen but it's technically okay
49
50 if (d != model.dim() || means.size() != n_means * d)
51 return; // TODO throw something useful
52
53 gravity *= alpha / n_means;
54
55 std::uniform_int_distribution<size_t> random_target(0, model.n - 1);
56
57 lm.touch();
58 for (size_t iter = 0; iter < iters; ++iter) {
59 size_t tgt = random_target(data.gen);
60
61 size_t best = 0;
62 float best_sqdist = std::numeric_limits<float>::infinity();
63
64 // find the mean that's closest to the target
65 for (size_t mi = 0; mi < n_means; ++mi) {
66 float sqd = 0;
67
68 for (size_t di = 0; di < d; ++di)
69 sqd += sqr(means[di + d * mi] - model.data[di + d * tgt]);
70
71 if (sqd < best_sqdist) {
72 best = mi;
73 best_sqdist = sqd;
74 }
75 }
76
77 // move the means a bit closer
78 for (size_t mi = 0; mi < n_means; ++mi)
79 for (size_t di = 0; di < d; ++di)
80 means[di + d * mi] +=
81 ((mi == best) ? alpha : gravity) *
82 (model.data[di + d * tgt] - means[di + d * mi]);
83 }
84}
85
86void
88 const ScaledData &model,
89 size_t iters,
90 float alpha,
91 float sigma,
92 LandmarkModel &lm)
93{
94 size_t n_neurons = lm.n_landmarks();
95 size_t d = lm.d;
96 const auto &map = lm.lodim_vertices;
97 auto &neurons = lm.hidim_vertices;
98
99 const float nisqsigma = -1.0 / (sigma * sigma);
100
101 if (!(model.n && n_neurons))
102 return; // this might happen but it's technically okay
103
104 if (d != model.dim() || neurons.size() != n_neurons * d ||
105 map.size() != n_neurons)
106 return; // TODO throw something useful
107
108 std::uniform_int_distribution<size_t> random_target(0, model.n - 1);
109
110 lm.touch();
111 for (size_t iter = 0; iter < iters; ++iter) {
112 size_t tgt = random_target(data.gen);
113
114 size_t best = 0;
115 float best_sqdist = std::numeric_limits<float>::infinity();
116
117 // find the mean that's closest to the target
118 for (size_t ni = 0; ni < n_neurons; ++ni) {
119 float sqd = 0;
120
121 for (size_t di = 0; di < d; ++di)
122 sqd += sqr(neurons[di + d * ni] - model.data[di + d * tgt]);
123
124 if (sqd < best_sqdist) {
125 best = ni;
126 best_sqdist = sqd;
127 }
128 }
129
130 // move the rest according to the neighborhood function
131 for (size_t ni = 0; ni < n_neurons; ++ni) {
132 float r =
133 alpha * exp(glm::dot(map[best] - map[ni], map[best] - map[ni]) *
134 nisqsigma);
135
136 for (size_t di = 0; di < d; ++di)
137 neurons[di + d * ni] +=
138 r * (model.data[di + d * tgt] - neurons[di + d * ni]);
139 }
140 }
141}
void kmeans_landmark_step(KMeansData &data, const ScaledData &model, size_t iters, float alpha, float gravity, LandmarkModel &lm)
Run a k-means-like optimization of high-dimensional landmark positions.
constexpr float sqr(float x)
Helper functions for squaring floats.
void som_landmark_step(KMeansData &data, const ScaledData &model, size_t iters, float alpha, float sigma, LandmarkModel &lm)
Run a SOM to optimize high-dimensional landmark positions.
void touch()
Make the cache dirty.
Definition: dirty.h:43
size_t n
Definition: dirty.h:83
Structure for storing the kmeans-style data.
std::default_random_engine gen
Random engine for picking the points for training.
Model of the high- and low-dimensional landmarks.
size_t n_landmarks() const
Reurns number of the 2D landmarks.
std::vector< glm::vec2 > lodim_vertices
Array storing two-dimensional landmark coordinates.
std::vector< float > hidim_vertices
One-dimensional array storing d-dimensional landmark coordinates in row-major order.
size_t d
Dimension size.
Storage of the scaled data.
Definition: scaled_data.h:53
std::vector< float > data
Scaled data in the same format as DataModel::data.
Definition: scaled_data.h:55
size_t dim() const
Returns dimension of the scaled data.
Definition: scaled_data.h:66