BlosSOM
Interactive dimensionality reduction on large datasets (EmbedSOM and FLOWER combined)
tsne_layout.cpp
Go to the documentation of this file.
1/* This file is part of BlosSOM.
2 *
3 * Copyright (C) 2021 Mirek Kratochvil
4 *
5 * BlosSOM is free software: you can redistribute it and/or modify it under
6 * the terms of the GNU General Public License as published by the Free
7 * Software Foundation, either version 3 of the License, or (at your option)
8 * any later version.
9 *
10 * BlosSOM is distributed in the hope that it will be useful, but WITHOUT ANY
11 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
13 * details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * BlosSOM. If not, see <https://www.gnu.org/licenses/>.
17 */
18
19#include "tsne_layout.h"
20
21constexpr float
22sqrf(float x)
23{
24 return x * x;
25}
26
27void
29 bool vert_pressed,
30 int vert_ind,
31 LandmarkModel &lm,
32 float time)
33{
34 size_t n = lm.n_landmarks(), d = lm.d;
35
36 if (!n || !d)
37 return;
38
39 auto &pji = data.pji;
40 auto &heap = data.heap;
41
42 if (pji.size() != n * n)
43 pji.resize(n * n);
44 if (heap.size() != n)
45 heap.resize(n);
46
47 // Compute distances between hidim landmarks
48 for (size_t i = 0; i < n; ++i) {
49 pji[n * i + i] = 0;
50 for (size_t j = i + 1; j < n; ++j) {
51 float tmp = 0;
52 for (size_t di = 0; di < d; ++di)
53 tmp += sqrf(lm.hidim_vertices[i * d + di] -
54 lm.hidim_vertices[j * d + di]);
55 pji[n * i + j] = tmp;
56 pji[n * j + i] = tmp;
57 }
58 }
59
60 auto hat = [&pji, &heap, n](size_t i, size_t row) -> float {
61 return pji[row * n + heap[i]];
62 };
63 auto hsw = [&heap](size_t i, size_t j) { std::swap(heap[i], heap[j]); };
64 auto hdown = [&hat, &hsw](size_t i, size_t n, size_t row) {
65 for (;;) {
66 size_t l = 2 * i + 1;
67 size_t r = l + 1;
68 if (l >= n)
69 break;
70 if (r >= n) {
71 if (hat(i, row) > hat(l, row))
72 hsw(i, l);
73 break;
74 }
75 if (hat(l, row) < hat(r, row)) {
76 hsw(i, l);
77 i = l;
78 } else {
79 hsw(i, r);
80 i = r;
81 }
82 }
83 };
84 auto heapify = [&hdown](size_t n, size_t row) {
85 size_t i = n / 2 + 1;
86 while (i-- > 0)
87 hdown(i, n, row);
88 };
89 auto hpop = [&hdown, &heap](size_t &n, size_t row) -> size_t {
90 size_t out = heap[0];
91 --n;
92 heap[0] = heap[n];
93 hdown(0, n, row);
94 return out;
95 };
96
97 // Compute similarity matrix for high-dim vertices
98 for (size_t i = 0; i < n; ++i) {
99 for (size_t j = 0; j < n - 1; ++j)
100 heap[j] = j < i ? j : j + 1;
101 heapify(n - 1, i);
102 size_t hs = n - 1;
103 float wsum = 0; // it should sum to 1
104 // Dmitry Kobak's slides say this approximation is OK
105 for (size_t k = 1; hs; ++k)
106 wsum += pji[i * n + hpop(hs, i)] = 1 / float(k);
107 // hopefully this code isn't perplexed.
108 wsum += 0.001;
109 wsum = 1 / wsum;
110 for (size_t j = 0; j < n; ++j)
111 pji[i * n + j] *= wsum;
112 }
113
114 // Make similarities symmetric
115 for (size_t i = 0; i < n; ++i)
116 for (size_t j = i + 1; j < n; ++j)
117 pji[i * n + j] = pji[j * n + i] =
118 (pji[i * n + j] + pji[j * n + i]) / (2 * n);
119
120 float Z = 0;
121 for (size_t i = 0; i < n; ++i)
122 for (size_t j = i + 1; j < n; ++j)
123 Z +=
124 2 / (1 + glm::dot(lm.lodim_vertices[i] - lm.lodim_vertices[j],
125 lm.lodim_vertices[i] - lm.lodim_vertices[j]));
126
127 Z = 1 / Z;
128
129 auto &ups = data.updates;
130 if (ups.size() != n)
131 ups.resize(n);
132 for (auto &u : ups)
133 u = glm::vec2(0, 0);
134
135 float update_weight = 0;
136
137 // Compute the forces applied to each vertex.
138 for (size_t i = 0; i < n; ++i)
139 for (size_t j = i + 1; j < n; ++j) {
140 auto vji = lm.lodim_vertices[i] - lm.lodim_vertices[j];
141 float wij =
142 1 / (1 + glm::dot(lm.lodim_vertices[i] - lm.lodim_vertices[j],
143 lm.lodim_vertices[i] - lm.lodim_vertices[j]));
144 auto a = vji * pji[i * n + j] * wij;
145 ups[i] -= a;
146 ups[j] += a;
147 update_weight += glm::length(a);
148 a = vji * Z * wij * wij;
149 ups[i] += a;
150 ups[j] -= a;
151 update_weight += glm::length(a);
152 }
153
154 update_weight = 100 / update_weight;
155
156 // Apply forces to low dim landmarks
157 for (size_t i = 0; i < n; ++i)
158 if (!vert_pressed || vert_ind != i)
159 lm.lodim_vertices[i] += update_weight * time * ups[i];
160
161 lm.touch();
162}
void touch()
Make the cache dirty.
Definition: dirty.h:43
Model of the high- and low-dimensional landmarks.
size_t n_landmarks() const
Reurns number of the 2D landmarks.
std::vector< glm::vec2 > lodim_vertices
Array storing two-dimensional landmark coordinates.
std::vector< float > hidim_vertices
One-dimensional array storing d-dimensional landmark coordinates in row-major order.
size_t d
Dimension size.
A context structure for tSNE computation.
Definition: tsne_layout.h:35
std::vector< float > pji
Definition: tsne_layout.h:36
std::vector< glm::vec2 > updates
Definition: tsne_layout.h:38
std::vector< size_t > heap
Definition: tsne_layout.h:37
void tsne_layout_step(TSNELayoutData &data, bool vert_pressed, int vert_ind, LandmarkModel &lm, float time)
Optimize the positions of low-dimensional landmarks using the t-SNE algorithm.
Definition: tsne_layout.cpp:28
constexpr float sqrf(float x)
Definition: tsne_layout.cpp:22