43 inline static float back(
float x) {
return sqrt(x); }
44 inline static float comp(
const float *p1,
const float *p2,
const size_t dim)
48 for (
size_t i = 0; i < dim; ++i) {
49 float tmp = p1[i] - p2[i];
54 const float *p1e = p1 + dim, *p1ie = p1e - 3;
56 __m128 s = _mm_setzero_ps();
57 for (; p1 < p1ie; p1 += 4, p2 += 4) {
58 __m128 tmp = _mm_sub_ps(_mm_loadu_ps(p1), _mm_loadu_ps(p2));
59 s = _mm_add_ps(_mm_mul_ps(tmp, tmp), s);
61 float sqdist = s[0] + s[1] + s[2] + s[3];
62 for (; p1 < p1e; ++p1, ++p2) {
63 float tmp = *p1 - *p2;
70 inline static void proj(
const float *la,
79 float scalar = 0, sqdist = 0;
80 for (
size_t k = 0; k < dim; ++k) {
81 float tmp = lb[k] - la[k];
83 scalar += tmp * (p[k] - la[k]);
88 const float *ke = la + dim, *kie = ke - 3;
90 __m128 sca = _mm_setzero_ps(), sqd = _mm_setzero_ps();
91 for (; la < kie; la += 4, lb += 4, p += 4) {
92 __m128 ti = _mm_loadu_ps(la);
93 __m128 tmp = _mm_sub_ps(_mm_loadu_ps(lb), ti);
94 sqd = _mm_add_ps(sqd, _mm_mul_ps(tmp, tmp));
96 sca, _mm_mul_ps(tmp, _mm_sub_ps(_mm_loadu_ps(p), ti)));
98 scalar = sca[0] + sca[1] + sca[2] + sca[3];
99 sqdist = sqd[0] + sqd[1] + sqd[2] + sqd[3];
100 for (; la < ke; ++la, ++lb, ++p) {
101 float tmp = *lb - *la;
103 scalar += tmp * (*p - *la);
143 size_t L = 2 * start + 1;
146 float dl = heap[L].
dist;
147 float dr = heap[R].
dist;
150 if (heap[start].dist >= dl)
152 swap(heap[L], heap[start]);
155 if (heap[start].dist >= dr)
157 swap(heap[R], heap[start]);
160 }
else if (L < lim) {
161 if (heap[start].dist < heap[L].dist)
162 swap(heap[L], heap[start]);
172 const float *hidim_lm,
176 vector<dist_id> &dists)
181 for (i = 0; i < topnn; ++i) {
182 dists[i].dist = distf::comp(point, hidim_lm + i * dim, dim);
187 for (i = 0; i < topnn; ++i)
188 heap_down(dists.data(), topnn - i - 1, topnn);
191 for (i = topnn; i < n_landmarks; ++i) {
192 float s = distf::comp(point, hidim_lm + i * dim, dim);
193 if (dists[0].dist < s)
201 for (i = topnn - 1; i > 0; --i) {
202 swap(dists[0], dists[i]);
211template<
int embed_dim>
216 if (embed_dim == 2) {
219 mtx[4] += gs * lodim_lm[0];
220 mtx[5] += gs * lodim_lm[1];
222 if (embed_dim == 3) {
226 mtx[9] += gs * lodim_lm[0];
227 mtx[10] += gs * lodim_lm[1];
228 mtx[11] += gs * lodim_lm[2];
232template<
int embed_dim>
237 for (
size_t i = 0; i < embed_dim; ++i)
242template<
int embed_dim>
252 float h[embed_dim], hp = 0;
253 for (
size_t i = 0; i < embed_dim; ++i)
254 hp +=
sqrf(h[i] = jlm[i] - ilm[i]);
259 score_i * score_j * powf(1 + hp, -adjust) * expf(-
sqrf(scalar_proj - .5));
260 const float sihp = s / hp;
261 const float rhsc = s * (scalar_proj + dotp_ec<embed_dim>(h, ilm) / hp);
263 if (embed_dim == 2) {
265 mtx[0] += h[0] * h[0] * sihp;
266 mtx[1] += h[0] * h[1] * sihp;
267 mtx[2] += h[1] * h[0] * sihp;
268 mtx[3] += h[1] * h[1] * sihp;
269 mtx[4] += h[0] * rhsc;
270 mtx[5] += h[1] * rhsc;
273 if (embed_dim == 3) {
274 mtx[0] += h[0] * h[0] * sihp;
275 mtx[1] += h[0] * h[1] * sihp;
276 mtx[2] += h[0] * h[2] * sihp;
277 mtx[3] += h[1] * h[0] * sihp;
278 mtx[4] += h[1] * h[1] * sihp;
279 mtx[5] += h[1] * h[2] * sihp;
280 mtx[6] += h[2] * h[0] * sihp;
281 mtx[7] += h[2] * h[1] * sihp;
282 mtx[8] += h[2] * h[2] * sihp;
283 mtx[9] += h[0] * rhsc;
284 mtx[10] += h[1] * rhsc;
285 mtx[11] += h[2] * rhsc;
289template<
int embed_dim>
293 if (embed_dim == 2) {
294 float det = mtx[0] * mtx[3] - mtx[1] * mtx[2];
295 embedding[0] = (mtx[4] * mtx[3] - mtx[5] * mtx[2]) / det;
296 embedding[1] = (mtx[0] * mtx[5] - mtx[1] * mtx[4]) / det;
298 if (embed_dim == 3) {
300 float det = mtx[0] * mtx[4] * mtx[8] + mtx[1] * mtx[5] * mtx[6] +
301 mtx[2] * mtx[3] * mtx[7] - mtx[0] * mtx[5] * mtx[7] -
302 mtx[1] * mtx[3] * mtx[8] - mtx[2] * mtx[4] * mtx[6];
303 embedding[0] = (mtx[9] * mtx[4] * mtx[8] + mtx[10] * mtx[5] * mtx[6] +
304 mtx[11] * mtx[3] * mtx[7] - mtx[9] * mtx[5] * mtx[7] -
305 mtx[10] * mtx[3] * mtx[8] - mtx[11] * mtx[4] * mtx[6]) /
307 embedding[1] = (mtx[0] * mtx[10] * mtx[8] + mtx[1] * mtx[11] * mtx[6] +
308 mtx[2] * mtx[9] * mtx[7] - mtx[0] * mtx[11] * mtx[7] -
309 mtx[1] * mtx[9] * mtx[8] - mtx[2] * mtx[10] * mtx[6]) /
311 embedding[2] = (mtx[0] * mtx[4] * mtx[11] + mtx[1] * mtx[5] * mtx[9] +
312 mtx[2] * mtx[3] * mtx[10] - mtx[0] * mtx[5] * mtx[10] -
313 mtx[1] * mtx[3] * mtx[11] - mtx[2] * mtx[4] * mtx[9]) /
329 float mean = 0, sd = 0, wsum = 0;
330 for (
size_t i = 0; i < topnn; ++i) {
331 const float tmp = distf::back(dists[i].dist);
332 const float w = 1 / float(i + 1);
340 sd = boost / sqrtf(sd / wsum -
sqrf(mean));
344 for (
size_t i = 0; i < topn; ++i)
346 dists[i].dist = expf((mean - dists[i].dist) * sd) *
349 dists[i].dist = expf((mean - dists[i].dist) * sd);
355template<
class distf,
int embed_dim>
363 const float *hidim_lm,
364 const float *lodim_lm,
366 vector<dist_id> &dists)
368 const size_t topnn = topn < n_landmarks ? topn + 1 : topn;
370 knn<distf>(point, hidim_lm, n_landmarks, dim, topnn, dists);
372 sorted_dists_to_scores<distf>(dists, topn, topnn, boost);
375 float mtx[embed_dim * (1 + embed_dim)];
376 fill(mtx, mtx + embed_dim * (1 + embed_dim), 0);
379 for (
size_t i = 0; i < topn; ++i) {
380 size_t idx = dists[i].id;
381 float score_i = dists[i].dist;
383 float ilm[embed_dim];
384 copy_n(lodim_lm + embed_dim * idx, embed_dim, ilm);
388 add_gravity<embed_dim>(ilm, score_i, mtx);
392 for (
size_t j = i + 1; j < topn; ++j) {
394 size_t jdx = dists[j].id;
395 float score_j = dists[j].dist;
397 float jlm[embed_dim];
398 copy_n(lodim_lm + embed_dim * jdx, embed_dim, jlm);
400 float scalar, sqdist;
401 distf::proj(hidim_lm + dim * idx,
402 hidim_lm + dim * jdx,
413 add_approximation<embed_dim>(
414 score_i, score_j, ilm, jlm, scalar, adjust, mtx);
418 solve_lin_eq<embed_dim>(mtx, embedding);
429 const float *hidim_lm,
430 const float *lodim_lm,
433 if (topn > n_landmarks)
436 const size_t topnn = topn < n_landmarks ? topn + 1 : topn;
441 vector<dist_id> dists;
445 for (
size_t i = 0; i < n; ++i)
446 embedsom_point<distfs::sqeucl, 2>(n_landmarks,
static const float koho_gravity
static void solve_lin_eq(const float *mtx, float *embedding)
static void heap_down(dist_id *heap, size_t start, size_t lim)
static const float min_boost
static void embedsom_point(const size_t n_landmarks, const size_t dim, const float boost, const size_t topn, const float adjust, const float *point, const float *hidim_lm, const float *lodim_lm, float *embedding, vector< dist_id > &dists)
void embedsom(size_t n, size_t n_landmarks, size_t dim, float boost, size_t topn, float adjust, const float *points, const float *hidim_lm, const float *lodim_lm, float *embedding)
static float dotp_ec(const float *a, const float *b)
static void add_gravity(const float *lodim_lm, float score, float *mtx)
static float sqrf(float n)
static void add_approximation(float score_i, float score_j, const float *ilm, const float *jlm, float scalar_proj, float adjust, float *mtx)
static const float max_avoidance
static void knn(const float *point, const float *hidim_lm, size_t n_landmarks, size_t dim, size_t topnn, vector< dist_id > &dists)
static void sorted_dists_to_scores(vector< dist_id > &dists, const size_t topn, const size_t topnn, const float boost)
static const float zero_avoidance
static void proj(const float *la, const float *lb, const float *p, const size_t dim, float &o_scalar, float &o_sqdist)
static float back(float x)
static float comp(const float *p1, const float *p2, const size_t dim)