blossom/tsne__layout_8cpp_source.html

/* This file is part of BlosSOM.

 *

 * Copyright (C) 2021 Mirek Kratochvil

 *

 * BlosSOM is free software: you can redistribute it and/or modify it under

 * the terms of the GNU General Public License as published by the Free

 * Software Foundation, either version 3 of the License, or (at your option)

 * any later version.

 *

 * BlosSOM is distributed in the hope that it will be useful, but WITHOUT ANY

 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS

 * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more

 * details.

 *

 * You should have received a copy of the GNU General Public License along with

 * BlosSOM. If not, see <https://www.gnu.org/licenses/>.

 */


#include "tsne_layout.h"


/** Square of a single-precision value, i.e. `x` multiplied by itself. */
constexpr float sqrf(float x) { return x * x; }


/**
 * Optimize the positions of low-dimensional landmarks using one step of a
 * t-SNE-style algorithm.
 *
 * The step proceeds as follows:
 *  1. squared Euclidean distances between all pairs of high-dimensional
 *     landmarks are stored in `data.pji`;
 *  2. for each landmark, the other landmarks are ranked by that distance
 *     (nearest first) using a binary min-heap kept in `data.heap`, and the
 *     k-th nearest one receives the weight 1/k; each row is then normalized;
 *  3. the weights are symmetrized and divided by `2 * n`;
 *  4. attractive and repulsive contributions are accumulated for each
 *     landmark in `data.updates`;
 *  5. the low-dimensional landmark positions are moved along the accumulated
 *     updates and the model is marked dirty with `lm.touch()`.
 *
 * Nothing happens if the model has no landmarks or zero dimensions.
 *
 * @param data Scratch buffers (`pji`, `heap`, `updates`); they are resized
 *             here as needed and fully overwritten.
 * @param vert_pressed If true, the landmark at index `vert_ind` is left in
 *                     place.
 * @param vert_ind Index of the landmark to keep in place; only consulted
 *                 when `vert_pressed` is true.
 * @param lm Landmark model; `hidim_vertices` is read, `lodim_vertices` is
 *           updated in place.
 * @param time Multiplier of the applied step size.
 */
void
tsne_layout_step(TSNELayoutData &data,
                 bool vert_pressed,
                 int vert_ind,
                 LandmarkModel &lm,
                 float time)
{
    size_t n = lm.n_landmarks(), d = lm.d;

    if (!n || !d)
        return;

    auto &pji = data.pji;
    auto &heap = data.heap;

    if (pji.size() != n * n)
        pji.resize(n * n);
    if (heap.size() != n)
        heap.resize(n);

    // Compute squared distances between hidim landmarks (symmetric matrix
    // with a zero diagonal).
    for (size_t i = 0; i < n; ++i) {
        pji[n * i + i] = 0;
        for (size_t j = i + 1; j < n; ++j) {
            float tmp = 0;
            for (size_t di = 0; di < d; ++di)
                tmp += sqrf(lm.hidim_vertices[i * d + di] -
                            lm.hidim_vertices[j * d + di]);
            pji[n * i + j] = tmp;
            pji[n * j + i] = tmp;
        }
    }

    // Binary min-heap over landmark indices stored in `heap`, keyed by the
    // value found in the given row of `pji`.
    auto hat = [&pji, &heap, n](size_t i, size_t row) -> float {
        return pji[row * n + heap[i]];
    };
    auto hsw = [&heap](size_t i, size_t j) { std::swap(heap[i], heap[j]); };
    // Sift the item at position `i` down in a heap of `size` items.
    auto hdown = [&hat, &hsw](size_t i, size_t size, size_t row) {
        for (;;) {
            size_t l = 2 * i + 1;
            size_t r = l + 1;
            if (l >= size)
                break;
            // Choose the child with the smaller key (the left one if it is
            // the only child).
            size_t c = (r < size && hat(r, row) < hat(l, row)) ? r : l;
            // Stop once the heap property holds; swapping unconditionally
            // would push small keys below larger ones and break the
            // ordering of subsequent pops.
            if (hat(i, row) <= hat(c, row))
                break;
            hsw(i, c);
            i = c;
        }
    };
    // Establish the heap property on the first `size` items, bottom-up.
    auto heapify = [&hdown](size_t size, size_t row) {
        size_t i = size / 2 + 1;
        while (i-- > 0)
            hdown(i, size, row);
    };
    // Remove and return the landmark index with the smallest key; `size` is
    // decremented. The heap must not be empty.
    auto hpop = [&hdown, &heap](size_t &size, size_t row) -> size_t {
        size_t out = heap[0];
        --size;
        heap[0] = heap[size];
        hdown(0, size, row);
        return out;
    };

    // Compute similarity matrix for high-dim vertices
    for (size_t i = 0; i < n; ++i) {
        // Fill the heap with all landmark indices except `i` itself.
        for (size_t j = 0; j < n - 1; ++j)
            heap[j] = j < i ? j : j + 1;
        heapify(n - 1, i);
        size_t hs = n - 1;
        float wsum = 0; // it should sum to 1
        // Dmitry Kobak's slides say this approximation is OK
        // (the k-th nearest landmark gets weight 1/k). A popped index is no
        // longer in the heap, so overwriting its distance with the weight
        // does not disturb the remaining heap keys.
        for (size_t k = 1; hs; ++k) {
            size_t nearest = hpop(hs, i);
            float w = 1 / float(k);
            pji[i * n + nearest] = w;
            wsum += w;
        }
        // hopefully this code isn't perplexed.
        // (the small constant also keeps the reciprocal finite when n == 1)
        wsum += 0.001;
        wsum = 1 / wsum;
        for (size_t j = 0; j < n; ++j)
            pji[i * n + j] *= wsum;
    }

    // Make similarities symmetric
    for (size_t i = 0; i < n; ++i)
        for (size_t j = i + 1; j < n; ++j)
            pji[i * n + j] = pji[j * n + i] =
              (pji[i * n + j] + pji[j * n + i]) / (2 * n);

    // Normalization constant of the low-dimensional kernel
    // 1 / (1 + squared distance), summed over all ordered pairs.
    float Z = 0;
    for (size_t i = 0; i < n; ++i)
        for (size_t j = i + 1; j < n; ++j) {
            auto vji = lm.lodim_vertices[i] - lm.lodim_vertices[j];
            Z += 2 / (1 + glm::dot(vji, vji));
        }

    // For n == 1 there are no pairs and Z stays zero; the reciprocal is then
    // infinite but never used, because the force loop below has no pairs
    // either.
    Z = 1 / Z;

    auto &ups = data.updates;
    if (ups.size() != n)
        ups.resize(n);
    for (auto &u : ups)
        u = glm::vec2(0, 0);

    // Sum of magnitudes of all force contributions; used to normalize the
    // step size.
    float update_weight = 0;

    // Compute the forces applied to each vertex.
    for (size_t i = 0; i < n; ++i)
        for (size_t j = i + 1; j < n; ++j) {
            auto vji = lm.lodim_vertices[i] - lm.lodim_vertices[j];
            float wij = 1 / (1 + glm::dot(vji, vji));

            // Attraction weighted by the high-dimensional similarity.
            auto a = vji * pji[i * n + j] * wij;
            ups[i] -= a;
            ups[j] += a;
            update_weight += glm::length(a);

            // Repulsion weighted by the normalized low-dimensional kernel.
            a = vji * Z * wij * wij;
            ups[i] += a;
            ups[j] -= a;
            update_weight += glm::length(a);
        }

    // Apply forces to low dim landmarks. If no force was accumulated (single
    // landmark, or all landmarks coincide), dividing by zero would yield
    // inf * 0 = NaN positions, so the positions are left untouched instead.
    // The comparison is also false for NaN.
    if (update_weight > 0) {
        update_weight = 100 / update_weight;

        for (size_t i = 0; i < n; ++i)
            if (!vert_pressed || static_cast<size_t>(vert_ind) != i)
                lm.lodim_vertices[i] += update_weight * time * ups[i];
    }

    lm.touch();
}

Dirt::touch
void touch()
Make the cache dirty.
Definition: dirty.h:43

LandmarkModel
Model of the high- and low-dimensional landmarks.
Definition: landmark_model.h:34

LandmarkModel::n_landmarks
size_t n_landmarks() const
Returns number of the 2D landmarks.
Definition: landmark_model.h:120

LandmarkModel::lodim_vertices
std::vector< glm::vec2 > lodim_vertices
Array storing two-dimensional landmark coordinates.
Definition: landmark_model.h:41

LandmarkModel::hidim_vertices
std::vector< float > hidim_vertices
One-dimensional array storing d-dimensional landmark coordinates in row-major order.
Definition: landmark_model.h:39

LandmarkModel::d
size_t d
Dimension size.
Definition: landmark_model.h:36

TSNELayoutData
A context structure for tSNE computation.
Definition: tsne_layout.h:35

TSNELayoutData::pji
std::vector< float > pji
Definition: tsne_layout.h:36

TSNELayoutData::updates
std::vector< glm::vec2 > updates
Definition: tsne_layout.h:38

TSNELayoutData::heap
std::vector< size_t > heap
Definition: tsne_layout.h:37

tsne_layout_step
void tsne_layout_step(TSNELayoutData &data, bool vert_pressed, int vert_ind, LandmarkModel &lm, float time)
Optimize the positions of low-dimensional landmarks using the t-SNE algorithm.
Definition: tsne_layout.cpp:28

sqrf
constexpr float sqrf(float x)
Definition: tsne_layout.cpp:22

tsne_layout.h