BlosSOM
Interactive dimensionality reduction on large datasets (EmbedSOM and FLOWER combined)
trans_data.h
Go to the documentation of this file.
1/* This file is part of BlosSOM.
2 *
3 * Copyright (C) 2021 Mirek Kratochvil
4 * Sona Molnarova
5 *
6 * BlosSOM is free software: you can redistribute it and/or modify it under
7 * the terms of the GNU General Public License as published by the Free
8 * Software Foundation, either version 3 of the License, or (at your option)
9 * any later version.
10 *
11 * BlosSOM is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
14 * details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * BlosSOM. If not, see <https://www.gnu.org/licenses/>.
18 */
19
20#ifndef TRANS_DATA_H
21#define TRANS_DATA_H
22
23#include <thread>
24#include <vector>
25
26#include "batch_size_gen.h"
27#include "data_model.h"
28#include "frame_stats.h"
29
30/** Statistics from the untransformed dataset
31 *
32 * Pipeline part that gets preliminary statistics for use in later processes,
33 * esp. transformations and parameter guessing.
34 */
36 : public Cleaner
37 , public Dirt
38{
39 /** Array containing means for each dimension. */
40 std::vector<float> means;
41 /** Array containing standard deviations for each dimension. */
42 std::vector<float> sds;
43
44 /**
45 * @brief Recomputes the statistics if the input data changed.
46 *
47 * @param dm Original data parsed from the input file.
48 */
49 void update(const DataModel &dm);
50};
51
/** Configuration of single-dimension transformation
 *
 * One instance describes how a single dimension (column) of the dataset is
 * transformed; see @ref TransData::config for the per-dimension array.
 */
struct TransConfig
{
    /** Affine adjustment setting of the dimension (0 by default). */
    float affine_adjust;
    /** Whether the asinh transformation is enabled (off by default). */
    bool asinh;
    /** Cofactor used together with the asinh transformation (500 by
     * default). */
    float asinh_cofactor;

    /** Creates the default configuration: no affine adjustment, asinh
     * disabled, asinh cofactor of 500. */
    TransConfig()
      : affine_adjust(0)
      , asinh(false)
      , asinh_cofactor(500)
    {
    }
};
66
67/**
68 * @brief Storage of the transformed data.
69 *
70 */
72 : public Sweeper
73 , public Dirts
74{
75 /** Transformed data in the same format as @ref DataModel::data. */
76 std::vector<float> data;
77
78 /** Array representing sums for each dimension. */
79 std::vector<float> sums;
80 /** Array representing square sums for each dimension. */
81 std::vector<float> sqsums;
82
83 /** Separate configurations for each dimension. */
84 std::vector<TransConfig> config;
85
87
88 /**
89 * @brief Returns dimension of the transformed data.
90 *
91 * @return size_t Dimension of the transformed data.
92 */
93 size_t dim() const { return config.size(); }
94 /**
95 * @brief Notifies @ref Sweeper that the config has been modified and that
96 * the data has to be recomputed.
97 *
98 */
99 void touch_config() { refresh(*this); }
100
102
103 /**
104 * @brief Recomputes the data if any of the config has been touched.
105 *
106 * @param dm Original data parsed from the input file.
107 * @param s Statistics from the untransformed dataset.
108 */
109 void update(const DataModel &dm,
110 const RawDataStats &s,
111 FrameStats &frame_stats);
112 /**
113 * @brief Resets configurations to their initial values.
114 *
115 */
116 void reset();
117
118 // UI interface. config can be touched directly except for adding/removing
119 // cols. After touching the config, call touch() to cause (gradual)
120 // recomputation.
121 // TODO void disable_col(size_t);
122 // TODO void enable_col(size_t);
123};
124
125#endif // #ifndef TRANS_DATA_H
Generator of the size of the next point batch.
A piece of cache that keeps track of the dirty status.
Definition: dirty.h:51
Storage of data from loaded input file.
Definition: data_model.h:32
A piece of dirt for dirtying the caches.
Definition: dirty.h:32
Multi-piece cache-dirtying object.
Definition: dirty.h:82
Statistics from the untransformed dataset.
Definition: trans_data.h:38
void update(const DataModel &dm)
Recomputes the statistics if the input data changed.
Definition: trans_data.cpp:24
std::vector< float > means
Array containing means for each dimension.
Definition: trans_data.h:40
std::vector< float > sds
Array containing standard deviations for each dimension.
Definition: trans_data.h:42
A piece of multi-object cache.
Definition: dirty.h:95
void refresh(const Dirts &d)
Force-refresh the whole range.
Definition: dirty.h:105
Configuration of single-dimension transformation.
Definition: trans_data.h:54
bool asinh
Definition: trans_data.h:56
float asinh_cofactor
Definition: trans_data.h:57
float affine_adjust
Definition: trans_data.h:55
Storage of the transformed data.
Definition: trans_data.h:74
void update(const DataModel &dm, const RawDataStats &s, FrameStats &frame_stats)
Recomputes the data if any of the config has been touched.
Definition: trans_data.cpp:53
void touch_config()
Notifies Sweeper that the config has been modified and that the data has to be recomputed.
Definition: trans_data.h:99
size_t dim() const
Returns dimension of the transformed data.
Definition: trans_data.h:93
Cleaner stat_watch
Definition: trans_data.h:101
void reset()
Resets configurations to their initial values.
Definition: trans_data.cpp:128
std::vector< float > data
Transformed data in the same format as DataModel::data.
Definition: trans_data.h:76
std::vector< float > sums
Array representing sums for each dimension.
Definition: trans_data.h:79
BatchSizeGen batch_size_gen
Definition: trans_data.h:86
std::vector< TransConfig > config
Separate configurations for each dimension.
Definition: trans_data.h:84
std::vector< float > sqsums
Array representing square sums for each dimension.
Definition: trans_data.h:81