BlosSOM
Interactive dimensionality reduction on large datasets (EmbedSOM and FLOWER combined)
fcs_parser.cpp
Go to the documentation of this file.
1/* This file is part of BlosSOM.
2 *
3 * Copyright (C) 2021 Mirek Kratochvil
4 * Sona Molnarova
5 *
6 * BlosSOM is free software: you can redistribute it and/or modify it under
7 * the terms of the GNU General Public License as published by the Free
8 * Software Foundation, either version 3 of the License, or (at your option)
9 * any later version.
10 *
11 * BlosSOM is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
14 * details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * BlosSOM. If not, see <https://www.gnu.org/licenses/>.
18 */
19
#include "fcs_parser.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <random>
#include <regex>
#include <sstream>
#include <string>
31
/**
 * @brief Helper function for parsing ID from string.
 *
 * Collects every decimal digit of @p word, in order of appearance, and
 * converts the resulting digit string to a number (e.g. `$P12N` gives 12).
 * All non-digit characters are ignored.
 *
 * @param[in] word Keyword that contains at least one decimal digit.
 * @return Number formed by the digits of @p word.
 * @throws std::invalid_argument if @p word contains no digit.
 * @throws std::out_of_range if the digits do not fit into `unsigned long`.
 */
static size_t
parse_id(const std::string &word)
{
    std::string digits;
    digits.reserve(word.size());

    for (const char c : word) {
        // Explicit range check instead of isdigit(): calling isdigit() with
        // a negative `char` (any non-ASCII byte) is undefined behaviour.
        if (c >= '0' && c <= '9')
            digits.push_back(c);
    }

    // Parse as unsigned so that the value is not squeezed through `int`
    // before being returned as size_t.
    return static_cast<size_t>(std::stoul(digits));
}
46
47/**
48 * @brief Parses info from FCS header.
49 *
50 * @param[in] handle
51 * @param[out] data_begin_offset
52 * @param[out] data_end_offset
53 * @param[out] params_count
54 * @param[out] events_count
55 * @param[out] is_be
56 * @param[out] params_names
57 */
58static void
59parse_info(std::ifstream &handle,
60 size_t &data_begin_offset,
61 size_t &data_end_offset,
62 size_t &params_count,
63 size_t &events_count,
64 bool &is_be,
65 std::vector<std::string> &params_names)
66{
67 size_t text_begin_offset;
68 size_t text_end_offset;
69
70 // Offset of the name and version.
71 constexpr int off = 7;
72 // Ignore name and version.
73 handle.ignore(off, ' ');
74
75 // Save text begin and end offset.
76 handle >> text_begin_offset >> text_end_offset;
77
78 handle.seekg(text_begin_offset);
79
80 // Read delimiter
81 char delim = handle.get();
82
83 std::string word;
84 // Can convert to long int, because it is only header, and it will never be
85 // greater than long int.
86 while (size_t(handle.tellg()) < text_end_offset + 1) {
87 std::getline(handle, word, delim);
88
89 if (word == "$BEGINDATA") {
90 std::getline(handle, word, delim);
91 data_begin_offset = static_cast<size_t>(stoul(word));
92 continue;
93 }
94
95 if (word == "$BYTEORD") {
96 std::getline(handle, word, delim);
97 is_be = word == "4,3,2,1";
98 continue;
99 }
100
101 if (word == "$ENDDATA") {
102 std::getline(handle, word, delim);
103 data_end_offset = static_cast<size_t>(stoul(word));
104 continue;
105 }
106
107 if (std::regex_match(word, std::regex("\\$P[0-9]+N"))) {
108 size_t id = parse_id(word);
109
110 std::getline(handle, word, delim);
111
112 // If id is greater than size of vector, it needs to be resized
113 if (params_names.size() < id)
114 params_names.resize(id, "");
115 params_names[id - 1] = word;
116
117 continue;
118 }
119
120 if (word == "$PAR") {
121 std::getline(handle, word, delim);
122 params_count = static_cast<size_t>(stoul(word));
123 continue;
124 }
125
126 if (word == "$TOT") {
127 std::getline(handle, word, delim);
128 events_count = static_cast<size_t>(stoul(word));
129 continue;
130 }
131 }
132}
133
/**
 * @brief Parses actual data from the FCS file.
 *
 * Reads `params_count * events_count` 32-bit floats starting at
 * @p data_begin_offset and converts them from the byte order of the file to
 * the byte order of the host.
 *
 * @param[in] handle Opened input stream of the FCS file.
 * @param[in] data_begin_offset Offset of the first byte of the data.
 * @param[in] data_end_offset Offset of the last byte of the data (inclusive).
 * @param[in] params_count Number of values per event.
 * @param[in,out] events_count Expected number of events; lowered if the
 *                             data segment or the file holds fewer complete
 *                             events.
 * @param[in] is_be True if the file stores values most significant byte
 *                  first, false if least significant byte first.
 * @param[out] out_data Receives the values, event after event.
 */
static void
parse_data(std::ifstream &handle,
           size_t data_begin_offset,
           size_t data_end_offset,
           size_t params_count,
           size_t &events_count,
           bool is_be,
           std::vector<float> &out_data)
{
    static_assert(sizeof(float) == sizeof(std::uint32_t),
                  "the data is read as 32-bit floats");

    out_data.clear();

    // Without parameters there is nothing to read and no event size to
    // divide by.
    if (params_count == 0)
        return;

    const size_t event_bytes = params_count * sizeof(float);

    // If not enough points. The end offset addresses the last byte, so the
    // segment holds `end - begin + 1` bytes. Comparing event counts instead
    // of byte counts avoids overflowing the multiplication. If the offsets
    // are inconsistent (end before begin) the count is left alone and the
    // read-length check below trims it instead.
    if (data_begin_offset <= data_end_offset) {
        const size_t available = data_end_offset - data_begin_offset + 1;
        events_count = std::min(events_count, available / event_bytes);
    }

    out_data.resize(params_count * events_count);
    const size_t wanted_bytes = out_data.size() * sizeof(float);

    handle.seekg(data_begin_offset);
    handle.read(reinterpret_cast<char *>(out_data.data()),
                static_cast<std::streamsize>(wanted_bytes));

    // Keep only the events that were really read from the file.
    const size_t got_bytes = static_cast<size_t>(handle.gcount());
    if (got_bytes < wanted_bytes) {
        events_count = got_bytes / event_bytes;
        out_data.resize(params_count * events_count);
    }

    // Assemble each value from its bytes in file order. This is independent
    // of the host byte order, and memcpy is used for the conversions because
    // casting between float* and uint32_t* violates strict aliasing.
    for (float &value : out_data) {
        unsigned char raw[sizeof(float)];
        std::memcpy(raw, &value, sizeof(raw));

        const std::uint32_t b0 = raw[0];
        const std::uint32_t b1 = raw[1];
        const std::uint32_t b2 = raw[2];
        const std::uint32_t b3 = raw[3];

        const std::uint32_t bits = is_be
                                     ? (b3 << 0 | b2 << 8 | b1 << 16 | b0 << 24)
                                     : (b0 << 0 | b1 << 8 | b2 << 16 | b3 << 24);

        std::memcpy(&value, &bits, sizeof(value));
    }
}
188
189void
190parse_FCS(const std::string &filename, DataModel &dm)
191{
192 std::ifstream handle(filename, std::ios::in | std::ios::binary);
193 if (!handle)
194 throw std::domain_error("Can not open file");
195
196 size_t data_begin_offset = 0;
197 size_t data_end_offset = 0;
198 bool is_be = false;
199
201 handle, data_begin_offset, data_end_offset, dm.d, dm.n, is_be, dm.names);
203 handle, data_begin_offset, data_end_offset, dm.d, dm.n, is_be, dm.data);
204 handle.close();
205}
static size_t parse_id(const std::string &word)
Helper function for parsing ID from string.
Definition: fcs_parser.cpp:34
static void parse_info(std::ifstream &handle, size_t &data_begin_offset, size_t &data_end_offset, size_t &params_count, size_t &events_count, bool &is_be, std::vector< std::string > &params_names)
Parses info from FCS header.
Definition: fcs_parser.cpp:59
static void parse_data(std::ifstream &handle, size_t data_begin_offset, size_t data_end_offset, size_t params_count, size_t &events_count, bool is_be, std::vector< float > &out_data)
Parses actual data from the FCS file.
Definition: fcs_parser.cpp:146
void parse_FCS(const std::string &filename, DataModel &dm)
Parses FCS file and fills DataModel data.
Definition: fcs_parser.cpp:190
Storage of data from loaded input file.
Definition: data_model.h:32
std::vector< std::string > names
Names of the dimensions.
Definition: data_model.h:37
size_t d
Dimension size.
Definition: data_model.h:39
std::vector< float > data
One-dimensional array storing d-dimensional input data in row-major order.
Definition: data_model.h:35
size_t n
Definition: dirty.h:83