BlosSOM
Interactive dimensionality reduction on large datasets (EmbedSOM and FLOWER combined)
tsv_parser.cpp
Go to the documentation of this file.
1/* This file is part of BlosSOM.
2 *
3 * Copyright (C) 2021 Mirek Kratochvil
4 * Sona Molnarova
5 *
6 * BlosSOM is free software: you can redistribute it and/or modify it under
7 * the terms of the GNU General Public License as published by the Free
8 * Software Foundation, either version 3 of the License, or (at your option)
9 * any later version.
10 *
11 * BlosSOM is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
14 * details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * BlosSOM. If not, see <https://www.gnu.org/licenses/>.
18 */
19
20#include "tsv_parser.h"
21
22#include <fstream>
23#include <sstream>
24
25// TODO replace by inplace ops
/**
 * @brief Cuts a string into fields separated by a delimiter character.
 *
 * Fields are returned in their order of appearance. An empty field before a
 * leading delimiter or between two adjacent delimiters is kept, whereas no
 * empty field is produced after a trailing delimiter; an empty input gives an
 * empty result.
 *
 * @param str Text to be split.
 * @param delim Character that separates the fields.
 * @return std::vector<std::string> Fields found in `str`.
 *
 * \todo TODO replace by inplace ops
 */
static std::vector<std::string>
split(const std::string &str, char delim)
{
    std::vector<std::string> fields;
    std::string::size_type begin = 0;

    // Stopping once `begin` reaches the end of the input is what suppresses
    // the empty field after a trailing delimiter (and any field for "").
    while (begin < str.size()) {
        const std::string::size_type end = str.find(delim, begin);
        if (end == std::string::npos) {
            // no more delimiters, the rest of the input is the last field
            fields.push_back(str.substr(begin));
            break;
        }
        fields.push_back(str.substr(begin, end - begin));
        begin = end + 1;
    }

    return fields;
}
48
49void
50parse_TSV(const std::string &filename, DataModel &dm)
51{
52 std::ifstream handle(filename, std::ios::in);
53 if (!handle)
54 throw std::domain_error("Can not open file");
55
56 std::string line;
57
58 while (std::getline(handle, line)) {
59 std::vector<std::string> values = split(line, '\t');
60 if (values.size() == 0)
61 continue;
62
63 if (dm.d == 0) {
64 // first line that contains anything is a header with data dimension
65 dm.d = values.size();
66 dm.names = values;
67 continue;
68 } else if (dm.d != values.size())
69 throw std::length_error("Row length mismatch");
70 for (auto &&value : values)
71 dm.data.emplace_back(std::stof(value));
72 ++dm.n;
73 }
74
75 if (!dm.n)
76 throw std::domain_error("File contained no data!");
77}
Storage of data from loaded input file.
Definition: data_model.h:32
std::vector< std::string > names
Names of the dimensions.
Definition: data_model.h:37
size_t d
Dimension size.
Definition: data_model.h:39
std::vector< float > data
One-dimensional array storing d-dimensional input data in row-major order.
Definition: data_model.h:35
size_t n
Definition: dirty.h:83
static std::vector< std::string > split(const std::string &str, char delim)
Splits a given string into words by a given delimiter.
Definition: tsv_parser.cpp:36
void parse_TSV(const std::string &filename, DataModel &dm)
Parses TSV file and fills DataModel data.
Definition: tsv_parser.cpp:50