Tulip 5.7.1
Large graphs analysis and drawing
Loading...
Searching...
No Matches
CSVParser.h
1/*
2 *
3 * This file is part of Tulip (https://tulip.labri.fr)
4 *
5 * Authors: David Auber and the Tulip development Team
6 * from LaBRI, University of Bordeaux
7 *
8 * Tulip is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU Lesser General Public License
10 * as published by the Free Software Foundation, either version 3
11 * of the License, or (at your option) any later version.
12 *
13 * Tulip is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 * See the GNU General Public License for more details.
17 *
18 */
19///@cond DOXYGEN_HIDDEN
20
21#ifndef CSVDATALOADER_H_
22#define CSVDATALOADER_H_
23
24#include <tulip/tulipconf.h>
25#include <tulip/CSVContentHandler.h>
26
27#include <vector>
28#include <climits>
29
30#include <QString>
31
32class QTextCodec;
33
34namespace tlp {
35
36class PluginProgress;
37
38/*
39 * @brief Interface for CSV data parser.
40 *
41 * Send the found tokens to the CSVContentHandler interface.
42 */
43class TLP_QT_SCOPE CSVParser {
44public:
45 virtual ~CSVParser() {}
46
47 virtual char decimalMark() const {
48 return '.';
49 }
50 /**
51 * @brief Parse the data and send the tokens found to the CSVContentHandler.
52 *
53 * Notify the progression of the parsing with the progress object.
54 **/
55 virtual bool parse(CSVContentHandler *handler, tlp::PluginProgress *progress = nullptr,
56 bool firstLineOnly = false) = 0;
57};
58
59/**
60 * @brief Parse a csv data and send each tokens to the given CSVContentHandler object.
61 *
62 * Parse a csv data and send each tokens to the given CSVContentHandler object. Get each line of the
63 *file in the given range and parse them. This object skip empty lines.
64 * Send the found tokens to the CSVContentHandler interface.
65 * \code
66 * CSVParser parser(fileName,";","\"","UTF-8",true);
67 * \/\/Automatically remove quotes.
68 * CSVContentHandler * handler ;
69 * parser.parse(handler);
70 * \endcode
71 **/
72class TLP_QT_SCOPE CSVSimpleParser : public CSVParser {
73public:
74 /**
75 * @brief Construct a csv simple file parser.
76 * @param filename The path to the file to import.
77 * @param separator The separator to use.
78 * @param textDelimiter If a token is sourrounded by this character we ignore all the separators
79 *found in this token. Useful if a token contains the separator.
80 * @param firstLine The number of the first line to read. The first line is 0.
81 * @param lastLine The number of the last line to read.
82 **/
83 CSVSimpleParser(const std::string &fileName, const QString &separator = ";",
84 bool mergesep = false, char textDelimiter = '"', char delimiterMark = '.',
85 bool considerAsString = false,
86 const std::string &fileEncoding = std::string("UTF-8"),
87 unsigned int firstLine = 0, unsigned int lastLine = UINT_MAX);
88
89 ~CSVSimpleParser() override;
90
91 inline std::string fileName() const {
92 return _fileName;
93 }
94 inline void setFileName(const std::string &fileName) {
95 _fileName = fileName;
96 }
97
98 inline char textDelimiter() const {
99 return _textDelimiter;
100 }
101
102 char decimalMark() const override {
103 return _decimalMark;
104 }
105
106 inline void setTextDelimiter(char delimiter) {
107 _textDelimiter = delimiter;
108 }
109
110 inline std::string fileEncoding() const {
111 return _fileEncoding;
112 }
113
114 inline void setFileEncoding(const std::string &fileEncoding) {
115 _fileEncoding = fileEncoding;
116 }
117
118 bool parse(CSVContentHandler *handler, tlp::PluginProgress *progress = nullptr,
119 bool firstLineOnly = false) override;
120
121protected:
122 virtual std::string treatToken(const std::string &token, int row, int column);
123
124private:
125 void tokenize(const std::string &str, std::vector<CSVToken> &tokens, const QString &delimiters,
126 const bool mergedelim, char _textDelimiter, unsigned int numberOfCol);
127 std::string convertStringEncoding(const std::string &toConvert, QTextCodec *encoder);
128
129 /**
130 * @brief Function to extract a line from a istream. Can handle Linux,Mac and Windows end of line
131 *pattern.
132 **/
133 bool multiplatformgetline(std::istream &is, std::string &str);
134
135 bool checkForContiguousTdlm(std::istream &is, std::string &str, char sep, bool tdlm);
136 std::string removeQuotesIfAny(std::string &s);
137 std::string _fileName;
138 QString _separator;
139 char _textDelimiter;
140 char _decimalMark;
141 std::string _fileEncoding;
142 unsigned int _firstLine;
143 unsigned int _lastLine;
144 bool _mergesep, _considerAsString;
145};
146
147/**
148 *@brief CSV parser used to invert the token matrix in order to treat rows as columns.
149 **/
150class TLP_QT_SCOPE CSVInvertMatrixParser : public tlp::CSVParser, public tlp::CSVContentHandler {
151public:
152 CSVInvertMatrixParser(CSVParser *parser);
153 ~CSVInvertMatrixParser() override;
154
155 char decimalMark() const override {
156 return parser->decimalMark();
157 }
158
159 bool parse(CSVContentHandler *handler, tlp::PluginProgress *progress = nullptr,
160 bool firstLineOnly = false) override;
161
162 bool begin() override;
163 bool line(unsigned int row, const std::vector<CSVToken> &lineTokens) override;
164 bool end(unsigned int rowNumber, unsigned int columnNumber) override;
165
166private:
167 CSVParser *parser;
168 CSVContentHandler *handler;
169 std::vector<std::vector<CSVToken>> columns;
170 unsigned int maxLineSize;
171};
172} // namespace tlp
173#endif /* CSVDATALOADER_H_ */
174///@endcond
PluginProgress subclasses are meant to notify about the progress state of some process (typically a pl...