Tulip 5.7.1
Large graphs analysis and drawing
Loading...
Searching...
No Matches
CSVGraphImport.h
1/*
2 *
3 * This file is part of Tulip (https://tulip.labri.fr)
4 *
5 * Authors: David Auber and the Tulip development Team
6 * from LaBRI, University of Bordeaux
7 *
8 * Tulip is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU Lesser General Public License
10 * as published by the Free Software Foundation, either version 3
11 * of the License, or (at your option) any later version.
12 *
13 * Tulip is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 * See the GNU General Public License for more details.
17 *
18 */
19///@cond DOXYGEN_HIDDEN
20
21#ifndef CSVGRAPHIMPORT_H
22#define CSVGRAPHIMPORT_H
23
24#include <unordered_map>
25
26#include <tulip/CSVContentHandler.h>
27#include <tulip/Graph.h>
28#include <tulip/tulipconf.h>
29
30#include <QMessageBox>
31
32namespace tlp {
33class PropertyInterface;
34
/**
 * @brief Store import parameters for a CSV file column.
 *
 * Contains all the parameters defined by the user for a given CSV column: the name of the
 * column, its data type, whether the user wants to import it, the optional separator used
 * for multiple values and the exceptions registered for specific tokens.
 **/
class TLP_QT_SCOPE CSVColumn {
public:
  /**
   * @brief Build a column description.
   * @param columnName The name of the column (empty by default).
   * @param columnType The data type of the column (empty by default).
   *
   * The column is created marked as used and without multi-value separator.
   **/
  CSVColumn(const std::string &columnName = "", const std::string &columnType = "")
      : _used(true), _name(columnName), _type(columnType), _valueSeparator(0) {}

  /**
   * @brief Get the name of the column.
   **/
  const std::string &name() const {
    return _name;
  }

  /**
   * @brief Tell whether the column is marked for import.
   **/
  bool isUsed() const {
    return _used;
  }

  /**
   * @brief Return the property data type.
   **/
  const std::string &dataType() const {
    return _type;
  }

  /**
   * @brief Tell whether a multi-value separator is set (i.e. the separator is not 0).
   **/
  bool needMultiValues() const {
    return _valueSeparator != 0;
  }

  /**
   * @brief Return the multi-value separator, 0 when none is set.
   **/
  char getMultiValueSeparator() const {
    return _valueSeparator;
  }

  // possible actions
  // the two first ones indicate an exception
  enum Action { ASSIGN_NO_VALUE = 0, SKIP_ROW = 1, ASSIGN_VALUE = 2 };

  /**
   * @brief Associate a token value with the action to apply when it is met.
   **/
  struct Exception {
    std::string value;
    Action action;
    Exception(const std::string &v, Action a) : value(v), action(a) {}
  };

  /**
   * @brief Register an exception: tokens equal to value will trigger action.
   *
   * Exceptions are kept in insertion order; no check for duplicates is done.
   **/
  void addException(const std::string &value, Action action) {
    _exceptions.emplace_back(value, action);
  }

  /**
   * @brief Remove all the registered exceptions.
   **/
  void clearExceptions() {
    _exceptions.clear();
  }

  /**
   * @brief Look for a specific exception defined for token.
   * @param token The token to look for (exact string comparison).
   * @return The action of the first registered exception whose value equals token,
   * ASSIGN_VALUE if there is none.
   *
   * The lookup does not modify the column, so it is const and can be used through
   * const references like the other accessors.
   **/
  Action getActionForToken(const std::string &token) const {
    for (const Exception &exception : _exceptions) {
      if (exception.value == token)
        return exception.action;
    }
    return Action::ASSIGN_VALUE;
  }

protected:
  // true when the column has to be imported
  bool _used;
  // name of the column
  std::string _name;
  // name of the column data type
  std::string _type;
  // separator for multiple values, 0 means none
  char _valueSeparator;
  // exceptions in registration order
  std::vector<Exception> _exceptions;
};
109/**
110 * @brief Store all the advanced import parameters for the CSV file.
111 *
112 * Store the information about columns and rows to import.
113 * Use this object to configure the import process of a CSVImportGraph object.
114 **/
115class TLP_QT_SCOPE CSVImportParameters {
116public:
117 CSVImportParameters(unsigned int fromLine = 0, unsigned int toLine = UINT_MAX,
118 const std::vector<CSVColumn *> &columns = std::vector<CSVColumn *>());
119 virtual ~CSVImportParameters();
120
121 /**
122 * @brief Return the number of column.
123 **/
124 unsigned int columnNumber() const;
125
126 /**
127 * @brief return true if the column is marked for import
128 **/
129 bool importColumn(unsigned int column) const;
130 /**
131 * @brief Get the column name
132 **/
133 std::string getColumnName(unsigned int column) const;
134 /**
135 * @brief Get the column data type
136 **/
137 std::string getColumnDataType(unsigned int column) const;
138
139 /**
140 * @brief Get the column separator for multiple values
141 **/
142 char getColumnMultiValueSeparator(unsigned int column) const;
143
144 /**
145 * @brief Get the column action according to the given token
146 **/
147 CSVColumn::Action getColumnActionForToken(unsigned int column, const std::string &token) const;
148
149 /**
150 * @brief Return the index of the first line to import
151 **/
152 unsigned int getFirstLineIndex() const;
153 /**
154 * @brief Return the index of the last line to import
155 **/
156 unsigned int getLastLineIndex() const;
157 /**
158 * @brief Return true if the given row is between the first row to import and the last row to
159 *import
160 **/
161 bool importRow(unsigned int row) const;
162
163private:
164 unsigned int fromLine;
165 unsigned int toLine;
166 std::vector<CSVColumn *> columns;
167};
168
169/**
170 * @brief Interface to map CSV rows to graph elements.
171 *
172 * To build the mapping user had to parse the CSV file.
173 * @code
174 * CSVParser *parser;
175 * CSVToGraphDataMapping *mapping;
176 * parser->parse(mapping);
177 * //Now the mapping has been built.
178 * //Get the element for the first row.
179 * pair<tlp::ElementType,unsigned int> element = mapping->getElementForRow(0);
180 * @endcode
181 **/
182class TLP_QT_SCOPE CSVToGraphDataMapping {
183public:
184 virtual ~CSVToGraphDataMapping() {}
185 virtual std::pair<tlp::ElementType, std::vector<unsigned int>>
186 getElementsForRow(const std::vector<std::vector<std::string>> &tokens) = 0;
187 virtual void init(unsigned int rowNumber) = 0;
188};
189
190/**
191 * @brief Abstract class handling node or edge mapping between a CSV column and a graph property.
192 *
193 * Be sure there is a property with the given name in the graph or an error will occur.
194 * Automatically handle CSV file parsing just implements the buildIndexForRow function to fill the
195 *rowToGraphId map with the right graph element.
196 **/
197class TLP_QT_SCOPE AbstractCSVToGraphDataMapping : public CSVToGraphDataMapping {
198public:
199 AbstractCSVToGraphDataMapping(tlp::Graph *graph, tlp::ElementType type,
200 const std::vector<unsigned int> &columnIds,
201 const std::vector<std::string> &propertyNames);
202 ~AbstractCSVToGraphDataMapping() override {}
203
204 void init(unsigned int rowNumber) override;
205 std::pair<tlp::ElementType, std::vector<unsigned int>>
206 getElementsForRow(const std::vector<std::vector<std::string>> &tokens) override;
207
208protected:
209 /**
210 * @brief Create a new element if no elements for the given row was found.
211 * @return Return the graph element id or UINT_MAX if no new element is created.
212 **/
213 virtual unsigned int buildIndexForRow(unsigned int row, const std::vector<std::string> &keys) = 0;
214
215protected:
216 std::unordered_map<std::string, unsigned int> valueToId;
217 tlp::Graph *graph;
218 tlp::ElementType type;
219 std::vector<unsigned int> columnIds;
220 std::vector<tlp::PropertyInterface *> keyProperties;
221};
222/**
223 * @brief Map each row of the CSV file on a new node.
224 **/
225class TLP_QT_SCOPE CSVToNewNodeIdMapping : public CSVToGraphDataMapping {
226public:
227 CSVToNewNodeIdMapping(tlp::Graph *graph);
228 void init(unsigned int rowNumber) override;
229 std::pair<tlp::ElementType, std::vector<unsigned int>>
230 getElementsForRow(const std::vector<std::vector<std::string>> &tokens) override;
231
232private:
233 tlp::Graph *graph;
234};
235
236/**
237 * @brief Try to map CSV file rows to nodes according to value between a CSV column and a graph
238 *property.
239 *
240 * Be sure there is a property with the given name in the graph before using it.
241 **/
242class TLP_QT_SCOPE CSVToGraphNodeIdMapping : public AbstractCSVToGraphDataMapping {
243public:
244 /**
245 * @param graph The graph where the nodes will be searched.
246 * @param columnIndex The index of the column with the ids in the CSV file.
247 * @param propertyName The name of the property to search ids.
248 * @param firstRow The first row to search ids.
249 * @param lastRow The last row to search ids.
250 * @param createNode If set to true if there is no node for an id in the CSV file a new node will
251 *be created for this id.
252 **/
253 CSVToGraphNodeIdMapping(tlp::Graph *graph, const std::vector<unsigned int> &columnIds,
254 const std::vector<std::string> &propertyNames, bool createNode = false);
255 void init(unsigned int rowNumber) override;
256
257protected:
258 unsigned int buildIndexForRow(unsigned int row, const std::vector<std::string> &keys) override;
259
260private:
261 bool createMissingNodes;
262};
263/**
264 * @brief Try to map CSV file rows to edges according to value between a CSV column and a graph
265 *property.
266 *
267 * Be sure there is a property with the given name in the graph before using it.
268 **/
269class TLP_QT_SCOPE CSVToGraphEdgeIdMapping : public AbstractCSVToGraphDataMapping {
270public:
271 /**
272 * @param graph The graph where the edges will be searched.
273 * @param columnIndex The index of the column with the ids in the CSV file.
274 * @param propertyName The name of the property to search ids.
275 * @param firstRow The first row to search ids.
276 * @param lastRow The last row to search ids.
277 **/
278 CSVToGraphEdgeIdMapping(tlp::Graph *graph, const std::vector<unsigned int> &columnIds,
279 const std::vector<std::string> &propertyNames);
280
281protected:
282 unsigned int buildIndexForRow(unsigned int row, const std::vector<std::string> &keys) override;
283};
284
285/**
286 * @brief Try to map CSV file rows to edges according to edge source and destination.
287 *
288 * For each row in the CSV file create an edge in the graph between source and destination nodes.
289 *Find source node by comparing id in the source CSV column and destination node by comparing id in
290 *the destination CSV column.
291 **/
292class TLP_QT_SCOPE CSVToGraphEdgeSrcTgtMapping : public CSVToGraphDataMapping {
293public:
294 /**
295 * @param graph The graph where the edges will be searched.
296 * @param srcColumnIndex The index of the column with the source node id in the CSV file.
297 * @param tgtColumnIndex The index of the column with the taret node id in the CSV file.
298 * @param srcPropertyName The name of the property to search source node id.
299 * @param tgtPropertyName The name of the property to search target node id.
300 * @param firstRow The first row to search ids.
301 * @param lastRow The last row to search ids.
302 * @param createMissinElements If true create source node, destination node if one of them is not
303 *found in the graph.
304 **/
305 CSVToGraphEdgeSrcTgtMapping(tlp::Graph *graph, const std::vector<unsigned int> &srcColumnIds,
306 const std::vector<unsigned int> &tgtColumnIds,
307 const std::vector<std::string> &srcPropNames,
308 const std::vector<std::string> &tgtPropNames,
309 bool createMissinElements = false);
310 std::pair<tlp::ElementType, unsigned int> getElementForRow(unsigned int row);
311 void init(unsigned int lineNumbers) override;
312 std::pair<tlp::ElementType, std::vector<unsigned int>>
313 getElementsForRow(const std::vector<std::vector<std::string>> &tokens) override;
314
315private:
316 tlp::Graph *graph;
317 std::unordered_map<std::string, unsigned int> srcValueToId;
318 std::unordered_map<std::string, unsigned int> tgtValueToId;
319 std::vector<unsigned int> srcColumnIds;
320 std::vector<unsigned int> tgtColumnIds;
321 std::vector<tlp::PropertyInterface *> srcProperties;
322 std::vector<tlp::PropertyInterface *> tgtProperties;
323 bool sameSrcTgtProperties;
324 bool buildMissingElements;
325};
326
327/**
328 * @brief Interface to perform mapping between CSV columns and graph properties during the CSV
329 *import process.
330 *
331 **/
332class TLP_QT_SCOPE CSVImportColumnToGraphPropertyMapping {
333public:
334 virtual ~CSVImportColumnToGraphPropertyMapping() {}
335 /**
336 * @brief Return the property corresponding to the column index.
337 * @param column The index of the column.
338 * @param token The current token. May be needed to determine column data type.
339 *
340 * The token parameter is used to guess property type if needed.
341 **/
342 virtual tlp::PropertyInterface *getPropertyInterface(unsigned int column,
343 const std::string &token) = 0;
344};
345
346/**
347 * @brief Proxy to handle all the properties operations like access, creation, data type detection
348 *during the CSV parsing process.
349 *
350 * Try to guess the type of the property in function of the first token
351 * if user don't tell which type the property is.
352 **/
353class TLP_QT_SCOPE CSVImportColumnToGraphPropertyMappingProxy
354 : public CSVImportColumnToGraphPropertyMapping {
355public:
356 CSVImportColumnToGraphPropertyMappingProxy(tlp::Graph *graph,
357 const CSVImportParameters &importParameters,
358 QWidget *parent = nullptr);
359 ~CSVImportColumnToGraphPropertyMappingProxy() override {}
360 tlp::PropertyInterface *getPropertyInterface(unsigned int column,
361 const std::string &token) override;
362
363private:
364 tlp::Graph *graph;
365 CSVImportParameters importParameters;
366 std::unordered_map<unsigned int, tlp::PropertyInterface *> propertiesBuffer;
367 QMessageBox::StandardButton overwritePropertiesButton;
368 QWidget *parent;
369 PropertyInterface *generateApproximateProperty(const std::string &name, const std::string &type);
370};
371
372/**
373 * @brief Manage all the CSV import process. Use the mapping object to find the graph element in
374 *function of the row and the propertiesManager to find the property corresponding to the column.
375 * The import parameters are used to filter the rows and the columns to import.
376 **/
377class TLP_QT_SCOPE CSVGraphImport : public tlp::CSVContentHandler {
378public:
379 CSVGraphImport(CSVToGraphDataMapping *mapping,
380 CSVImportColumnToGraphPropertyMapping *propertiesManager,
381 const CSVImportParameters &importParameters);
382 ~CSVGraphImport() override;
383 bool begin() override;
384 bool line(unsigned int row, const std::vector<CSVToken> &lineTokens) override;
385 bool end(unsigned int rowNumber, unsigned int columnNumber) override;
386
387protected:
388 CSVToGraphDataMapping *mapping;
389 CSVImportColumnToGraphPropertyMapping *propertiesManager;
390 CSVImportParameters importParameters;
391};
392} // namespace tlp
393#endif // CSVGRAPHIMPORT_H
394///@endcond
PropertyInterface describes the interface of a graph property.
ElementType
Definition: Graph.h:50