12#ifndef MLPACK_CORE_DATA_LOAD_CSV_HPP
13#define MLPACK_CORE_DATA_LOAD_CSV_HPP
15#include <boost/spirit/include/qi.hpp>
16#include <boost/algorithm/string/trim.hpp>
54 template<
typename T,
typename PolicyType>
55 void Load(arma::Mat<T> &inout,
57 const bool transpose =
true)
62 TransposeParse(inout, infoSet);
64 NonTransposeParse(inout, infoSet);
77 template<
typename T,
typename MapPolicy>
80 using namespace boost::spirit;
89 inFile.seekg(0, std::ios::beg);
95 while (std::getline(inFile, line))
103 inFile.seekg(0, std::ios::beg);
106 while (std::getline(inFile, line))
115 auto findColSize = [&cols](iter_type) { ++cols; };
116 qi::parse(line.begin(), line.end(),
117 stringRule[findColSize] % delimiterRule);
122 if (MapPolicy::NeedsFirstPass)
125 auto firstPassMap = [&](
const iter_type& iter)
127 std::string str(iter.begin(), iter.end());
130 info.template MapFirstPass<T>(std::move(str), rows - 1);
134 qi::parse(line.begin(), line.end(),
135 stringRule[firstPassMap] % delimiterRule);
150 template<
typename T,
typename MapPolicy>
155 using namespace boost::spirit;
164 inFile.seekg(0, std::ios::beg);
169 while (std::getline(inFile, line))
178 auto findRowSize = [&rows](iter_type) { ++rows; };
179 qi::parse(line.begin(), line.end(),
180 stringRule[findRowSize] % delimiterRule);
187 if (MapPolicy::NeedsFirstPass)
192 auto firstPassMap = [&](
const iter_type& iter)
194 std::string str(iter.begin(), iter.end());
197 info.template MapFirstPass<T>(std::move(str), dim++);
201 qi::parse(line.begin(), line.end(),
202 stringRule[firstPassMap] % delimiterRule);
208 using iter_type = boost::iterator_range<std::string::iterator>;
222 template<
typename T,
typename PolicyType>
223 void NonTransposeParse(arma::Mat<T>& inout,
226 using namespace boost::spirit;
230 GetMatrixSize<T>(rows, cols, infoSet);
233 inout.set_size(rows, cols);
240 inFile.seekg(0, std::ios::beg);
242 auto setCharClass = [&](iter_type
const &iter)
244 std::string str(iter.begin(), iter.end());
251 inout(row, col++) = infoSet.template MapString<T>(std::move(str), row);
254 while (std::getline(inFile, line))
261 const bool canParse = qi::parse(line.begin(), line.end(),
262 stringRule[setCharClass] % delimiterRule);
267 std::ostringstream oss;
268 oss <<
"LoadCSV::NonTransposeParse(): wrong number of dimensions ("
269 << col <<
") on line " << row <<
"; should be " << cols
271 throw std::runtime_error(oss.str());
276 std::ostringstream oss;
277 oss <<
"LoadCSV::NonTransposeParse(): parsing error on line " << col
279 throw std::runtime_error(oss.str());
292 template<
typename T,
typename PolicyType>
293 void TransposeParse(arma::Mat<T>& inout, DatasetMapper<PolicyType>& infoSet)
295 using namespace boost::spirit;
299 GetTransposeMatrixSize<T>(rows, cols, infoSet);
302 inout.set_size(rows, cols);
309 inFile.seekg(0, std::ios::beg);
315 auto parseString = [&](iter_type
const &iter)
318 std::string str(iter.begin(), iter.end());
321 inout(row, col) = infoSet.template MapString<T>(std::move(str), row);
325 while (std::getline(inFile, line))
335 const bool canParse = qi::parse(line.begin(), line.end(),
336 stringRule[parseString] % delimiterRule);
341 std::ostringstream oss;
342 oss <<
"LoadCSV::TransposeParse(): wrong number of dimensions (" << row
343 <<
") on line " << col <<
"; should be " << rows <<
" dimensions.";
344 throw std::runtime_error(oss.str());
349 std::ostringstream oss;
350 oss <<
"LoadCSV::TransposeParse(): parsing error on line " << col
352 throw std::runtime_error(oss.str());
361 boost::spirit::qi::rule<std::string::iterator, iter_type()> stringRule;
363 boost::spirit::qi::rule<std::string::iterator, iter_type()> delimiterRule;
366 std::string extension;
368 std::string filename;
370 std::ifstream inFile;
Auxiliary information for a dataset, including mappings to/from strings (or other types) and the data...
void SetDimensionality(const size_t dimensionality)
Set the dimensionality of an existing DatasetMapper object.
Load the CSV file. This class uses boost::spirit to implement the parser; please refer to the following lin...
LoadCSV(const std::string &file)
Construct the LoadCSV object on the given file.
void GetTransposeMatrixSize(size_t &rows, size_t &cols, DatasetMapper< MapPolicy > &info)
Peek at the file to determine the number of rows and columns in the matrix, assuming a transposed mat...
void GetMatrixSize(size_t &rows, size_t &cols, DatasetMapper< MapPolicy > &info)
Peek at the file to determine the number of rows and columns in the matrix, assuming a non-transposed...
void Load(arma::Mat< T > &inout, DatasetMapper< PolicyType > &infoSet, const bool transpose=true)
Load the file into the given matrix with the given DatasetMapper object.
Include all of the base components required to write mlpack methods, and the main mlpack Doxygen docu...
Linear algebra utility functions, generally performed on matrices or vectors.