mlpack 3.4.2
split_by_any_of.hpp
Go to the documentation of this file.
1
13#ifndef MLPACK_CORE_DATA_TOKENIZERS_SPLIT_BY_ANY_OF_HPP
14#define MLPACK_CORE_DATA_TOKENIZERS_SPLIT_BY_ANY_OF_HPP
15
16#include <mlpack/prereqs.hpp>
17#include <mlpack/core/boost_backport/boost_backport_string_view.hpp>
18#include <array>
19
20namespace mlpack {
21namespace data {
22
27{
28 public:
30 using TokenType = boost::string_view;
31
33 using MaskType = std::array<bool, 1 << CHAR_BIT>;
34
40 SplitByAnyOf(const boost::string_view delimiters)
41 {
42 mask.fill(false);
43
44 for (char symbol : delimiters)
45 mask[static_cast<unsigned char>(symbol)] = true;
46 }
47
54 boost::string_view operator()(boost::string_view& str) const
55 {
56 boost::string_view retval;
57
58 while (retval.empty())
59 {
60 const std::size_t pos = FindFirstDelimiter(str);
61 if (pos == str.npos)
62 {
63 retval = str;
64 str.clear();
65 return retval;
66 }
67 retval = str.substr(0, pos);
68 str.remove_prefix(pos + 1);
69 }
70 return retval;
71 }
72
78 static bool IsTokenEmpty(const boost::string_view token)
79 {
80 return token.empty();
81 }
82
84 const MaskType& Mask() const { return mask; }
86 MaskType& Mask() { return mask; }
87
88 private:
96 size_t FindFirstDelimiter(const boost::string_view str) const
97 {
98 for (size_t pos = 0; pos < str.size(); pos++)
99 {
100 if (mask[static_cast<unsigned char>(str[pos])])
101 return pos;
102 }
103 return str.npos;
104 }
105
106 private:
108 MaskType mask;
109};
110
111} // namespace data
112} // namespace mlpack
113
114#endif
The SplitByAnyOf class tokenizes a string using a set of delimiters.
boost::string_view TokenType
The type of the token which the tokenizer extracts.
boost::string_view operator()(boost::string_view &str) const
The function extracts the first token from the given string view and then removes the prefix containing that token from the view.
std::array< bool, 1<< CHAR_BIT > MaskType
A convenient alias for the mask type.
const MaskType & Mask() const
Return the mask.
SplitByAnyOf(const boost::string_view delimiters)
Construct the object from the given delimiters.
MaskType & Mask()
Modify the mask.
static bool IsTokenEmpty(const boost::string_view token)
The function returns true if the given token is empty.
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
The core includes that mlpack expects; standard C++ includes and Armadillo.