mlpack 3.4.2
string_encoding_dictionary.hpp
Go to the documentation of this file.
1
13#ifndef MLPACK_CORE_DATA_STRING_ENCODING_DICTIONARY_HPP
14#define MLPACK_CORE_DATA_STRING_ENCODING_DICTIONARY_HPP
15
16#include <mlpack/prereqs.hpp>
17#include <mlpack/core/boost_backport/boost_backport_string_view.hpp>
18#include <unordered_map>
19#include <deque>
20#include <array>
21
22namespace mlpack {
23namespace data {
24
/**
 * This class provides a dictionary interface for the purpose of string
 * encoding: every stored token is associated with a numeric label.
 *
 * @tparam Token The type of the token that the dictionary stores.
 */
template<typename Token>
class StringEncodingDictionary
{
 public:
  //! A convenient alias for the internal type of the map.
  using MapType = std::unordered_map<Token, size_t>;

  //! The type of the token that the dictionary stores.
  using TokenType = Token;

  /**
   * The function returns true if the dictionary contains the given token.
   *
   * @param token The given token.
   */
  bool HasToken(const Token& token) const
  {
    return mapping.find(token) != mapping.end();
  }

  /**
   * The function adds the given token to the dictionary and assigns a label
   * to the token.  A token that is not stored yet receives the resulting size
   * of the dictionary as its label.  If the token is already stored, the
   * dictionary is left untouched and the label it already has is returned, so
   * that two different tokens never end up sharing a label.
   *
   * @param token The given token.
   * @return The label assigned to the token.
   */
  template<typename T>
  size_t AddToken(T&& token)
  {
    // Computed before the insertion: this is the size the dictionary will
    // have once a new token has been stored.
    const size_t nextLabel = mapping.size() + 1;

    // emplace() does not overwrite an existing entry; in both cases the
    // returned iterator refers to the entry that holds the token's label.
    return mapping.emplace(std::forward<T>(token), nextLabel).first->second;
  }

  /**
   * The function returns the label assigned to the given token.  The
   * underlying map throws std::out_of_range if the token is not stored.
   *
   * @param token The given token.
   */
  size_t Value(const Token& token) const
  {
    return mapping.at(token);
  }

  //! Get the size of the dictionary.
  size_t Size() const { return mapping.size(); }

  //! Clear the dictionary.
  void Clear()
  {
    mapping.clear();
  }

  //! Get the mapping.
  const MapType& Mapping() const { return mapping; }
  //! Modify the mapping.
  MapType& Mapping() { return mapping; }

  /**
   * Serialize the class to the given archive.
   */
  template<typename Archive>
  void serialize(Archive& ar, const unsigned int /* version */)
  {
    ar & BOOST_SERIALIZATION_NVP(mapping);
  }

 private:
  //! The dictionary itself: token -> label.
  MapType mapping;
};
106
107/*
108 * Specialization of the StringEncodingDictionary class for boost::string_view.
109 */
110template<>
112{
113 public:
115 using MapType = std::unordered_map<
116 boost::string_view,
117 size_t,
118 boost::hash<boost::string_view>>;
119
121 using TokenType = boost::string_view;
122
125
128 tokens(other.tokens)
129 {
130 for (const std::string& token : tokens)
131 mapping[token] = other.mapping.at(token);
132 }
133
136
139 {
140 tokens = other.tokens;
141 mapping.clear();
142
143 for (const std::string& token : tokens)
144 mapping[token] = other.mapping.at(token);
145
146 return *this;
147 }
148
151 StringEncodingDictionary&& other) = default;
152
158 bool HasToken(const boost::string_view token) const
159 {
160 return mapping.find(token) != mapping.end();
161 }
162
170 size_t AddToken(const boost::string_view token)
171 {
172 tokens.emplace_back(token);
173
174 size_t size = mapping.size();
175
176 mapping[tokens.back()] = ++size;
177
178 return size;
179 }
180
187 size_t Value(const boost::string_view token) const
188 {
189 return mapping.at(token);
190 }
191
193 size_t Size() const { return mapping.size(); }
194
196 void Clear()
197 {
198 mapping.clear();
199 tokens.clear();
200 }
201
203 const std::deque<std::string>& Tokens() const { return tokens; }
205 std::deque<std::string>& Tokens() { return tokens; }
206
208 const MapType& Mapping() const { return mapping; }
210 MapType& Mapping() { return mapping; }
211
215 template<typename Archive>
216 void serialize(Archive& ar, const unsigned int /* version */)
217 {
218 size_t numTokens = tokens.size();
219
220 ar & BOOST_SERIALIZATION_NVP(numTokens);
221
222 if (Archive::is_loading::value)
223 {
224 tokens.resize(numTokens);
225
226 for (std::string& token : tokens)
227 {
228 ar & BOOST_SERIALIZATION_NVP(token);
229
230 size_t tokenValue = 0;
231 ar & BOOST_SERIALIZATION_NVP(tokenValue);
232 mapping[token] = tokenValue;
233 }
234 }
235 if (Archive::is_saving::value)
236 {
237 for (std::string& token : tokens)
238 {
239 ar & BOOST_SERIALIZATION_NVP(token);
240
241 size_t tokenValue = mapping.at(token);
242 ar & BOOST_SERIALIZATION_NVP(tokenValue);
243 }
244 }
245 }
246
247 private:
249 std::deque<std::string> tokens;
250
252 MapType mapping;
253};
254
255template<>
257{
258 public:
260 using MapType = std::array<size_t, 1 << CHAR_BIT>;
261
263 using TokenType = int;
264
267 size(0)
268 {
269 mapping.fill(0);
270 }
271
278 bool HasToken(const int token) const
279 {
280 return mapping[token] > 0;
281 }
282
291 size_t AddToken(const int token)
292 {
293 mapping[token] = ++size;
294
295 return size;
296 }
297
305 size_t Value(const int token) const
306 {
307 return mapping[token];
308 }
309
311 size_t Size() const
312 {
313 return size;
314 }
315
317 void Clear()
318 {
319 mapping.fill(0);
320 }
321
323 const MapType& Mapping() const { return mapping; }
325 MapType& Mapping() { return mapping; }
326
330 template<typename Archive>
331 void serialize(Archive& ar, const unsigned int /* version */)
332 {
333 ar & BOOST_SERIALIZATION_NVP(mapping);
334 ar & BOOST_SERIALIZATION_NVP(size);
335 }
336
337 private:
339 MapType mapping;
340
342 size_t size;
343};
344
345} // namespace data
346} // namespace mlpack
347
348#endif
boost::string_view TokenType
The type of the token that the dictionary stores.
StringEncodingDictionary(StringEncodingDictionary &&other)=default
Standard move constructor.
size_t Value(const boost::string_view token) const
The function returns the label assigned to the given token.
bool HasToken(const boost::string_view token) const
The function returns true if the dictionary contains the given token.
const std::deque< std::string > & Tokens() const
Get the tokens.
StringEncodingDictionary & operator=(const StringEncodingDictionary &other)
Copy-assign the contents of the given dictionary to this one.
StringEncodingDictionary()=default
Construct an empty dictionary.
size_t AddToken(const boost::string_view token)
The function adds the given token to the dictionary and assigns a label to the token.
std::unordered_map< boost::string_view, size_t, boost::hash< boost::string_view > > MapType
A convenient alias for the internal type of the map.
StringEncodingDictionary & operator=(StringEncodingDictionary &&other)=default
Standard move assignment operator.
StringEncodingDictionary(const StringEncodingDictionary &other)
Copy-construct the dictionary from the given object.
void serialize(Archive &ar, const unsigned int)
Serialize the class to the given archive.
const MapType & Mapping() const
Get the mapping.
size_t Size() const
Get the size of the dictionary.
int TokenType
The type of the token that the dictionary stores.
size_t AddToken(const int token)
The function adds the given token to the dictionary and assigns a label to the token.
size_t Value(const int token) const
The function returns the label assigned to the given token.
std::array< size_t, 1<< CHAR_BIT > MapType
A convenient alias for the internal type of the map.
void serialize(Archive &ar, const unsigned int)
Serialize the class to the given archive.
bool HasToken(const int token) const
The function returns true if the dictionary contains the given token.
This class provides a dictionary interface for the purpose of string encoding.
bool HasToken(const Token &token) const
The function returns true if the dictionary contains the given token.
std::unordered_map< Token, size_t > MapType
A convenient alias for the internal type of the map.
const MapType & Mapping() const
Get the mapping.
Token TokenType
The type of the token that the dictionary stores.
size_t Size() const
Get the size of the dictionary.
size_t AddToken(T &&token)
The function adds the given token to the dictionary and assigns a label to the token.
void serialize(Archive &ar, const unsigned int)
Serialize the class to the given archive.
size_t Value(const Token &token) const
The function returns the label assigned to the given token.
Set the serialization version of the adaboost class.
Definition: adaboost.hpp:198
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
The core includes that mlpack expects; standard C++ includes and Armadillo.