13#ifndef MLPACK_CORE_DATA_STRING_ENCODING_DICTIONARY_HPP
14#define MLPACK_CORE_DATA_STRING_ENCODING_DICTIONARY_HPP
17#include <mlpack/core/boost_backport/boost_backport_string_view.hpp>
18#include <unordered_map>
31template<
typename Token>
36 using MapType = std::unordered_map<Token, size_t>;
48 return mapping.find(token) != mapping.end();
61 size_t size = mapping.size();
63 mapping[std::forward<T>(token)] = ++size;
74 size_t Value(
const Token& token)
const
76 return mapping.at(token);
80 size_t Size()
const {
return mapping.size(); }
96 template<
typename Archive>
99 ar & BOOST_SERIALIZATION_NVP(mapping);
118 boost::hash<boost::string_view>>;
130 for (
const std::string& token : tokens)
131 mapping[token] = other.mapping.at(token);
140 tokens = other.tokens;
143 for (
const std::string& token : tokens)
144 mapping[token] = other.mapping.at(token);
158 bool HasToken(
const boost::string_view token)
const
160 return mapping.find(token) != mapping.end();
172 tokens.emplace_back(token);
174 size_t size = mapping.size();
176 mapping[tokens.back()] = ++size;
187 size_t Value(
const boost::string_view token)
const
189 return mapping.at(token);
193 size_t Size()
const {
return mapping.size(); }
203 const std::deque<std::string>&
Tokens()
const {
return tokens; }
205 std::deque<std::string>&
Tokens() {
return tokens; }
215 template<
typename Archive>
218 size_t numTokens = tokens.size();
220 ar & BOOST_SERIALIZATION_NVP(numTokens);
222 if (Archive::is_loading::value)
224 tokens.resize(numTokens);
226 for (std::string& token : tokens)
228 ar & BOOST_SERIALIZATION_NVP(token);
230 size_t tokenValue = 0;
231 ar & BOOST_SERIALIZATION_NVP(tokenValue);
232 mapping[token] = tokenValue;
235 if (Archive::is_saving::value)
237 for (std::string& token : tokens)
239 ar & BOOST_SERIALIZATION_NVP(token);
241 size_t tokenValue = mapping.at(token);
242 ar & BOOST_SERIALIZATION_NVP(tokenValue);
249 std::deque<std::string> tokens;
260 using MapType = std::array<size_t, 1 << CHAR_BIT>;
280 return mapping[token] > 0;
293 mapping[token] = ++size;
307 return mapping[token];
330 template<
typename Archive>
333 ar & BOOST_SERIALIZATION_NVP(mapping);
334 ar & BOOST_SERIALIZATION_NVP(size);
boost::string_view TokenType
The type of the token that the dictionary stores.
StringEncodingDictionary(StringEncodingDictionary &&other)=default
Standard move constructor.
const MapType & Mapping() const
Get the mapping.
size_t Value(const boost::string_view token) const
The function returns the label assigned to the given token.
std::deque< std::string > & Tokens()
Modify the tokens.
bool HasToken(const boost::string_view token) const
The function returns true if the dictionary contains the given token.
size_t Size() const
Get the size of the dictionary.
const std::deque< std::string > & Tokens() const
Get the tokens.
StringEncodingDictionary & operator=(const StringEncodingDictionary &other)
Copy the class from the given object.
StringEncodingDictionary()=default
Construct the default class.
size_t AddToken(const boost::string_view token)
The function adds the given token to the dictionary and assigns a label to the token.
std::unordered_map< boost::string_view, size_t, boost::hash< boost::string_view > > MapType
A convenient alias for the internal type of the map.
StringEncodingDictionary & operator=(StringEncodingDictionary &&other)=default
Standard move assignment operator.
void Clear()
Clear the dictionary.
StringEncodingDictionary(const StringEncodingDictionary &other)
Copy the class from the given object.
MapType & Mapping()
Modify the mapping.
void serialize(Archive &ar, const unsigned int)
Serialize the class to the given archive.
const MapType & Mapping() const
Get the mapping.
size_t Size() const
Get the size of the dictionary.
int TokenType
The type of the token that the dictionary stores.
StringEncodingDictionary()
Construct the default class.
void Clear()
Clear the dictionary.
size_t AddToken(const int token)
The function adds the given token to the dictionary and assigns a label to the token.
size_t Value(const int token) const
The function returns the label assigned to the given token.
std::array< size_t, 1<< CHAR_BIT > MapType
A convenient alias for the internal type of the map.
MapType & Mapping()
Modify the mapping.
void serialize(Archive &ar, const unsigned int)
Serialize the class to the given archive.
bool HasToken(const int token) const
The function returns true if the dictionary contains the given token.
This class provides a dictionary interface for the purpose of string encoding.
bool HasToken(const Token &token) const
The function returns true if the dictionary contains the given token.
std::unordered_map< Token, size_t > MapType
A convenient alias for the internal type of the map.
const MapType & Mapping() const
Get the mapping.
Token TokenType
The type of the token that the dictionary stores.
size_t Size() const
Get the size of the dictionary.
void Clear()
Clear the dictionary.
size_t AddToken(T &&token)
The function adds the given token to the dictionary and assigns a label to the token.
MapType & Mapping()
Modify the mapping.
void serialize(Archive &ar, const unsigned int)
Serialize the class to the given archive.
size_t Value(const Token &token) const
The function returns the label assigned to the given token.
Set the serialization version of the adaboost class.
Linear algebra utility functions, generally performed on matrices or vectors.
The core includes that mlpack expects; standard C++ includes and Armadillo.