mlpack 3.4.2
string_encoding.hpp
Go to the documentation of this file.
1
13#ifndef MLPACK_CORE_DATA_STRING_ENCODING_HPP
14#define MLPACK_CORE_DATA_STRING_ENCODING_HPP
15
16#include <mlpack/prereqs.hpp>
17#include <mlpack/core/boost_backport/boost_backport_string_view.hpp>
20#include <vector>
21
22namespace mlpack {
23namespace data {
24
33template<typename EncodingPolicyType,
34 typename DictionaryType>
35class StringEncoding
36{
37 public:
42 template<typename ... ArgTypes>
43 StringEncoding(ArgTypes&& ... args);
44
50 StringEncoding(EncodingPolicyType encodingPolicy);
51
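56 StringEncoding(StringEncoding&);
59 StringEncoding(const StringEncoding&);
62 StringEncoding(StringEncoding&&);
65 StringEncoding& operator=(const StringEncoding&) = default;
68 StringEncoding& operator=(StringEncoding&&) = default;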
84 template<typename TokenizerType>
85 void CreateMap(const std::string& input,
86 const TokenizerType& tokenizer);
87
91 void Clear();
92
118 template<typename OutputType, typename TokenizerType>
119 void Encode(const std::vector<std::string>& input,
120 OutputType& output,
121 const TokenizerType& tokenizer);
122
124 const DictionaryType& Dictionary() const { return dictionary; }
126 DictionaryType& Dictionary() { return dictionary; }
127
129 const EncodingPolicyType& EncodingPolicy() const { return encodingPolicy; }
131 EncodingPolicyType& EncodingPolicy() { return encodingPolicy; }
132
136 template<typename Archive>
137 void serialize(Archive& ar, const unsigned int /* version */);
138
139 private:
168 template<typename OutputType, typename TokenizerType, typename PolicyType>
169 void EncodeHelper(const std::vector<std::string>& input,
170 OutputType& output,
171 const TokenizerType& tokenizer,
172 PolicyType& policy);
173
197 template<typename TokenizerType, typename PolicyType, typename ElemType>
198 void EncodeHelper(const std::vector<std::string>& input,
199 std::vector<std::vector<ElemType>>& output,
200 const TokenizerType& tokenizer,
201 PolicyType& policy,
202 typename std::enable_if<StringEncodingPolicyTraits<
203 PolicyType>::onePassEncoding>::type* = 0);
204
205 private:
207 EncodingPolicyType encodingPolicy;
209 DictionaryType dictionary;
210};
211
212} // namespace data
213} // namespace mlpack
214
215// Include implementation.
216#include "string_encoding_impl.hpp"
217
218#endif
The class translates a set of strings into numbers using various encoding algorithms.
DictionaryType & Dictionary()
Modify the dictionary.
void CreateMap(const std::string &input, const TokenizerType &tokenizer)
Initialize the dictionary using the given corpus.
StringEncoding(const StringEncoding &)
Default copy-constructor.
void Encode(const std::vector< std::string > &input, OutputType &output, const TokenizerType &tokenizer)
Encode the given text and write the result to the given output.
StringEncoding(ArgTypes &&... args)
Pass the given arguments to the policy constructor and create the StringEncoding object using the policy.
StringEncoding & operator=(StringEncoding &&)=default
Default move assignment operator.
StringEncoding(StringEncoding &)
A variant of the copy constructor for non-constant objects.
StringEncoding(StringEncoding &&)
Default move-constructor.
const DictionaryType & Dictionary() const
Return the dictionary.
EncodingPolicyType & EncodingPolicy()
Modify the encoding policy object.
void Clear()
Clear the dictionary.
StringEncoding & operator=(const StringEncoding &)=default
Default copy assignment operator.
StringEncoding(EncodingPolicyType encodingPolicy)
Construct the class from the given encoding policy.
const EncodingPolicyType & EncodingPolicy() const
Return the encoding policy object.
void serialize(Archive &ar, const unsigned int)
Serialize the class to the given archive.
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
The core includes that mlpack expects; standard C++ includes and Armadillo.
This is a template struct that provides some information about various encoding policies.