13#ifndef MLPACK_CORE_DATA_STRING_ENCODING_HPP
14#define MLPACK_CORE_DATA_STRING_ENCODING_HPP
17#include <mlpack/core/boost_backport/boost_backport_string_view.hpp>
33template<
typename EncodingPolicyType,
34 typename DictionaryType>
42 template<
typename ... ArgTypes>
84 template<
typename TokenizerType>
86 const TokenizerType& tokenizer);
/**
 * Encode the given text and write the result to the given output.
 *
 * NOTE(review): the embedded numbering jumps from 119 to 121, so one line of
 * this declaration appears to be missing from this listing. The signature
 * index further down in this file shows Encode(input, output, tokenizer) with
 * an `OutputType& output` parameter between the two shown here -- confirm
 * against the original header before relying on this text.
 *
 * @tparam OutputType Type of the output object (not deducible from the
 *     parameters shown here; see the note above).
 * @tparam TokenizerType Type of the tokenizer argument.
 * @param input Strings to encode.
 * @param tokenizer Tokenizer object, taken by const reference; presumably
 *     used to split each string into tokens -- TODO confirm.
 */
118 template<
typename OutputType,
typename TokenizerType>
119 void Encode(
const std::vector<std::string>& input,
121 const TokenizerType& tokenizer);
//! Return the dictionary (read-only reference to the `dictionary` member).
124 const DictionaryType&
Dictionary()
const {
return dictionary; }
136 template<
typename Archive>
168 template<
typename OutputType,
typename TokenizerType,
typename PolicyType>
169 void EncodeHelper(
const std::vector<std::string>& input,
171 const TokenizerType& tokenizer,
197 template<
typename TokenizerType,
typename PolicyType,
typename ElemType>
198 void EncodeHelper(
const std::vector<std::string>& input,
199 std::vector<std::vector<ElemType>>& output,
200 const TokenizerType& tokenizer,
203 PolicyType>::onePassEncoding>::type* = 0);
//! Encoding policy instance held by this class.
//! NOTE(review): presumably the object exposed by EncodingPolicy() -- confirm.
207 EncodingPolicyType encodingPolicy;
//! Dictionary instance held by this class; Dictionary() const returns it.
209 DictionaryType dictionary;
216#include "string_encoding_impl.hpp"
The class translates a set of strings into numbers using various encoding algorithms.
DictionaryType & Dictionary()
Modify the dictionary.
void CreateMap(const std::string &input, const TokenizerType &tokenizer)
Initialize the dictionary using the given corpus.
StringEncoding(const StringEncoding &)
Default copy-constructor.
void Encode(const std::vector< std::string > &input, OutputType &output, const TokenizerType &tokenizer)
Encode the given text and write the result to the given output.
StringEncoding(ArgTypes &&... args)
Pass the given arguments to the policy constructor and create the StringEncoding object using the policy.
StringEncoding & operator=(StringEncoding &&)=default
Default move assignment operator.
StringEncoding(StringEncoding &)
A variant of the copy constructor for non-constant objects.
StringEncoding(StringEncoding &&)
Default move-constructor.
const DictionaryType & Dictionary() const
Return the dictionary.
EncodingPolicyType & EncodingPolicy()
Modify the encoding policy object.
void Clear()
Clear the dictionary.
StringEncoding & operator=(const StringEncoding &)=default
Default copy assignment operator.
StringEncoding(EncodingPolicyType encodingPolicy)
Construct the class from the given encoding policy.
const EncodingPolicyType & EncodingPolicy() const
Return the encoding policy object.
void serialize(Archive &ar, const unsigned int)
Serialize the class to the given archive.
Linear algebra utility functions, generally performed on matrices or vectors.
The core includes that mlpack expects; standard C++ includes and Armadillo.
This is a template struct that provides some information about various encoding policies.