mlpack 3.4.2
bag_of_words_encoding_policy.hpp
Go to the documentation of this file.
1
13#ifndef MLPACK_CORE_DATA_STR_ENCODING_POLICIES_BAG_OF_WORDS_ENCODING_POLICY_HPP
14#define MLPACK_CORE_DATA_STR_ENCODING_POLICIES_BAG_OF_WORDS_ENCODING_POLICY_HPP
15
16#include <mlpack/prereqs.hpp>
19
20namespace mlpack {
21namespace data {
22
36{
37 public:
/**
 * Clear the policy's internal variables.
 *
 * The body is intentionally empty: this hook performs no work for the bag of
 * words policy.
 */
static void Reset()
{
  // No state to clear.
}
45
/**
 * Initialize the output matrix before encoding.
 *
 * Forwards to output.zeros(dictionarySize, datasetSize); with Armadillo-style
 * matrices this yields a zero-filled matrix with one row per dictionary entry
 * and one column per dataset line.
 *
 * @tparam MatType Matrix type providing zeros(rows, cols).
 * @param output Matrix to initialize.
 * @param datasetSize Number of lines in the dataset (second zeros() argument).
 * @param maxNumTokens Unused by this policy.
 * @param dictionarySize Number of dictionary entries (first zeros() argument).
 */
template<typename MatType>
static void InitMatrix(MatType& output,
                       const size_t datasetSize,
                       const size_t /* maxNumTokens */,
                       const size_t dictionarySize)
{
  // One row per token label, one column per encoded line.
  const size_t numRows = dictionarySize;
  const size_t numCols = datasetSize;

  output.zeros(numRows, numCols);
}
66
/**
 * Initialize the output container before encoding (nested-vector overload).
 *
 * After the call, output holds exactly datasetSize rows, each with
 * dictionarySize value-initialized (zero) elements.
 *
 * assign() is used rather than resize() because resize() only initializes
 * newly appended rows: a caller passing a non-empty output would keep stale
 * rows with their old lengths and counts, and Encode() could then index past
 * the end of a short row. Replacing the whole contents matches the matrix
 * overload, which zeroes the entire output.
 *
 * @tparam ElemType Element type of the output container.
 * @param output Container to initialize; any previous contents are discarded.
 * @param datasetSize Number of lines in the dataset (number of rows).
 * @param maxNumTokens Unused by this policy.
 * @param dictionarySize Number of dictionary entries (length of each row).
 */
template<typename ElemType>
static void InitMatrix(std::vector<std::vector<ElemType>>& output,
                       const size_t datasetSize,
                       const size_t /* maxNumTokens */,
                       const size_t dictionarySize)
{
  output.assign(datasetSize, std::vector<ElemType>(dictionarySize));
}
89
/**
 * Record one occurrence of a token in the output matrix.
 *
 * Adds 1 to the element at row (value - 1), column line.
 *
 * @tparam MatType Matrix type indexable as output(row, col).
 * @param output Matrix receiving the count.
 * @param value Label of the token; labels start at one, so value must be >= 1
 *     (value - 1 would wrap around for 0 because size_t is unsigned).
 * @param line Index of the line being encoded (column index).
 * @param index Unused by this policy.
 */
template<typename MatType>
static void Encode(MatType& output,
                   const size_t value,
                   const size_t line,
                   const size_t /* index */)
{
  // Token labels are 1-based; shift to a 0-based row.
  const size_t row = value - 1;
  output(row, line) += 1;
}
111
/**
 * Record one occurrence of a token in the nested-vector output.
 *
 * Adds 1 to output[line][value - 1]; here the line selects the outer vector
 * and the token label selects the element within it.
 *
 * @tparam ElemType Element type of the output container.
 * @param output Container receiving the count.
 * @param value Label of the token; labels start at one, so value must be >= 1
 *     (value - 1 would wrap around for 0 because size_t is unsigned).
 * @param line Index of the line being encoded (outer index).
 * @param index Unused by this policy.
 */
template<typename ElemType>
static void Encode(std::vector<std::vector<ElemType>>& output,
                   const size_t value,
                   const size_t line,
                   const size_t /* index */)
{
  std::vector<ElemType>& counts = output[line];

  // Token labels are 1-based; shift to a 0-based position.
  counts[value - 1] += 1;
}
136
/**
 * Token preprocessing hook; the bag of words policy does nothing here.
 *
 * @param line Unused.
 * @param index Unused.
 * @param value Unused.
 */
static void PreprocessToken(size_t /* line */,
                            size_t /* index */,
                            size_t /* value */)
{
  // Intentionally empty.
}
148
/**
 * Serialize the class to the given archive.
 *
 * The body is empty, so the archive is left untouched.
 *
 * @tparam Archive Archive type.
 * @param ar Unused.
 * @param version Unused.
 */
template<typename Archive>
void serialize(Archive& /* ar */, const unsigned int /* version */)
{
  // No members to read or write.
}
157};
158
165template<typename TokenType>
168} // namespace data
169} // namespace mlpack
170
171#endif
Definition of the BagOfWordsEncodingPolicy class.
static void InitMatrix(std::vector< std::vector< ElemType > > &output, const size_t datasetSize, const size_t, const size_t dictionarySize)
The function initializes the output matrix.
static void Encode(MatType &output, const size_t value, const size_t line, const size_t)
The function performs the bag of words encoding algorithm, i.e. it increments the count stored in the output matrix for the given token on the given line.
static void Encode(std::vector< std::vector< ElemType > > &output, const size_t value, const size_t line, const size_t)
The function performs the bag of words encoding algorithm, i.e. it increments the count stored in the output vector of the given line for the given token.
static void InitMatrix(MatType &output, const size_t datasetSize, const size_t, const size_t dictionarySize)
The function initializes the output matrix.
static void Reset()
Clear the necessary internal variables.
void serialize(Archive &, const unsigned int)
Serialize the class to the given archive.
static void PreprocessToken(size_t, size_t, size_t)
The function is not used by the bag of words encoding policy.
This class provides a dictionary interface for the purpose of string encoding.
The class translates a set of strings into numbers using various encoding algorithms.
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
The core includes that mlpack expects; standard C++ includes and Armadillo.