mlpack 3.4.2
dataset_mapper.hpp
Go to the documentation of this file.
1
15#ifndef MLPACK_CORE_DATA_DATASET_INFO_HPP
16#define MLPACK_CORE_DATA_DATASET_INFO_HPP
17
18#include <mlpack/prereqs.hpp>
19#include <unordered_map>
20
22
23namespace mlpack {
24namespace data {
25
// DatasetMapper: auxiliary information for a dataset.  It holds a Datatype for
// the dimensions it knows about (`types`) and, for categorical dimensions,
// forward (input -> mapped value) and reverse (mapped value -> inputs) lookup
// tables (`maps`).
//
// Template parameters:
//   PolicyType - mapping policy; must expose a nested `MappedType`.
//   InputType  - type of the raw inputs being mapped (defaults to std::string).
//
// NOTE(review): the class-head line (listing line 41, presumably
// `class DatasetMapper`) is missing from this extract; the listing jumps from
// 40 to 42.
40template<typename PolicyType, typename InputType = std::string>
42{
43 public:
// Create the DatasetMapper object with the given dimensionality.
49 explicit DatasetMapper(const size_t dimensionality = 0);
50
// Create the DatasetMapper object with the given policy and dimensionality.
56 explicit DatasetMapper(PolicyType& policy, const size_t dimensionality = 0);
57
// Set the dimensionality of an existing DatasetMapper object.
64 void SetDimensionality(const size_t dimensionality);
65
// Preprocessing: during a first pass of the data, pass the input on to the
// MapPolicy.  T is a caller-chosen type that cannot be deduced from the
// arguments, so it must be given explicitly.
73 template<typename T>
74 void MapFirstPass(const InputType& input, const size_t dimension);
75
// Given the input and the dimension to which it belongs, return its numeric
// mapping as a T.
86 template<typename T>
87 T MapString(const InputType& input,
88 const size_t dimension);
89
// Return the input that corresponds to a given value in a given dimension.
// Several inputs may map to the same value (see ReverseMapType below);
// unmappingIndex presumably selects which of them is returned -- TODO confirm
// against dataset_mapper_impl.hpp.
108 template<typename T>
109 const InputType& UnmapString(const T value,
110 const size_t dimension,
111 const size_t unmappingIndex = 0) const;
112
// Get the number of possible unmappings for a value in a given dimension.
116 template<typename T>
117 size_t NumUnmappings(const T value, const size_t dimension) const;
118
// Return the mapped value that corresponds to a given input in a given
// dimension.
128 typename PolicyType::MappedType UnmapValue(const InputType& input,
129 const size_t dimension);
130
// Return the type of a given dimension (numeric or categorical).
132 Datatype Type(const size_t dimension) const;
// Modify the type of a given dimension (be careful!).
134 Datatype& Type(const size_t dimension);
135
// Get the number of mappings for a particular dimension.
140 size_t NumMappings(const size_t dimension) const;
141
// Get the dimensionality of the DatasetMapper object (that is, how many
// dimensions it has information for).
148 size_t Dimensionality() const;
149
// Serialize the dataset information.  Only `types` and `maps` are archived;
// `policy` is not part of the archive.
153 template<typename Archive>
154 void serialize(Archive& ar, const unsigned int /* version */)
155 {
156 ar & BOOST_SERIALIZATION_NVP(types);
157 ar & BOOST_SERIALIZATION_NVP(maps);
158 }
159
// Return the policy of the mapper.
161 const PolicyType& Policy() const;
162
// Modify the policy of the mapper (be careful!).
164 PolicyType& Policy();
// Modify (replace) the policy of the mapper with a new policy.
166 void Policy(PolicyType&& policy);
167
168 private:
// Datatype of each dimension (presumably indexed by dimension -- see Type()).
170 std::vector<Datatype> types;
171
172 // Forward mapping type.
173 using ForwardMapType = typename std::unordered_map<InputType, typename
174 PolicyType::MappedType>;
175
176 // Reverse mapping type. Multiple inputs may map to a single output, hence
177 // the need for std::vector.
178 using ReverseMapType = std::unordered_map<typename PolicyType::MappedType,
179 std::vector<InputType>>;
180
181 // Mappings from inputs to mapped values and back, keyed by dimension.
182 // Map entries will only exist for dimensions that are categorical.
183 // MapType = map<dimension, pair<ForwardMapType, ReverseMapType>>
184 using MapType = std::unordered_map<size_t, std::pair<ForwardMapType,
185 ReverseMapType>>;
186
// Per-dimension forward and reverse mappings.
188 MapType maps;
189
// The mapping policy object.
// NOTE(review): the first line of the original comment below is missing from
// this extract, and MapTokens() is not declared in the visible class.
191 // mapped to the maps object. It is used in MapString() and MapTokens().
192 PolicyType policy;
193};
194
195// Use typedef to provide backward compatibility
197
198} // namespace data
199} // namespace mlpack
200
201#include "dataset_mapper_impl.hpp"
202
203#endif
Auxiliary information for a dataset, including mappings to/from strings (or other types) and the datatype of each dimension.
Datatype & Type(const size_t dimension)
Modify the type of a given dimension (be careful!).
DatasetMapper(PolicyType &policy, const size_t dimensionality=0)
Create the DatasetMapper object with the given policy and dimensionality.
DatasetMapper(const size_t dimensionality=0)
Create the DatasetMapper object with the given dimensionality.
void Policy(PolicyType &&policy)
Modify (Replace) the policy of the mapper with a new policy.
size_t NumUnmappings(const T value, const size_t dimension) const
Get the number of possible unmappings for a string in a given dimension.
PolicyType::MappedType UnmapValue(const InputType &input, const size_t dimension)
Return the value that corresponds to a given input in a given dimension.
size_t Dimensionality() const
Get the dimensionality of the DatasetMapper object (that is, how many dimensions it has information for).
const InputType & UnmapString(const T value, const size_t dimension, const size_t unmappingIndex=0) const
Return the input that corresponds to a given value in a given dimension.
const PolicyType & Policy() const
Return the policy of the mapper.
PolicyType & Policy()
Modify the policy of the mapper (be careful!).
T MapString(const InputType &input, const size_t dimension)
Given the input and the dimension to which it belongs, return its numeric mapping.
size_t NumMappings(const size_t dimension) const
Get the number of mappings for a particular dimension.
void MapFirstPass(const InputType &input, const size_t dimension)
Preprocessing: during a first pass of the data, pass the input on to the MapPolicy if it is needed.
void SetDimensionality(const size_t dimensionality)
Set the dimensionality of an existing DatasetMapper object.
void serialize(Archive &ar, const unsigned int)
Serialize the dataset information.
Datatype Type(const size_t dimension) const
Return the type of a given dimension (numeric or categorical).
Datatype
The Datatype enum specifies the types of data mlpack algorithms can use.
Definition: datatype.hpp:25
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
The core includes that mlpack expects; standard C++ includes and Armadillo.