mlpack 3.4.2
increment_policy.hpp
Go to the documentation of this file.
1
12#ifndef MLPACK_CORE_DATA_MAP_POLICIES_INCREMENT_POLICY_HPP
13#define MLPACK_CORE_DATA_MAP_POLICIES_INCREMENT_POLICY_HPP
14
15#include <mlpack/prereqs.hpp>
16#include <unordered_map>
18
19namespace mlpack {
20namespace data {
21
34{
35 public:
36 IncrementPolicy(const bool forceAllMappings = false) :
37 forceAllMappings(forceAllMappings) { }
38
39 // Type of the value that a mapped (categorical) token is assigned.
40 using MappedType = size_t;
41
// Flag stating that a first pass over the data (see MapFirstPass()) is needed
// to set the dimension types right before tokens are mapped.
43 static const bool NeedsFirstPass = true;
44
48 template<typename T, typename InputType>
49 void MapFirstPass(const InputType& input,
50 const size_t dim,
51 std::vector<Datatype>& types)
52 {
53 if (types[dim] == Datatype::categorical)
54 {
55 // No need to check; it's already categorical.
56 return;
57 }
58
59 if (forceAllMappings)
60 {
61 types[dim] = Datatype::categorical;
62 }
63 else
64 {
65 // Attempt to convert the input to an output type via a stringstream.
66 std::stringstream token;
67 token << input;
68 T val;
69 token >> val;
70
71 if (token.fail() || !token.eof())
72 types[dim] = Datatype::categorical;
73 }
74 }
75
89 template<typename MapType, typename T, typename InputType>
90 T MapString(const InputType& input,
91 const size_t dimension,
92 MapType& maps,
93 std::vector<Datatype>& types)
94 {
95 // If we are in a categorical dimension we already know we need to map.
96 if (types[dimension] == Datatype::numeric && !forceAllMappings)
97 {
98 // Check if this input needs to be mapped or if it can be read
99 // directly as a number. This will be true if nothing else in this
100 // dimension has yet been mapped, but this can't be read as a number.
101 std::stringstream token;
102 token << input;
103 T val;
104 token >> val;
105
106 if (!token.fail() && token.eof())
107 return val;
108
109 // Otherwise, we must map.
110 }
111
112 // If this condition is true, either we have no mapping for the given input
113 // or we have no mappings for the given dimension at all. In either case,
114 // we create a mapping.
115 if (maps.count(dimension) == 0 ||
116 maps[dimension].first.count(input) == 0)
117 {
118 // This input does not exist yet.
119 size_t numMappings = maps[dimension].first.size();
120
121 // Change type of the feature to categorical.
122 if (numMappings == 0)
123 types[dimension] = Datatype::categorical;
124
125 typedef typename std::pair<InputType, MappedType> PairType;
126 maps[dimension].first.insert(PairType(input, numMappings));
127
128 // Do we need to create the second map?
129 if (maps[dimension].second.count(numMappings) == 0)
130 {
131 maps[dimension].second.insert(std::make_pair(numMappings,
132 std::vector<InputType>()));
133 }
134 maps[dimension].second[numMappings].push_back(input);
135
136 return T(numMappings);
137 }
138 else
139 {
140 // This input already exists in the mapping.
141 return maps[dimension].first.at(input);
142 }
143 }
144
145 private:
146 // Whether or not we should map all tokens.  When true, tokens are mapped
// to categories even if they could be read as numbers.
147 bool forceAllMappings;
148}; // class IncrementPolicy
149
150} // namespace data
151} // namespace mlpack
152
153#endif
IncrementPolicy is used as a helper class for DatasetMapper.
IncrementPolicy(const bool forceAllMappings=false)
static const bool NeedsFirstPass
We do need a first pass over the data to set the dimension types right.
T MapString(const InputType &input, const size_t dimension, MapType &maps, std::vector< Datatype > &types)
Given the input and the dimension to which it belongs, and the maps and types given by the DatasetMapper class, map the input to a numeric value.
void MapFirstPass(const InputType &input, const size_t dim, std::vector< Datatype > &types)
Determine if the dimension is numeric or categorical.
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
The core includes that mlpack expects; standard C++ includes and Armadillo.