mlpack 3.4.2
missing_policy.hpp
Go to the documentation of this file.
1
12#ifndef MLPACK_CORE_DATA_MAP_POLICIES_MISSING_POLICY_HPP
13#define MLPACK_CORE_DATA_MAP_POLICIES_MISSING_POLICY_HPP
14
15#include <mlpack/prereqs.hpp>
16#include <unordered_map>
18#include <limits>
19#include <set>
20
21namespace mlpack {
22namespace data {
23
32{
33 public:
34 // typedef of MappedType
35 using MappedType = double;
36
38 {
39 // Nothing to initialize here.
40 }
41
49 explicit MissingPolicy(std::set<std::string> missingSet) :
50 missingSet(std::move(missingSet))
51 {
52 // Nothing to initialize here.
53 }
54
56 static const bool NeedsFirstPass = false;
57
62 template<typename T>
63 void MapFirstPass(const std::string& /* string */, const size_t /* dim */)
64 {
65 // Nothing to do.
66 }
67
83 template<typename MapType, typename T>
84 T MapString(const std::string& string,
85 const size_t dimension,
86 MapType& maps,
87 std::vector<Datatype>& /* types */)
88 {
89 static_assert(std::numeric_limits<T>::has_quiet_NaN == true,
90 "Cannot use MissingPolicy with types where has_quiet_NaN() is false!");
91
92 // If we can load the string then there is no need for mapping.
93 std::stringstream token;
94 token.str(string);
95 T t;
96 token >> t; // Could be sped up by only doing this if we need to.
97
98 MappedType value = std::numeric_limits<MappedType>::quiet_NaN();
99 // But we can't use that for the map, so we need some other thing that will
100 // represent quiet_NaN().
101 const MappedType mapValue = std::nexttoward(
102 std::numeric_limits<MappedType>::max(), MappedType(0));
103
104 // If extraction of the value fails, or if it is a value that is supposed to
105 // be mapped, then do mapping.
106 if (token.fail() || !token.eof() ||
107 missingSet.find(string) != std::end(missingSet))
108 {
109 // Everything is mapped to NaN. However we must still keep track of
110 // everything that we have mapped, so we add it to the maps if needed.
111 if (maps.count(dimension) == 0 ||
112 maps[dimension].first.count(string) == 0)
113 {
114 // This string does not exist yet.
115 typedef std::pair<std::string, MappedType> PairType;
116 maps[dimension].first.insert(PairType(string, value));
117
118 // Insert right mapping too.
119 if (maps[dimension].second.count(mapValue) == 0)
120 {
121 // Create new element in reverse map.
122 maps[dimension].second.insert(std::make_pair(mapValue,
123 std::vector<std::string>()));
124 }
125 maps[dimension].second[mapValue].push_back(string);
126 }
127
128 return value;
129 }
130 else
131 {
132 // We can just return the value that we read.
133 return t;
134 }
135 }
136
137 private:
138 // Note that missingSet and maps are different.
139 // missingSet specifies which value/string should be mapped and may be a
140 // superset of 'maps'.
141 std::set<std::string> missingSet;
142}; // class MissingPolicy
143
144} // namespace data
145} // namespace mlpack
146
147#endif
MissingPolicy is used as a helper class for DatasetMapper.
void MapFirstPass(const std::string &, const size_t)
There is nothing for us to do here, but this is required by the MapPolicy type.
MissingPolicy(std::set< std::string > missingSet)
Create the MissingPolicy object with the given missingSet.
static const bool NeedsFirstPass
This doesn't need a first pass over the data to set up.
T MapString(const std::string &string, const size_t dimension, MapType &maps, std::vector< Datatype > &)
Given the string and the dimension to which it belongs by the user, and the maps and types given by the DatasetMapper class, returns its numeric mapping.
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
Definition: prereqs.hpp:67
The core includes that mlpack expects; standard C++ includes and Armadillo.