mlpack 3.4.2
mock_categorical_data.hpp
Go to the documentation of this file.
1
11#ifndef MLPACK_TESTS_MOCK_CATEGORICAL_DATA_HPP
12#define MLPACK_TESTS_MOCK_CATEGORICAL_DATA_HPP
13
#include <mlpack/prereqs.hpp>
#include <mlpack/core/dists/discrete_distribution.hpp>
16
20inline void MockCategoricalData(arma::mat& d,
21 arma::Row<size_t>& l,
22 mlpack::data::DatasetInfo& datasetInfo)
23{
24 // We'll build a spiral dataset plus two noisy categorical features. We need
25 // to build the distributions for the categorical features (they'll be
26 // discrete distributions).
28 // The distribution will be automatically normalized.
29 for (size_t i = 0; i < 5; ++i)
30 {
31 std::vector<arma::vec> probs;
32 probs.push_back(arma::vec(4, arma::fill::randu));
34 }
35
37 for (size_t i = 0; i < 5; ++i)
38 {
39 std::vector<arma::vec> probs;
40 probs.push_back(arma::vec(2, arma::fill::randu));
42 }
43
44 arma::mat spiralDataset(4, 4000);
45 arma::Row<size_t> labels(4000);
46 for (size_t i = 0; i < 4000; ++i)
47 {
48 // One circle every 2000 samples. Plus some noise.
49 const double magnitude = 2.0 + (double(i) / 200.0) +
51 const double angle = (i % 200) * (2 * M_PI) + mlpack::math::Random();
52
53 const double x = magnitude * cos(angle);
54 const double y = magnitude * sin(angle);
55
56 spiralDataset(0, i) = x;
57 spiralDataset(1, i) = y;
58
59 // Set categorical features c1 and c2.
60 if (i < 800)
61 {
62 spiralDataset(2, i) = c1[1].Random()[0];
63 spiralDataset(3, i) = c2[1].Random()[0];
64 labels[i] = 1;
65 }
66 else if (i < 1600)
67 {
68 spiralDataset(2, i) = c1[3].Random()[0];
69 spiralDataset(3, i) = c2[3].Random()[0];
70 labels[i] = 3;
71 }
72 else if (i < 2400)
73 {
74 spiralDataset(2, i) = c1[2].Random()[0];
75 spiralDataset(3, i) = c2[2].Random()[0];
76 labels[i] = 2;
77 }
78 else if (i < 3200)
79 {
80 spiralDataset(2, i) = c1[0].Random()[0];
81 spiralDataset(3, i) = c2[0].Random()[0];
82 labels[i] = 0;
83 }
84 else
85 {
86 spiralDataset(2, i) = c1[4].Random()[0];
87 spiralDataset(3, i) = c2[4].Random()[0];
88 labels[i] = 4;
89 }
90 }
91
92 // Now create the dataset info.
93 datasetInfo = mlpack::data::DatasetInfo(4);
96 // Set mappings.
97 datasetInfo.MapString<double>("0", 2);
98 datasetInfo.MapString<double>("1", 2);
99 datasetInfo.MapString<double>("2", 2);
100 datasetInfo.MapString<double>("3", 2);
101 datasetInfo.MapString<double>("0", 3);
102 datasetInfo.MapString<double>("1", 3);
103
104 // Now shuffle the dataset.
105 arma::uvec indices = arma::shuffle(arma::linspace<arma::uvec>(0, 3999,
106 4000));
107 d = arma::mat(4, 4000);
108 l = arma::Row<size_t>(4000);
109 for (size_t i = 0; i < 4000; ++i)
110 {
111 d.col(i) = spiralDataset.col(indices[i]);
112 l[i] = labels[indices[i]];
113 }
114}
115
116#endif
Auxiliary information for a dataset, including mappings to/from strings (or other types) and the datatype of each dimension.
T MapString(const InputType &input, const size_t dimension)
Given the input and the dimension to which it belongs, return its numeric mapping.
Datatype Type(const size_t dimension) const
Return the type of a given dimension (numeric or categorical).
A discrete distribution where the only observations are discrete observations.
arma::vec Random() const
Return a randomly generated observation (one-dimensional vector; one observation) according to the probability distribution defined by this object.
void MockCategoricalData(arma::mat &d, arma::Row< size_t > &l, mlpack::data::DatasetInfo &datasetInfo)
Create a mock categorical dataset for testing.
DatasetMapper< data::IncrementPolicy > DatasetInfo
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:83
The core includes that mlpack expects; standard C++ includes and Armadillo.
#define M_PI
Definition: prereqs.hpp:39