mlpack 3.4.2
z_score_normalization.hpp
Go to the documentation of this file.
1
12#ifndef MLPACK_METHODS_CF_NORMALIZATION_Z_SCORE_NORMALIZATION_HPP
13#define MLPACK_METHODS_CF_NORMALIZATION_Z_SCORE_NORMALIZATION_HPP
14
15#include <mlpack/prereqs.hpp>
16
17namespace mlpack {
18namespace cf {
19
39{
40 public:
41 // Empty constructor.
42 ZScoreNormalization() : mean(0), stddev(1) { }
43
49 void Normalize(arma::mat& data)
50 {
51 mean = arma::mean(data.row(2));
52 stddev = arma::stddev(data.row(2));
53
54 if (std::fabs(stddev) < 1e-14)
55 {
56 Log::Fatal << "Standard deviation of all existing ratings is 0! "
57 << "This may indicate that all existing ratings are the same."
58 << std::endl;
59 }
60
61 data.row(2) = (data.row(2) - mean) / stddev;
62 // The algorithm omits rating of zero. If normalized rating equals zero,
63 // it is set to the smallest positive float value.
64 data.row(2).for_each([](double& x)
65 {
66 if (x == 0)
67 x = std::numeric_limits<float>::min();
68 });
69 }
70
76 void Normalize(arma::sp_mat& cleanedData)
77 {
78 // Caculate mean and stdev of all non zero ratings.
79 arma::vec ratings = arma::nonzeros(cleanedData);
80 mean = arma::mean(ratings);
81 stddev = arma::stddev(ratings);
82
83 if (std::fabs(stddev) < 1e-14)
84 {
85 Log::Fatal << "Standard deviation of all existing ratings is 0! "
86 << "This may indicate that all existing ratings are the same."
87 << std::endl;
88 }
89
90 // Subtract mean from existing rating and divide it by stddev.
91 // TODO: consider using spmat::transform() instead of spmat iterators
92 // TODO: http://arma.sourceforge.net/docs.html#transform
93 arma::sp_mat::iterator it = cleanedData.begin();
94 arma::sp_mat::iterator it_end = cleanedData.end();
95 for (; it != it_end; ++it)
96 {
97 double tmp = (*it - mean) / stddev;
98
99 // The algorithm omits rating of zero. If normalized rating equals zero,
100 // it is set to the smallest positive float value.
101 if (tmp == 0)
102 tmp = std::numeric_limits<float>::min();
103
104 *it = tmp;
105 }
106 }
107
115 double Denormalize(const size_t /* user */,
116 const size_t /* item */,
117 const double rating) const
118 {
119 return rating * stddev + mean;
120 }
121
128 void Denormalize(const arma::Mat<size_t>& /* combinations */,
129 arma::vec& predictions) const
130 {
131 predictions = predictions * stddev + mean;
132 }
133
137 double Mean() const
138 {
139 return mean;
140 }
141
145 double Stddev() const
146 {
147 return stddev;
148 }
149
153 template<typename Archive>
154 void serialize(Archive& ar, const unsigned int /* version */)
155 {
156 ar & BOOST_SERIALIZATION_NVP(mean);
157 ar & BOOST_SERIALIZATION_NVP(stddev);
158 }
159
160 private:
162 double mean;
164 double stddev;
165};
166
167} // namespace cf
168} // namespace mlpack
169
170#endif
static MLPACK_EXPORT util::PrefixedOutStream Fatal
Prints fatal messages prefixed with [FATAL], then terminates the program.
Definition: log.hpp:90
This normalization class performs z-score normalization on raw ratings.
double Denormalize(const size_t, const size_t, const double rating) const
Denormalize a computed rating by multiplying it by stddev and then adding mean.
double Stddev() const
Return stddev.
void Normalize(arma::mat &data)
Normalize the data to zero mean and one standard deviation.
void Denormalize(const arma::Mat< size_t > &, arma::vec &predictions) const
Denormalize computed ratings by multiplying them by stddev and then adding mean.
void Normalize(arma::sp_mat &cleanedData)
Normalize the data to zero mean and one standard deviation.
void serialize(Archive &ar, const unsigned int)
Serialization.
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
The core includes that mlpack expects; standard C++ includes and Armadillo.