mlpack 3.4.2
user_mean_normalization.hpp
Go to the documentation of this file.
1
13#ifndef MLPACK_METHODS_CF_NORMALIZATION_USER_MEAN_NORMALIZATION_HPP
14#define MLPACK_METHODS_CF_NORMALIZATION_USER_MEAN_NORMALIZATION_HPP
15
16#include <mlpack/prereqs.hpp>
17
18namespace mlpack {
19namespace cf {
20
40{
41 public:
42 // Empty constructor.
44
50 void Normalize(arma::mat& data)
51 {
52 const size_t userNum = arma::max(data.row(0)) + 1;
53 userMean = arma::vec(userNum, arma::fill::zeros);
54 // Number of ratings for each user.
55 arma::Row<size_t> ratingNum(userNum, arma::fill::zeros);
56
57 // Sum ratings for each user.
58 data.each_col([&](arma::vec& datapoint)
59 {
60 const size_t user = (size_t) datapoint(0);
61 const double rating = datapoint(2);
62 userMean(user) += rating;
63 ratingNum(user) += 1;
64 });
65
66 // Calculate user mean and subtract user mean from ratings.
67 // Set user mean to 0 if the user has no rating.
68 for (size_t i = 0; i < userNum; ++i)
69 {
70 if (ratingNum(i) != 0)
71 userMean(i) /= ratingNum(i);
72 }
73
74 data.each_col([&](arma::vec& datapoint)
75 {
76 const size_t user = (size_t) datapoint(0);
77 datapoint(2) -= userMean(user);
78 // The algorithm omits rating of zero. If normalized rating equals zero,
79 // it is set to the smallest positive double value.
80 if (datapoint(2) == 0)
81 datapoint(2) = std::numeric_limits<double>::min();
82 });
83 }
84
90 void Normalize(arma::sp_mat& cleanedData)
91 {
92 // Calculate userMean.
93 userMean = arma::vec(cleanedData.n_cols, arma::fill::zeros);
94 arma::Col<size_t> ratingNum(cleanedData.n_cols, arma::fill::zeros);
95 arma::sp_mat::iterator it = cleanedData.begin();
96 arma::sp_mat::iterator it_end = cleanedData.end();
97 for (; it != it_end; ++it)
98 {
99 userMean(it.col()) += *it;
100 ratingNum(it.col()) += 1;
101 }
102 for (size_t i = 0; i < userMean.n_elem; ++i)
103 {
104 if (ratingNum(i) != 0)
105 userMean(i) /= ratingNum(i);
106 }
107
108 // Normalize the data.
109 it = cleanedData.begin();
110 for (; it != cleanedData.end(); ++it)
111 {
112 double tmp = *it - userMean(it.col());
113
114 // The algorithm omits rating of zero. If normalized rating equals zero,
115 // it is set to the smallest positive float value.
116 if (tmp == 0)
117 tmp = std::numeric_limits<float>::min();
118
119 *it = tmp;
120 }
121 }
122
130 double Denormalize(const size_t user,
131 const size_t /* item */,
132 const double rating) const
133 {
134 return rating + userMean(user);
135 }
136
143 void Denormalize(const arma::Mat<size_t>& combinations,
144 arma::vec& predictions) const
145 {
146 for (size_t i = 0; i < predictions.n_elem; ++i)
147 {
148 const size_t user = combinations(0, i);
149 predictions(i) += userMean(user);
150 }
151 }
152
156 const arma::vec& Mean() const { return userMean; }
157
161 template<typename Archive>
162 void serialize(Archive& ar, const unsigned int /* version */)
163 {
164 ar & BOOST_SERIALIZATION_NVP(userMean);
165 }
166
167 private:
169 arma::vec userMean;
170};
171
172} // namespace cf
173} // namespace mlpack
174
175#endif
This normalization class performs user mean normalization on raw ratings.
const arma::vec & Mean() const
Return user mean.
void Normalize(arma::mat &data)
Normalize the data by subtracting the user mean from each existing rating.
void Denormalize(const arma::Mat< size_t > &combinations, arma::vec &predictions) const
Denormalize computed rating by adding user mean.
void Normalize(arma::sp_mat &cleanedData)
Normalize the data by subtracting the user mean from each existing rating.
double Denormalize(const size_t user, const size_t, const double rating) const
Denormalize computed rating by adding user mean.
void serialize(Archive &ar, const unsigned int)
Serialization.
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
The core includes that mlpack expects; standard C++ includes and Armadillo.