mlpack 3.4.2
item_mean_normalization.hpp
Go to the documentation of this file.
1
13#ifndef MLPACK_METHODS_CF_NORMALIZATION_ITEM_MEAN_NORMALIZATION_HPP
14#define MLPACK_METHODS_CF_NORMALIZATION_ITEM_MEAN_NORMALIZATION_HPP
15
16#include <mlpack/prereqs.hpp>
17
18namespace mlpack {
19namespace cf {
20
40{
41 public:
42 // Empty constructor.
44
50 void Normalize(arma::mat& data)
51 {
52 const size_t itemNum = arma::max(data.row(1)) + 1;
53 itemMean = arma::vec(itemNum, arma::fill::zeros);
54 // Number of ratings for each item.
55 arma::Row<size_t> ratingNum(itemNum, arma::fill::zeros);
56
57 // Sum ratings for each item.
58 data.each_col([&](arma::vec& datapoint)
59 {
60 const size_t item = (size_t) datapoint(1);
61 const double rating = datapoint(2);
62 itemMean(item) += rating;
63 ratingNum(item) += 1;
64 });
65
66 // Calculate item mean and subtract item mean from ratings.
67 // Set item mean to 0 if the item has no rating.
68 for (size_t i = 0; i < itemNum; ++i)
69 {
70 if (ratingNum(i) != 0)
71 itemMean(i) /= ratingNum(i);
72 }
73
74 data.each_col([&](arma::vec& datapoint)
75 {
76 const size_t item = (size_t) datapoint(1);
77 datapoint(2) -= itemMean(item);
78 // The algorithm omits rating of zero. If normalized rating equals zero,
79 // it is set to the smallest positive float value.
80 if (datapoint(2) == 0)
81 datapoint(2) = std::numeric_limits<float>::min();
82 });
83 }
84
90 void Normalize(arma::sp_mat& cleanedData)
91 {
92 // Calculate itemMean.
93 itemMean = arma::vec(cleanedData.n_rows, arma::fill::zeros);
94 arma::Col<size_t> ratingNum(cleanedData.n_rows, arma::fill::zeros);
95 arma::sp_mat::iterator it = cleanedData.begin();
96 arma::sp_mat::iterator it_end = cleanedData.end();
97 for (; it != it_end; ++it)
98 {
99 itemMean(it.row()) += *it;
100 ratingNum(it.row()) += 1;
101 }
102 for (size_t i = 0; i < itemMean.n_elem; ++i)
103 {
104 if (ratingNum(i) != 0)
105 itemMean(i) /= ratingNum(i);
106 }
107
108 // Normalize the data.
109 it = cleanedData.begin();
110 for (; it != cleanedData.end(); ++it)
111 {
112 double tmp = *it - itemMean(it.row());
113
114 // The algorithm omits rating of zero. If normalized rating equals zero,
115 // it is set to the smallest positive double value.
116 if (tmp == 0)
117 tmp = std::numeric_limits<float>::min();
118
119 *it = tmp;
120 }
121 }
122
130 double Denormalize(const size_t /* user */,
131 const size_t item,
132 const double rating) const
133 {
134 return rating + itemMean(item);
135 }
136
143 void Denormalize(const arma::Mat<size_t>& combinations,
144 arma::vec& predictions) const
145 {
146 for (size_t i = 0; i < predictions.n_elem; ++i)
147 {
148 const size_t item = combinations(1, i);
149 predictions(i) += itemMean(item);
150 }
151 }
152
156 const arma::vec& Mean() const { return itemMean; }
157
161 template<typename Archive>
162 void serialize(Archive& ar, const unsigned int /* version */)
163 {
164 ar & BOOST_SERIALIZATION_NVP(itemMean);
165 }
166
167 private:
169 arma::vec itemMean;
170};
171
172} // namespace cf
173} // namespace mlpack
174
175#endif
This normalization class performs item mean normalization on raw ratings.
double Denormalize(const size_t, const size_t item, const double rating) const
Denormalize computed rating by adding item mean.
const arma::vec & Mean() const
Return item mean.
void Normalize(arma::mat &data)
Normalize the data by subtracting item mean from each of existing ratings.
void Denormalize(const arma::Mat< size_t > &combinations, arma::vec &predictions) const
Denormalize computed rating by adding item mean.
void Normalize(arma::sp_mat &cleanedData)
Normalize the data by subtracting item mean from each of existing ratings.
void serialize(Archive &ar, const unsigned int)
Serialization.
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
The core includes that mlpack expects; standard C++ includes and Armadillo.