mlpack 3.4.2
randomized_svd_method.hpp
Go to the documentation of this file.
1
14#ifndef MLPACK_METHODS_CF_DECOMPOSITION_POLICIES_RANDOMIZED_SVD_METHOD_HPP
15#define MLPACK_METHODS_CF_DECOMPOSITION_POLICIES_RANDOMIZED_SVD_METHOD_HPP
16
17#include <mlpack/prereqs.hpp>
19
20namespace mlpack {
21namespace cf {
22
42{
43 public:
52 RandomizedSVDPolicy(const size_t iteratedPower = 0,
53 const size_t maxIterations = 2) :
54 iteratedPower(iteratedPower),
55 maxIterations(maxIterations)
56 {
57 /* Nothing to do here */
58 }
59
72 template<typename MatType>
73 void Apply(const MatType& /* data */,
74 const arma::sp_mat& cleanedData,
75 const size_t rank,
76 const size_t maxIterations,
77 const double /* minResidue */,
78 const bool /* mit */)
79 {
80 arma::vec sigma;
81
82 // Do singular value decomposition using the randomized SVD algorithm.
83 svd::RandomizedSVD rsvd(iteratedPower, maxIterations);
84 rsvd.Apply(cleanedData, w, sigma, h, rank);
85
86 // Sigma matrix is multiplied to w.
87 w = w * arma::diagmat(sigma);
88
89 // Take transpose of the matrix h as required by CF class.
90 h = arma::trans(h);
91 }
92
99 double GetRating(const size_t user, const size_t item) const
100 {
101 double rating = arma::as_scalar(w.row(item) * h.col(user));
102 return rating;
103 }
104
111 void GetRatingOfUser(const size_t user, arma::vec& rating) const
112 {
113 rating = w * h.col(user);
114 }
115
128 template<typename NeighborSearchPolicy>
129 void GetNeighborhood(const arma::Col<size_t>& users,
130 const size_t numUsersForSimilarity,
131 arma::Mat<size_t>& neighborhood,
132 arma::mat& similarities) const
133 {
134 // We want to avoid calculating the full rating matrix, so we will do
135 // nearest neighbor search only on the H matrix, using the observation that
136 // if the rating matrix X = W*H, then d(X.col(i), X.col(j)) = d(W H.col(i),
137 // W H.col(j)). This can be seen as nearest neighbor search on the H
138 // matrix with the Mahalanobis distance where M^{-1} = W^T W. So, we'll
139 // decompose M^{-1} = L L^T (the Cholesky decomposition), and then multiply
140 // H by L^T. Then we can perform nearest neighbor search.
141 arma::mat l = arma::chol(w.t() * w);
142 arma::mat stretchedH = l * h; // Due to the Armadillo API, l is L^T.
143
144 // Temporarily store feature vector of queried users.
145 arma::mat query(stretchedH.n_rows, users.n_elem);
146 // Select feature vectors of queried users.
147 for (size_t i = 0; i < users.n_elem; ++i)
148 query.col(i) = stretchedH.col(users(i));
149
150 NeighborSearchPolicy neighborSearch(stretchedH);
151 neighborSearch.Search(
152 query, numUsersForSimilarity, neighborhood, similarities);
153 }
154
156 const arma::mat& W() const { return w; }
158 const arma::mat& H() const { return h; }
159
161 size_t IteratedPower() const { return iteratedPower; }
163 size_t& IteratedPower() { return iteratedPower; }
164
166 size_t MaxIterations() const { return maxIterations; }
168 size_t& MaxIterations() { return maxIterations; }
169
173 template<typename Archive>
174 void serialize(Archive& ar, const unsigned int /* version */)
175 {
176 ar & BOOST_SERIALIZATION_NVP(w);
177 ar & BOOST_SERIALIZATION_NVP(h);
178 }
179
180 private:
182 size_t iteratedPower;
184 size_t maxIterations;
186 arma::mat w;
188 arma::mat h;
189};
190
191} // namespace cf
192} // namespace mlpack
193
194#endif
Implementation of the Randomized SVD policy to act as a wrapper when accessing Randomized SVD from wi...
double GetRating(const size_t user, const size_t item) const
Return predicted rating given user ID and item ID.
void GetNeighborhood(const arma::Col< size_t > &users, const size_t numUsersForSimilarity, arma::Mat< size_t > &neighborhood, arma::mat &similarities) const
Get the neighborhood and corresponding similarities for a set of users.
size_t MaxIterations() const
Get the number of iterations.
size_t & MaxIterations()
Modify the number of iterations.
RandomizedSVDPolicy(const size_t iteratedPower=0, const size_t maxIterations=2)
Use randomized SVD method to perform collaborative filtering.
size_t IteratedPower() const
Get the size of the normalized power iterations.
const arma::mat & W() const
Get the Item Matrix.
void Apply(const MatType &, const arma::sp_mat &cleanedData, const size_t rank, const size_t maxIterations, const double, const bool)
Apply Collaborative Filtering to the provided data set using the randomized SVD.
const arma::mat & H() const
Get the User Matrix.
void GetRatingOfUser(const size_t user, arma::vec &rating) const
Get predicted ratings for a user.
void serialize(Archive &ar, const unsigned int)
Serialization.
size_t & IteratedPower()
Modify the size of the normalized power iterations.
Randomized SVD is a matrix factorization that is based on randomized matrix approximation techniques,...
void Apply(const arma::sp_mat &data, arma::mat &u, arma::vec &s, arma::mat &v, const size_t rank)
Center the data to apply Principal Component Analysis on given sparse matrix dataset using randomized...
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
The core includes that mlpack expects; standard C++ includes and Armadillo.