IT++ Logo
mog_diag_em.cpp
Go to the documentation of this file.
1 
30 #include <itpp/stat/mog_diag_em.h>
31 #include <itpp/base/math/log_exp.h>
32 #include <itpp/base/timing.h>
33 
34 #include <iostream>
35 #include <iomanip>
36 
37 namespace itpp
38 {
39 
42 {
43 
44  double Ddiv2_log_2pi = D / 2.0 * std::log(m_2pi);
45 
46  for (int k = 0;k < K;k++) c_log_weights[k] = std::log(c_weights[k]);
47 
48  for (int k = 0;k < K;k++) {
49  double acc = 0.0;
50  double * c_diag_cov = c_diag_covs[k];
51  double * c_diag_cov_inv_etc = c_diag_covs_inv_etc[k];
52 
53  for (int d = 0;d < D;d++) {
54  double tmp = c_diag_cov[d];
55  c_diag_cov_inv_etc[d] = 1.0 / (2.0 * tmp);
56  acc += std::log(tmp);
57  }
58 
59  c_log_det_etc[k] = -Ddiv2_log_2pi - 0.5 * acc;
60  }
61 
62 }
63 
64 
67 {
68 
69  double acc = 0.0;
70  for (int k = 0;k < K;k++) {
72  if (c_weights[k] > 1.0) c_weights[k] = 1.0;
73  acc += c_weights[k];
74  }
75  for (int k = 0;k < K;k++) c_weights[k] /= acc;
76 
77  for (int k = 0;k < K;k++)
78  for (int d = 0;d < D;d++)
79  if (c_diag_covs[k][d] < var_floor) c_diag_covs[k][d] = var_floor;
80 
81 }
82 
85 {
86 
87  double acc_loglhood = 0.0;
88 
89  for (int k = 0;k < K;k++) {
90  c_acc_loglhood_K[k] = 0.0;
91 
92  double * c_acc_mean = c_acc_means[k];
93  double * c_acc_cov = c_acc_covs[k];
94 
95  for (int d = 0;d < D;d++) { c_acc_mean[d] = 0.0; c_acc_cov[d] = 0.0; }
96  }
97 
98  for (int n = 0;n < N;n++) {
99  double * c_x = c_X[n];
100 
101  bool danger = paranoid;
102  for (int k = 0;k < K;k++) {
104  c_tmpvecK[k] = tmp;
105  if (tmp >= log_max_K) danger = true;
106  }
107 
108  if (danger) {
109 
110  double log_sum = c_tmpvecK[0];
111  for (int k = 1;k < K;k++) log_sum = log_add(log_sum, c_tmpvecK[k]);
112  acc_loglhood += log_sum;
113 
114  for (int k = 0;k < K;k++) {
115 
116  double * c_acc_mean = c_acc_means[k];
117  double * c_acc_cov = c_acc_covs[k];
118 
119  double tmp_k = trunc_exp(c_tmpvecK[k] - log_sum);
120  acc_loglhood_K[k] += tmp_k;
121 
122  for (int d = 0;d < D;d++) {
123  double tmp_x = c_x[d];
124  c_acc_mean[d] += tmp_k * tmp_x;
125  c_acc_cov[d] += tmp_k * tmp_x * tmp_x;
126  }
127  }
128  }
129  else {
130 
131  double sum = 0.0;
132  for (int k = 0;k < K;k++) { double tmp = std::exp(c_tmpvecK[k]); c_tmpvecK[k] = tmp; sum += tmp; }
133  acc_loglhood += std::log(sum);
134 
135  for (int k = 0;k < K;k++) {
136 
137  double * c_acc_mean = c_acc_means[k];
138  double * c_acc_cov = c_acc_covs[k];
139 
140  double tmp_k = c_tmpvecK[k] / sum;
141  c_acc_loglhood_K[k] += tmp_k;
142 
143  for (int d = 0;d < D;d++) {
144  double tmp_x = c_x[d];
145  c_acc_mean[d] += tmp_k * tmp_x;
146  c_acc_cov[d] += tmp_k * tmp_x * tmp_x;
147  }
148  }
149  }
150  }
151 
152  for (int k = 0;k < K;k++) {
153 
154  double * c_mean = c_means[k];
155  double * c_diag_cov = c_diag_covs[k];
156 
157  double * c_acc_mean = c_acc_means[k];
158  double * c_acc_cov = c_acc_covs[k];
159 
160  double tmp_k = c_acc_loglhood_K[k];
161 
162  c_weights[k] = tmp_k / N;
163 
164  for (int d = 0;d < D;d++) {
165  double tmp_mean = c_acc_mean[d] / tmp_k;
166  c_mean[d] = tmp_mean;
167  c_diag_cov[d] = c_acc_cov[d] / tmp_k - tmp_mean * tmp_mean;
168  }
169  }
170 
171  return(acc_loglhood / N);
172 
173 }
174 
175 
177 {
178  using std::cout;
179  using std::endl;
180  using std::setw;
181  using std::showpos;
182  using std::noshowpos;
183  using std::scientific;
184  using std::fixed;
185  using std::flush;
186  using std::setprecision;
187 
188  double avg_log_lhood_old = -1.0 * std::numeric_limits<double>::max();
189 
190  Real_Timer tt;
191 
192  if (verbose) {
193  cout << "MOG_diag_EM_sup::ml_iterate()" << endl;
194  cout << setw(14) << "iteration";
195  cout << setw(14) << "avg_loglhood";
196  cout << setw(14) << "delta";
197  cout << setw(10) << "toc";
198  cout << endl;
199  }
200 
201  for (int i = 0; i < max_iter; i++) {
202  sanitise_params();
204 
205  if (verbose) tt.tic();
206  double avg_log_lhood_new = ml_update_params();
207 
208  if (verbose) {
209  double delta = avg_log_lhood_new - avg_log_lhood_old;
210 
211  cout << noshowpos << fixed;
212  cout << setw(14) << i;
213  cout << showpos << scientific << setprecision(3);
214  cout << setw(14) << avg_log_lhood_new;
215  cout << setw(14) << delta;
216  cout << noshowpos << fixed;
217  cout << setw(10) << tt.toc();
218  cout << endl << flush;
219  }
220 
221  if (avg_log_lhood_new <= avg_log_lhood_old) break;
222 
223  avg_log_lhood_old = avg_log_lhood_new;
224  }
225 }
226 
227 
228 void MOG_diag_EM_sup::ml(MOG_diag &model_in, Array<vec> &X_in, int max_iter_in, double var_floor_in, double weight_floor_in, bool verbose_in)
229 {
230 
231  it_assert(model_in.is_valid(), "MOG_diag_EM_sup::ml(): initial model not valid");
232  it_assert(check_array_uniformity(X_in), "MOG_diag_EM_sup::ml(): 'X' is empty or contains vectors of varying dimensionality");
233  it_assert((max_iter_in > 0), "MOG_diag_EM_sup::ml(): 'max_iter' needs to be greater than zero");
234 
235  verbose = verbose_in;
236 
237  N = X_in.size();
238 
239  Array<vec> means_in = model_in.get_means();
240  Array<vec> diag_covs_in = model_in.get_diag_covs();
241  vec weights_in = model_in.get_weights();
242 
243  init(means_in, diag_covs_in, weights_in);
244 
245  means_in.set_size(0);
246  diag_covs_in.set_size(0);
247  weights_in.set_size(0);
248 
249  if (K > N) {
250  it_warning("MOG_diag_EM_sup::ml(): WARNING: K > N");
251  }
252  else {
253  if (K > N / 10) {
254  it_warning("MOG_diag_EM_sup::ml(): WARNING: K > N/10");
255  }
256  }
257 
258  var_floor = var_floor_in;
259  weight_floor = weight_floor_in;
260 
261  const double tiny = std::numeric_limits<double>::min();
262  if (var_floor < tiny) var_floor = tiny;
263  if (weight_floor < tiny) weight_floor = tiny;
264  if (weight_floor > 1.0 / K) weight_floor = 1.0 / K;
265 
266  max_iter = max_iter_in;
267 
268  tmpvecK.set_size(K);
269  tmpvecD.set_size(D);
270  acc_loglhood_K.set_size(K);
271 
272  acc_means.set_size(K);
273  for (int k = 0;k < K;k++) acc_means(k).set_size(D);
274  acc_covs.set_size(K);
275  for (int k = 0;k < K;k++) acc_covs(k).set_size(D);
276 
277  c_X = enable_c_access(X_in);
278  c_tmpvecK = enable_c_access(tmpvecK);
279  c_tmpvecD = enable_c_access(tmpvecD);
280  c_acc_loglhood_K = enable_c_access(acc_loglhood_K);
281  c_acc_means = enable_c_access(acc_means);
282  c_acc_covs = enable_c_access(acc_covs);
283 
284  ml_iterate();
285 
286  model_in.init(means, diag_covs, weights);
287 
289  disable_c_access(c_tmpvecK);
290  disable_c_access(c_tmpvecD);
291  disable_c_access(c_acc_loglhood_K);
292  disable_c_access(c_acc_means);
293  disable_c_access(c_acc_covs);
294 
295 
296  tmpvecK.set_size(0);
297  tmpvecD.set_size(0);
298  acc_loglhood_K.set_size(0);
299  acc_means.set_size(0);
300  acc_covs.set_size(0);
301 
302  cleanup();
303 
304 }
305 
307  double, double, bool)
308 {
309  it_error("MOG_diag_EM_sup::map(): not implemented yet");
310 }
311 
312 
313 //
314 // convenience functions
315 
316 void MOG_diag_ML(MOG_diag &model_in, Array<vec> &X_in, int max_iter_in, double var_floor_in, double weight_floor_in, bool verbose_in)
317 {
318  MOG_diag_EM_sup EM;
319  EM.ml(model_in, X_in, max_iter_in, var_floor_in, weight_floor_in, verbose_in);
320 }
321 
322 void MOG_diag_MAP(MOG_diag &, MOG_diag &, Array<vec> &, int, double, double,
323  double, bool)
324 {
325  it_error("MOG_diag_MAP(): not implemented yet");
326 }
327 
328 }
329 
const double m_2pi
Constant 2*Pi.
Definition: misc.h:106
double log_add(double log_a, double log_b)
Safe substitute for log(exp(log_a) + exp(log_b))
Definition: log_exp.cpp:40
double ml_update_params()
ADD DOCUMENTATION HERE.
Definition: mog_diag_em.cpp:84
int size() const
Returns the number of data elements in the array object.
Definition: array.h:155
int D
dimensionality
Definition: mog_generic.h:295
Array< vec > get_means() const
Obtain a copy of the array of mean vectors.
Definition: mog_generic.h:169
Array< vec > get_diag_covs() const
Obtain a copy of the array of diagonal covariance vectors.
Definition: mog_generic.h:172
Expectation Maximisation (EM) based optimisers for MOG - header file.
Array< vec > means
means
Definition: mog_generic.h:298
double * c_log_weights
pointer to the log version of the weight vector
Definition: mog_diag.h:209
double * c_log_det_etc
pointer to the log_det_etc vector
Definition: mog_diag.h:212
double toc(void)
Returns the elapsed time since last tic()
Definition: timing.cpp:125
double ** c_diag_covs
pointers to the covariance vectors
Definition: mog_diag.h:200
void MOG_diag_MAP(MOG_diag &, MOG_diag &, Array< vec > &, int, double, double, double, bool)
T sum(const Vec< T > &v)
Sum of all elements in the vector.
Definition: matfunc.h:59
bool is_valid() const
Returns true if the model&#39;s parameters are valid.
Definition: mog_generic.h:154
#define it_assert(t, s)
Abort if t is not true.
Definition: itassert.h:94
Logarithmic and exponenential functions - header file.
void set_size(int n, bool copy=false)
Resizing an Array<T>.
Definition: array.h:257
double ** c_means
pointers to the mean vectors
Definition: mog_diag.h:197
vec log(const vec &x)
The natural logarithm of the elements.
Definition: log_exp.h:241
T min(const Vec< T > &in)
Minimum value of vector.
Definition: min_max.h:125
Diagonal Mixture of Gaussians (MOG) class.
Definition: mog_diag.h:55
vec exp(const vec &x)
Exp of the elements of a vector x.
Definition: log_exp.h:155
T max(const Vec< T > &v)
Maximum value of vector.
Definition: min_max.h:45
double ** disable_c_access(double **A_in)
Disable C style access to an Array of vectors (vec)
Definition: mog_diag.cpp:300
double log_lhood_single_gaus_internal(const double *c_x_in, const int k) const
ADD DOCUMENTATION HERE.
Definition: mog_diag.cpp:37
void MOG_diag_ML(MOG_diag &model_in, Array< vec > &X_in, int max_iter_in, double var_floor_in, double weight_floor_in, bool verbose_in)
double var_floor
ADD DOCUMENTATION HERE.
Definition: mog_diag_em.h:74
it_file & flush(it_file &f)
Flush operatorFlushes the data. Usage:
Definition: itfile.h:409
void cleanup()
Release memory used by the model. The model will be empty.
Definition: mog_diag.h:111
itpp namespace
Definition: itmex.h:36
void map(MOG_diag &model_in, MOG_diag &prior_model, Array< vec > &X_in, int max_iter_in=10, double alpha_in=0.5, double var_floor_in=0.0, double weight_floor_in=0.0, bool verbose_in=false)
ADD DOCUMENTATION HERE.
A real time timer classMeasures real time.
Definition: timing.h:138
double ** c_diag_covs_inv_etc
pointers to the inverted covariance vectors
Definition: mog_diag.h:203
double ** enable_c_access(Array< vec > &A_in)
Enable C style access to an Array of vectors (vec)
Definition: mog_diag.cpp:284
void sanitise_params()
ADD DOCUMENTATION HERE.
Definition: mog_diag_em.cpp:66
bool verbose
Whether we print the progress.
Definition: mog_diag_em.h:62
void update_internals()
ADD DOCUMENTATION HERE.
Definition: mog_diag_em.cpp:41
void init()
Initialise the model to be empty.
Definition: mog_generic.cpp:43
double weight_floor
ADD DOCUMENTATION HERE.
Definition: mog_diag_em.h:76
#define it_warning(s)
Display a warning message.
Definition: itassert.h:173
int max_iter
Maximum number of iterations.
Definition: mog_diag_em.h:68
double trunc_exp(double x)
Truncated exponential function.
Definition: log_exp.h:137
void ml_iterate()
ADD DOCUMENTATION HERE.
bool paranoid
indicates whether we are paranoid about numerical stability
Definition: mog_generic.h:289
int K
number of gaussians
Definition: mog_generic.h:292
vec get_weights() const
Obtain a copy of the weight vector.
Definition: mog_generic.h:166
#define it_error(s)
Abort unconditionally.
Definition: itassert.h:126
support class for MOG_diag_ML() and MOG_diag_MAP()
Definition: mog_diag_em.h:43
int N
number of training vectors
Definition: mog_diag_em.h:65
Array< vec > diag_covs
diagonal covariance matrices, stored as vectors
Definition: mog_generic.h:301
double log_max_K
Pre-calcualted std::log(std::numeric_limits<double>::max() / K), where K is the number of Gaussians...
Definition: mog_generic.h:310
void ml(MOG_diag &model_in, Array< vec > &X_in, int max_iter_in=10, double var_floor_in=0.0, double weight_floor_in=0.0, bool verbose_in=false)
ADD DOCUMENTATION HERE.
double * c_weights
pointer to the weight vector
Definition: mog_diag.h:206
vec weights
weights
Definition: mog_generic.h:307
bool check_array_uniformity(const Array< vec > &A) const
Check if all vectors in Array X_in have the same dimensionality.
double ** c_X
&#39;C&#39; pointers to training vectors
Definition: mog_diag_em.h:71
void tic(void)
Resets the timer and starts it.
Definition: timing.cpp:119
Definitions of Timing classes.
SourceForge Logo

Generated on Sun Apr 10 2022 12:00:00 for IT++ by Doxygen 1.8.14