PrintTimer.cpp
/*------------------------------------------------------------------------*/
/*                 Copyright 2010 - 2011 Sandia Corporation.               */
/*  Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive    */
/*  license for use of this work by or on behalf of the U.S. Government.   */
/*  Export of this program may require a license from the                  */
/*  United States Government.                                              */
/*------------------------------------------------------------------------*/

#include <stk_util/diag/PrintTimer.hpp>
#include <stk_util/diag/PrintTable.hpp>

#include <iomanip>
#include <ostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <typeinfo>
#include <utility>
#include <algorithm>
#include <limits>
#include <list>
#include <vector>

#include <stk_util/diag/Writer.hpp>
#include <stk_util/diag/WriterManip.hpp>
#include <stk_util/diag/WriterExt.hpp>
#include <stk_util/util/string_case_compare.hpp>
#include <stk_util/util/Marshal.hpp>

namespace stk_classic {
namespace diag {
namespace {
struct ParallelTimer;
}
}

template <class T>
Marshal &operator<<(Marshal &mout, const diag::Timer::Metric<T> &t);

Marshal &operator<<(Marshal &mout, const diag::Timer &t);

Marshal &operator>>(Marshal &min, diag::ParallelTimer &t);

} // namespace stk_classic

namespace stk_classic {
namespace diag {

namespace {

struct Percent
{
  Percent(double numerator, double denominator)
    : m_numerator(numerator),
      m_denominator(denominator)
  {}

  std::ostream &operator()(std::ostream &os) const;

private:
  double        m_numerator;
  double        m_denominator;
};


std::ostream &
Percent::operator()(
  std::ostream &        os) const
{
  std::ostringstream strout;

  if (m_numerator == 0.0)
    strout << "(0.00%)";
  else if (m_denominator == 0.0)
    strout << "( NaN)";
  else {
    double ratio = m_numerator/m_denominator*100.0;
    if (ratio < 0.01)
      strout << "(<0.01%)";
    else if (ratio >= 100.0)
      strout << "(" << std::setw(5) << std::setprecision(1) << std::fixed << ratio << "%)";
    else
      strout << "(" << std::setw(5) << std::setprecision(2) << std::fixed << ratio << "%)";
  }

  return os << strout.str();
}


inline std::ostream &operator<<(std::ostream &os, const Percent &p) {
  return p(os);
}
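
// Illustrative usage (not part of the original source): Percent is streamed after each
// metric value below to append a parenthesized percentage of a reference value, e.g.
//
//   std::cout << Percent(12.5, 50.0);    // prints "(25.00%)"
//   std::cout << Percent(0.0, 50.0);     // prints "(0.00%)"
//   std::cout << Percent(1.0, 0.0);      // prints "( NaN)"
//   std::cout << Percent(0.001, 50.0);   // prints "(<0.01%)"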

struct ParallelTimer
{
  template <typename T>
  struct Metric
  {
    Metric()
      : m_value(0),
        m_checkpoint(0),
        m_sum(0.0),
        m_min(std::numeric_limits<double>::max()),
        m_max(0.0)
    {}

    typename MetricTraits<T>::Type      m_value;        // Metric value on this processor
    typename MetricTraits<T>::Type      m_checkpoint;   // Checkpointed metric value
    double                              m_sum;          // Sum of values across processors
    double                              m_min;          // Minimum value across processors
    double                              m_max;          // Maximum value across processors

    void accumulate(const Metric<T> &metric, bool checkpoint) {
      double value = static_cast<double>(metric.m_value);
      if (checkpoint)
        value -= static_cast<double>(metric.m_checkpoint);

      m_sum += value;
      m_min = std::min(m_min, value);
      m_max = std::max(m_max, value);
    }

    Writer &dump(Writer &dout) const {
      if (dout.shouldPrint()) {
        dout << "Metric<" << typeid(typename MetricTraits<T>::Type) << ">" << push << dendl;
        dout << "m_value " << m_value << dendl;
        dout << "m_checkpoint " << m_checkpoint << dendl;
        dout << "m_sum " << m_sum << dendl;
        dout << "m_min " << m_min << dendl;
        dout << "m_max " << m_max << dendl;
        dout << pop;
      }
      return dout;
    }
  };

  ParallelTimer()
    : m_name(),
      m_timerMask(0),
      m_subtimerLapCount(0),
      m_lapCount(),
      m_cpuTime(),
      m_wallTime(),
      m_MPICount(),
      m_MPIByteCount(),
      m_heapAlloc(),
      m_subtimerList()
  {}

  ParallelTimer(const ParallelTimer &parallel_timer)
    : m_name(parallel_timer.m_name),
      m_timerMask(parallel_timer.m_timerMask),
      m_subtimerLapCount(parallel_timer.m_subtimerLapCount),
      m_lapCount(parallel_timer.m_lapCount),
      m_cpuTime(parallel_timer.m_cpuTime),
      m_wallTime(parallel_timer.m_wallTime),
      m_MPICount(parallel_timer.m_MPICount),
      m_MPIByteCount(parallel_timer.m_MPIByteCount),
      m_heapAlloc(parallel_timer.m_heapAlloc),
      m_subtimerList(parallel_timer.m_subtimerList)
  {}

  ParallelTimer &operator=(const ParallelTimer &parallel_timer) {
    m_name = parallel_timer.m_name;
    m_timerMask = parallel_timer.m_timerMask;
    m_subtimerLapCount = parallel_timer.m_subtimerLapCount;
    m_lapCount = parallel_timer.m_lapCount;
    m_cpuTime = parallel_timer.m_cpuTime;
    m_wallTime = parallel_timer.m_wallTime;
    m_MPICount = parallel_timer.m_MPICount;
    m_MPIByteCount = parallel_timer.m_MPIByteCount;
    m_heapAlloc = parallel_timer.m_heapAlloc;
    m_subtimerList = parallel_timer.m_subtimerList;

    return *this;
  }

  template <class T>
  const Metric<T> &getMetric() const;

  std::string                   m_name;
  TimerMask                     m_timerMask;
  double                        m_subtimerLapCount;

  Metric<LapCount>              m_lapCount;
  Metric<CPUTime>               m_cpuTime;
  Metric<WallTime>              m_wallTime;
  Metric<MPICount>              m_MPICount;
  Metric<MPIByteCount>          m_MPIByteCount;
  Metric<HeapAlloc>             m_heapAlloc;

  std::list<ParallelTimer>      m_subtimerList;

  Writer &dump(Writer &dout) const;
};

template<>
const ParallelTimer::Metric<LapCount> &
ParallelTimer::getMetric<LapCount>() const {
  return m_lapCount;
}


template<>
const ParallelTimer::Metric<CPUTime> &
ParallelTimer::getMetric<CPUTime>() const {
  return m_cpuTime;
}


template<>
const ParallelTimer::Metric<WallTime> &
ParallelTimer::getMetric<WallTime>() const {
  return m_wallTime;
}


template<>
const ParallelTimer::Metric<MPICount> &
ParallelTimer::getMetric<MPICount>() const {
  return m_MPICount;
}


template<>
const ParallelTimer::Metric<MPIByteCount> &
ParallelTimer::getMetric<MPIByteCount>() const {
  return m_MPIByteCount;
}


template<>
const ParallelTimer::Metric<HeapAlloc> &
ParallelTimer::getMetric<HeapAlloc>() const {
  return m_heapAlloc;
}


template <typename T>
Writer &operator<<(Writer &dout, const ParallelTimer::Metric<T> &t) {
  return t.dump(dout);
}

Writer &operator<<(Writer &dout, const ParallelTimer &parallel_timer) {
  return parallel_timer.dump(dout);
}

Writer &
ParallelTimer::dump(Writer &dout) const {
  if (dout.shouldPrint()) {
    dout << "ParallelTimer " << m_name << push << dendl;
    dout << "m_name " << m_name << dendl;
    dout << "m_timerMask " << hex << m_timerMask << dendl;
    dout << "m_subtimerLapCount " << m_subtimerLapCount << dendl;
    dout << "m_lapCount " << m_lapCount << dendl;
    dout << "m_cpuTime " << m_cpuTime << dendl;
    dout << "m_wallTime " << m_wallTime << dendl;
    dout << "m_MPICount " << m_MPICount << dendl;
    dout << "m_MPIByteCount " << m_MPIByteCount << dendl;
    dout << "m_heapAlloc " << m_heapAlloc << dendl;
    dout << "m_subtimerList " << m_subtimerList << dendl;
    dout << pop;
  }
  return dout;
}

#ifdef __INTEL_COMPILER
#pragma warning(push)
#pragma warning(disable: 444)
#endif
class finder : public std::unary_function<ParallelTimer, bool>
{
public:
  finder(const std::string &name)
    : m_name(name)
  {}

  bool operator()(const ParallelTimer &parallel_timer) const {
    return equal_case(parallel_timer.m_name, m_name);
  }

private:
  std::string           m_name;
};
#ifdef __INTEL_COMPILER
#pragma warning(pop)
#endif


void
merge_parallel_timer(
  ParallelTimer &       p0,
  const ParallelTimer & p1,
  bool                  checkpoint)
{
  p0.m_timerMask = p1.m_timerMask;
  p0.m_subtimerLapCount += p1.m_subtimerLapCount;
  p0.m_lapCount.accumulate(p1.m_lapCount, checkpoint);
  p0.m_cpuTime.accumulate(p1.m_cpuTime, checkpoint);
  p0.m_wallTime.accumulate(p1.m_wallTime, checkpoint);
  p0.m_MPICount.accumulate(p1.m_MPICount, checkpoint);
  p0.m_MPIByteCount.accumulate(p1.m_MPIByteCount, checkpoint);
  p0.m_heapAlloc.accumulate(p1.m_heapAlloc, checkpoint);

  for (std::list<ParallelTimer>::const_iterator p1_it = p1.m_subtimerList.begin(); p1_it != p1.m_subtimerList.end(); ++p1_it) {
    std::list<ParallelTimer>::iterator p0_it = std::find_if(p0.m_subtimerList.begin(), p0.m_subtimerList.end(), finder((*p1_it).m_name));
    if (p0_it == p0.m_subtimerList.end()) {
      p0.m_subtimerList.push_back((*p1_it));
      p0_it = --p0.m_subtimerList.end();
      merge_parallel_timer(*p0_it, *p1_it, checkpoint);
    }
    else
      merge_parallel_timer(*p0_it, *p1_it, checkpoint);
  }
}
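
// Worked example (illustrative only, not part of the original source): if rank A's
// "Solve" subtimer contributes m_value == 2.0 and rank B's contributes m_value == 6.0,
// then after merge_parallel_timer has folded both ranks into the aggregate tree the
// aggregate "Solve" entry holds m_sum == 8.0, m_min == 2.0 and m_max == 6.0; subtimers
// present on only one rank are appended to m_subtimerList, matched by name via finder.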


void
collect_timers(
  Timer &               root_timer,
  ParallelTimer &       parallel_timer,
  bool                  checkpoint,
  ParallelMachine       comm)
{
  Marshal mout;
  mout << root_timer;

#ifdef STK_HAS_MPI
  const int parallel_root = 0 ;
  const int parallel_size = parallel_machine_size(comm);
  const int parallel_rank = parallel_machine_rank(comm);

  // Gather the send counts on the root processor
  std::string send_string(mout.str());

  ParallelTimer root_parallel_timer;

  // We need to gather the timer data in a number of 'cycles' where we only receive
  // from a portion of the other processors each cycle.  This is because buffer
  // allocation failures have been observed for runs on very large numbers of
  // processors if the 'root' processor tries to allocate a buffer large enough to
  // hold timing data from all other processors.
  int num_cycles = 16;
  if (parallel_size < 1024) {
    // If fewer than 1024 processors, just do them all at once.
    num_cycles = 1;
  }
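
  // Illustrative note (not part of the original source): with num_cycles == 16, rank r
  // sends its marshalled data on the single cycle ii for which (r + ii) % 16 == 0, so
  // on, say, a 4096-rank run each MPI_Gatherv below receives from about 4096/16 = 256
  // ranks, keeping the root's per-cycle receive buffer roughly 1/16th of the full size.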

  std::vector<char> buffer;

  for(int ii=0; ii<num_cycles; ++ii) {
    std::vector<int> recv_count(parallel_size, 0);
    int * const recv_count_ptr = &recv_count[0] ;

    // send_count is the amount of data this processor needs to send.
    int send_count = send_string.size();

    // Should this processor send on the current cycle?  If not, set send_count to 0.
    if ((parallel_rank+ii)%num_cycles != 0) {
      send_count = 0;
    }

    int result = MPI_Gather(&send_count, 1, MPI_INT,
                            recv_count_ptr, 1, MPI_INT,
                            parallel_root, comm);
    if (MPI_SUCCESS != result) {
      std::ostringstream message ;
      message << "stk_classic::diag::collect_timers FAILED: MPI_Gather = " << result ;
      throw std::runtime_error(message.str());
    }

    // Receive counts are only non-zero on the root processor:
    std::vector<int> recv_displ(parallel_size + 1, 0);

    for (int i = 0 ; i < parallel_size ; ++i) {
      recv_displ[i + 1] = recv_displ[i] + recv_count[i] ;
    }

    const int recv_size = recv_displ[parallel_size] ;

    buffer.assign(recv_size, 0);

    {
      const char * const send_ptr = send_string.data();
      char * const recv_ptr = recv_size ? & buffer[0] : 0;
      int * const recv_displ_ptr = & recv_displ[0] ;

      result = MPI_Gatherv((void *) send_ptr, send_count, MPI_CHAR,
                           recv_ptr, recv_count_ptr, recv_displ_ptr, MPI_CHAR,
                           parallel_root, comm);
      if (MPI_SUCCESS != result) {
        std::ostringstream message ;
        message << "stk_classic::diag::collect_timers FAILED: MPI_Gatherv = " << result ;
        throw std::runtime_error(message.str());
      }

      std::vector<ParallelTimer> parallel_timer_vector;
      parallel_timer_vector.reserve(parallel_size);

      if (parallel_rank == parallel_root) {
        for (int j = 0; j < parallel_size; ++j) {
          int received_count = recv_displ[j+1] - recv_displ[j];
          if (received_count > 0) {
            // Grow parallel_timer_vector by 1:
            parallel_timer_vector.resize(parallel_timer_vector.size()+1);
            Marshal min(std::string(recv_ptr + recv_displ[j], recv_ptr + recv_displ[j + 1]));
            // Put this data into the last entry of parallel_timer_vector:
            min >> parallel_timer_vector[parallel_timer_vector.size()-1];
          }
        }

        if (parallel_rank==parallel_root && send_count>0) root_parallel_timer = parallel_timer_vector[0];

        for (size_t j = 0; j < parallel_timer_vector.size(); ++j)
          merge_parallel_timer(root_parallel_timer, parallel_timer_vector[j], checkpoint);
      }
    }
  }
  parallel_timer = root_parallel_timer;
#endif
}

// PrintTable &printTable(PrintTable &table, MPI_Comm mpi_comm, MetricsMask metrics_mask) const;

PrintTable &
printSubtable(
  PrintTable &          table,
  const Timer &         root_timer,
  const Timer &         timer,
  MetricsMask           metrics_mask,
  int                   depth,
  bool                  timer_checkpoint)
{
  if (timer.getSubtimerLapCount() != 0.0) {
    if (timer.shouldRecord()) {
      if (timer.getTimerMask() == 0 || timer.getMetric<LapCount>().getAccumulatedLap(timer_checkpoint) > 0) {
        table << justify(PrintTable::Cell::LEFT) << indent(depth) << timer.getName() << end_col
              << justify(PrintTable::Cell::RIGHT) << timer.getMetric<LapCount>().getAccumulatedLap(timer_checkpoint) << end_col;

        if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
          table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<CPUTime>::format(timer.getMetric<CPUTime>().getAccumulatedLap(timer_checkpoint))
                << " " << std::setw(8) << Percent(timer.getMetric<CPUTime>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<CPUTime>().getAccumulatedLap(timer_checkpoint)) << end_col;
        if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
          table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<WallTime>::format(timer.getMetric<WallTime>().getAccumulatedLap(timer_checkpoint))
                << " " << std::setw(8) << Percent(timer.getMetric<WallTime>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<WallTime>().getAccumulatedLap(timer_checkpoint)) << end_col;
        if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
          table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPICount>::format(timer.getMetric<MPICount>().getAccumulatedLap(timer_checkpoint))
                << " " << std::setw(8) << Percent(timer.getMetric<MPICount>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<MPICount>().getAccumulatedLap(timer_checkpoint)) << end_col;
        if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
          table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPIByteCount>::format(timer.getMetric<MPIByteCount>().getAccumulatedLap(timer_checkpoint))
                << " " << std::setw(8) << Percent(timer.getMetric<MPIByteCount>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<MPIByteCount>().getAccumulatedLap(timer_checkpoint)) << end_col;
        if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
          table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<HeapAlloc>::format(timer.getMetric<HeapAlloc>().getAccumulatedLap(timer_checkpoint))
                << " " << std::setw(8) << Percent(timer.getMetric<HeapAlloc>().getAccumulatedLap(timer_checkpoint), root_timer.getMetric<HeapAlloc>().getAccumulatedLap(timer_checkpoint)) << end_col;
      }
      else
        table << justify(PrintTable::Cell::LEFT) << indent(depth) << span << timer.getName() << end_col;

      table << end_row;
      depth++;
    }

    for (TimerList::const_iterator it = timer.begin(); it != timer.end(); ++it)
      printSubtable(table, root_timer, *it, metrics_mask, depth, timer_checkpoint);
  }

  return table;
}


PrintTable &
printSubtable(
  PrintTable &          table,
  const ParallelTimer & root_timer,
  const ParallelTimer & timer,
  MetricsMask           metrics_mask,
  int                   depth,
  bool                  timer_checkpoint)
{
  if (timer.m_subtimerLapCount != 0.0) {
    if (timer.m_timerMask == 0 || timer.getMetric<LapCount>().m_sum > 0) {
      table << justify(PrintTable::Cell::LEFT) << indent(depth) << timer.m_name << end_col
            << justify(PrintTable::Cell::RIGHT) << timer.getMetric<LapCount>().m_sum << end_col;

      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
        table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<CPUTime>::format(timer.getMetric<CPUTime>().m_sum)
              << " " << std::setw(8) << Percent(timer.getMetric<CPUTime>().m_sum, root_timer.getMetric<CPUTime>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<CPUTime>::format(timer.getMetric<CPUTime>().m_min)
              << " " << std::setw(8) << Percent(timer.getMetric<CPUTime>().m_min, root_timer.getMetric<CPUTime>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<CPUTime>::format(timer.getMetric<CPUTime>().m_max)
              << " " << std::setw(8) << Percent(timer.getMetric<CPUTime>().m_max, root_timer.getMetric<CPUTime>().m_sum) << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
        table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<WallTime>::format(timer.getMetric<WallTime>().m_sum)
              << " " << std::setw(8) << Percent(timer.getMetric<WallTime>().m_sum, root_timer.getMetric<WallTime>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<WallTime>::format(timer.getMetric<WallTime>().m_min)
              << " " << std::setw(8) << Percent(timer.getMetric<WallTime>().m_min, root_timer.getMetric<WallTime>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<WallTime>::format(timer.getMetric<WallTime>().m_max)
              << " " << std::setw(8) << Percent(timer.getMetric<WallTime>().m_max, root_timer.getMetric<WallTime>().m_sum) << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
        table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPICount>::format(timer.getMetric<MPICount>().m_sum)
              << " " << std::setw(8) << Percent(timer.getMetric<MPICount>().m_sum, root_timer.getMetric<MPICount>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPICount>::format(timer.getMetric<MPICount>().m_min)
              << " " << std::setw(8) << Percent(timer.getMetric<MPICount>().m_min, root_timer.getMetric<MPICount>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPICount>::format(timer.getMetric<MPICount>().m_max)
              << " " << std::setw(8) << Percent(timer.getMetric<MPICount>().m_max, root_timer.getMetric<MPICount>().m_sum) << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
        table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPIByteCount>::format(timer.getMetric<MPIByteCount>().m_sum)
              << " " << std::setw(8) << Percent(timer.getMetric<MPIByteCount>().m_sum, root_timer.getMetric<MPIByteCount>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPIByteCount>::format(timer.getMetric<MPIByteCount>().m_min)
              << " " << std::setw(8) << Percent(timer.getMetric<MPIByteCount>().m_min, root_timer.getMetric<MPIByteCount>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<MPIByteCount>::format(timer.getMetric<MPIByteCount>().m_max)
              << " " << std::setw(8) << Percent(timer.getMetric<MPIByteCount>().m_max, root_timer.getMetric<MPIByteCount>().m_sum) << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
        table << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<HeapAlloc>::format(timer.getMetric<HeapAlloc>().m_sum)
              << " " << std::setw(8) << Percent(timer.getMetric<HeapAlloc>().m_sum, root_timer.getMetric<HeapAlloc>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<HeapAlloc>::format(timer.getMetric<HeapAlloc>().m_min)
              << " " << std::setw(8) << Percent(timer.getMetric<HeapAlloc>().m_min, root_timer.getMetric<HeapAlloc>().m_sum) << end_col
              << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits<HeapAlloc>::format(timer.getMetric<HeapAlloc>().m_max)
              << " " << std::setw(8) << Percent(timer.getMetric<HeapAlloc>().m_max, root_timer.getMetric<HeapAlloc>().m_sum) << end_col;
    }
    else
      table << justify(PrintTable::Cell::LEFT) << indent(depth) << span << timer.m_name << end_col;

    table << end_row;
    depth++;
  }

  for (std::list<ParallelTimer>::const_iterator it = timer.m_subtimerList.begin(); it != timer.m_subtimerList.end(); ++it)
    printSubtable(table, root_timer, *it, metrics_mask, depth, timer_checkpoint);

  return table;
}


PrintTable &
printTable(
  PrintTable &          table,
  Timer &               root_timer,
  MetricsMask           metrics_mask,
  size_t                name_width,
  bool                  timer_checkpoint)
{
  updateRootTimer(root_timer);

  root_timer.accumulateSubtimerLapCounts();

  if (metrics_mask & getEnabledTimerMetricsMask()) {
    table.setAutoEndCol(false);

    table << cell_width(name_width) << justify(PrintTable::Cell::CENTER) << "Timer" << (timer_checkpoint ? " (delta time)" : "") << end_col
          << justify(PrintTable::Cell::CENTER) << "Count" << end_col;

    if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
      table << justify(PrintTable::Cell::CENTER) << MetricTraits<CPUTime>::table_header() << end_col;
    if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
      table << justify(PrintTable::Cell::CENTER) << MetricTraits<WallTime>::table_header() << end_col;
    if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
      table << justify(PrintTable::Cell::CENTER) << MetricTraits<MPICount>::table_header() << end_col;
    if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
      table << justify(PrintTable::Cell::CENTER) << MetricTraits<MPIByteCount>::table_header() << end_col;
    if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
      table << justify(PrintTable::Cell::CENTER) << MetricTraits<HeapAlloc>::table_header() << end_col;

    table << end_header;

    printSubtable(table, root_timer, root_timer, metrics_mask, 0, timer_checkpoint);

    if (timer_checkpoint)
      root_timer.checkpoint();
  }

  return table;
}
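
// Illustrative only (not part of the original source): with CPU and wall time metrics
// enabled, the serial report printed from the table built above looks roughly like
//
//                Timer                  Count    CPU Time            Wall Time
//   Application                             1    62.043 (100.0%)     65.310 (100.0%)
//     Solve                                10    58.020 (93.52%)     60.290 (92.31%)
//
// where each metric column is the accumulated lap value followed by the Percent of the
// root timer's value; the exact column headers and number formatting come from
// MetricTraits<T>::table_header() and MetricTraits<T>::format(), and the values shown
// here are made up.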


PrintTable &
printTable(
  PrintTable &          table,
  Timer &               root_timer,
  MetricsMask           metrics_mask,
  size_t                name_width,
  bool                  timer_checkpoint,
  ParallelMachine       parallel_machine)
{
  updateRootTimer(root_timer);

  root_timer.accumulateSubtimerLapCounts();

  ParallelTimer parallel_timer;

  stk_classic::diag::collect_timers(root_timer, parallel_timer, timer_checkpoint, parallel_machine);

  int parallel_rank = parallel_machine_rank(parallel_machine);
  if (parallel_rank == 0) {
    if (metrics_mask & getEnabledTimerMetricsMask()) {
      table.setAutoEndCol(false);

      table << end_col << end_col;

      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << MetricTraits<CPUTime>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<CPUTime>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<CPUTime>::table_header() << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << MetricTraits<WallTime>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<WallTime>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<WallTime>::table_header() << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << MetricTraits<MPICount>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<MPICount>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<MPICount>::table_header() << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << MetricTraits<MPIByteCount>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<MPIByteCount>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<MPIByteCount>::table_header() << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << MetricTraits<HeapAlloc>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<HeapAlloc>::table_header() << end_col
              << justify(PrintTable::Cell::CENTER) << MetricTraits<HeapAlloc>::table_header() << end_col;

      table << end_header;
      table << cell_width(name_width) << justify(PrintTable::Cell::CENTER) << "Timer" << (timer_checkpoint ? " (delta time)" : "") << end_col
            << justify(PrintTable::Cell::CENTER) << "Count" << end_col;

      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<CPUTime>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<WallTime>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPICount>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<MPIByteCount>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;
      if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits<HeapAlloc>::METRIC)
        table << justify(PrintTable::Cell::CENTER) << "Sum (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Min (% of System)" << end_col
              << justify(PrintTable::Cell::CENTER) << "Max (% of System)" << end_col;

      table << end_header;

      printSubtable(table, parallel_timer, parallel_timer, metrics_mask, 0, timer_checkpoint);
    }

    if (timer_checkpoint)
      root_timer.checkpoint();
  }

  return table;
}

} // namespace <empty>


std::ostream &printTimersTable(std::ostream& os, Timer root_timer, MetricsMask metrics_mask, bool timer_checkpoint)
{
  stk_classic::PrintTable print_table;

  printTable(print_table, root_timer, metrics_mask, 40, timer_checkpoint);

  os << print_table;

  return os;
}


std::ostream &printTimersTable(std::ostream& os, Timer root_timer, MetricsMask metrics_mask, bool timer_checkpoint, ParallelMachine parallel_machine)
{
  stk_classic::PrintTable print_table;

  int parallel_size = parallel_machine_size(parallel_machine);
  if (parallel_size == 1)
    printTable(print_table, root_timer, metrics_mask, 40, timer_checkpoint);
  else
    printTable(print_table, root_timer, metrics_mask, 40, timer_checkpoint, parallel_machine);

  os << print_table;

  return os;
}
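
// Illustrative usage (not part of the original source): an application holding a root
// timer would typically emit the aggregated report on the root rank with something like
//
//   stk_classic::diag::printTimersTable(std::cout, root_timer,
//                                       stk_classic::diag::getEnabledTimerMetricsMask(),
//                                       false, MPI_COMM_WORLD);
//
// where the mask argument selects which metric columns to print and the final argument
// is the ParallelMachine (MPI communicator) used by collect_timers above.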


// std::ostream &printXML(std::ostream &os, MPI_Comm mpi_comm, MetricsMask metrics_mask) const;
std::ostream &printXML(std::ostream &os, MetricsMask metrics_mask, bool timer_checkpoint);

std::ostream &printSubXML(std::ostream &os, MetricsMask metrics_mask, int depth, bool timer_checkpoint);

} // namespace diag

Marshal &operator<<(stk_classic::Marshal &mout, const diag::Timer &t);

template <class T>
Marshal &operator<<(Marshal &mout, const diag::Timer::Metric<T> &t) {
  mout << t.getAccumulatedLap(false) << t.getAccumulatedLap(true);

  return mout;
}

Marshal &operator<<(Marshal &mout, const diag::Timer &t) {
  mout << t.getName() << t.getTimerMask() << t.getSubtimerLapCount()
       << t.getMetric<diag::LapCount>() << t.getMetric<diag::CPUTime>() << t.getMetric<diag::WallTime>()
       << t.getMetric<diag::MPICount>() << t.getMetric<diag::MPIByteCount>() << t.getMetric<diag::HeapAlloc>();

  mout << t.getTimerList();

  return mout;
}

Marshal &operator>>(Marshal &min, diag::ParallelTimer &t) {
  min >> t.m_name >> t.m_timerMask >> t.m_subtimerLapCount
      >> t.m_lapCount.m_value
      >> t.m_lapCount.m_checkpoint
      >> t.m_cpuTime.m_value
      >> t.m_cpuTime.m_checkpoint
      >> t.m_wallTime.m_value
      >> t.m_wallTime.m_checkpoint
      >> t.m_MPICount.m_value
      >> t.m_MPICount.m_checkpoint
      >> t.m_MPIByteCount.m_value
      >> t.m_MPIByteCount.m_checkpoint
      >> t.m_heapAlloc.m_value
      >> t.m_heapAlloc.m_checkpoint;

  min >> t.m_subtimerList;

  return min;
}

} // namespace stk_classic