Kokkos Core Kernels Package  Version of the Day
KokkosExp_MDRangePolicy.hpp
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 2.0
6 // Copyright (2014) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
39 //
40 // ************************************************************************
41 //@HEADER
42 */
43 
44 #ifndef KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
45 #define KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
46 
#include <Kokkos_ExecPolicy.hpp>
#include <Kokkos_Parallel.hpp>

#include <cstdint>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
50 
51 #if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_HAVE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__)
52 #define KOKKOS_MDRANGE_IVDEP
53 #endif
54 
55 namespace Kokkos { namespace Experimental {
56 
/// Selects the order in which the indices of a multi-dimensional range are
/// traversed.
enum class Iterate
{
  Default = 0,  // Use the default for the device
  Left    = 1,  // Left-most indices stride fastest
  Right   = 2,  // Right-most indices stride fastest
  Flat    = 3   // No tiling; only valid as an inner direction
};
64 
65 template <typename ExecSpace>
66 struct default_outer_direction
67 {
68  using type = Iterate;
69  static constexpr Iterate value = Iterate::Right;
70 };
71 
72 template <typename ExecSpace>
73 struct default_inner_direction
74 {
75  using type = Iterate;
76  static constexpr Iterate value = Iterate::Right;
77 };
78 
79 
80 // Iteration Pattern
81 template < unsigned N
82  , Iterate OuterDir = Iterate::Default
83  , Iterate InnerDir = Iterate::Default
84  >
85 struct Rank
86 {
87  static_assert( N != 0u, "Kokkos Error: rank 0 undefined");
88  static_assert( N != 1u, "Kokkos Error: rank 1 is not a multi-dimensional range");
89  static_assert( N < 4u, "Kokkos Error: Unsupported rank...");
90 
91  using iteration_pattern = Rank<N, OuterDir, InnerDir>;
92 
93  static constexpr int rank = N;
94  static constexpr Iterate outer_direction = OuterDir;
95  static constexpr Iterate inner_direction = InnerDir;
96 };
97 
98 
99 
100 // multi-dimensional iteration pattern
101 template <typename... Properties>
102 struct MDRangePolicy
103 {
104  using range_policy = RangePolicy<Properties...>;
105 
106  static_assert( !std::is_same<range_policy,void>::value
107  , "Kokkos Error: MD iteration pattern not defined" );
108 
109  using iteration_pattern = typename range_policy::iteration_pattern;
110  using work_tag = typename range_policy::work_tag;
111 
112  static constexpr int rank = iteration_pattern::rank;
113 
114  static constexpr int outer_direction = static_cast<int> (
115  (iteration_pattern::outer_direction != Iterate::Default && iteration_pattern::outer_direction != Iterate::Flat)
116  ? iteration_pattern::outer_direction
117  : default_outer_direction< typename range_policy::execution_space>::value );
118 
119  static constexpr int inner_direction = static_cast<int> (
120  iteration_pattern::inner_direction != Iterate::Default
121  ? iteration_pattern::inner_direction
122  : default_inner_direction< typename range_policy::execution_space>::value ) ;
123 
124 
125  // Ugly ugly workaround intel 14 not handling scoped enum correctly
126  static constexpr int Flat = static_cast<int>( Iterate::Flat );
127  static constexpr int Right = static_cast<int>( Iterate::Right );
128 
129 
130  using size_type = typename range_policy::index_type;
131  using index_type = typename std::make_signed<size_type>::type;
132 
133 
134  template <typename I>
135  MDRangePolicy( std::initializer_list<I> upper_corner )
136  {
137  static_assert( std::is_integral<I>::value, "Kokkos Error: corner defined with non-integral type" );
138 
139  // TODO check size of lists equal to rank
140  // static_asserts on initializer_list.size() require c++14
141 
142  //static_assert( upper_corner.size() == rank, "Kokkos Error: upper_corner has incorrect rank" );
143 
144  const auto u = upper_corner.begin();
145 
146  m_num_tiles = 1;
147  for (int i=0; i<rank; ++i) {
148  m_offset[i] = static_cast<index_type>(0);
149  m_dim[i] = static_cast<index_type>(u[i]);
150  if (inner_direction != Flat) {
151  // default tile size to 4
152  m_tile[i] = 4;
153  } else {
154  m_tile[i] = 1;
155  }
156  m_tile_dim[i] = (m_dim[i] + (m_tile[i] - 1)) / m_tile[i];
157  m_num_tiles *= m_tile_dim[i];
158  }
159  }
160 
161  template <typename IA, typename IB>
162  MDRangePolicy( std::initializer_list<IA> corner_a
163  , std::initializer_list<IB> corner_b
164  )
165  {
166  static_assert( std::is_integral<IA>::value, "Kokkos Error: corner A defined with non-integral type" );
167  static_assert( std::is_integral<IB>::value, "Kokkos Error: corner B defined with non-integral type" );
168 
169  // TODO check size of lists equal to rank
170  // static_asserts on initializer_list.size() require c++14
171  //static_assert( corner_a.size() == rank, "Kokkos Error: corner_a has incorrect rank" );
172  //static_assert( corner_b.size() == rank, "Kokkos Error: corner_b has incorrect rank" );
173 
174 
175  using A = typename std::make_signed<IA>::type;
176  using B = typename std::make_signed<IB>::type;
177 
178  const auto a = [=](int i) { return static_cast<A>(corner_a.begin()[i]); };
179  const auto b = [=](int i) { return static_cast<B>(corner_b.begin()[i]); };
180 
181  m_num_tiles = 1;
182  for (int i=0; i<rank; ++i) {
183  m_offset[i] = static_cast<index_type>(a(i) <= b(i) ? a(i) : b(i));
184  m_dim[i] = static_cast<index_type>(a(i) <= b(i) ? b(i) - a(i) : a(i) - b(i));
185  if (inner_direction != Flat) {
186  // default tile size to 4
187  m_tile[i] = 4;
188  } else {
189  m_tile[i] = 1;
190  }
191  m_tile_dim[i] = (m_dim[i] + (m_tile[i] - 1)) / m_tile[i];
192  m_num_tiles *= m_tile_dim[i];
193  }
194  }
195 
196  template <typename IA, typename IB, typename T>
197  MDRangePolicy( std::initializer_list<IA> corner_a
198  , std::initializer_list<IB> corner_b
199  , std::initializer_list<T> tile
200  )
201  {
202  static_assert( std::is_integral<IA>::value, "Kokkos Error: corner A defined with non-integral type" );
203  static_assert( std::is_integral<IB>::value, "Kokkos Error: corner B defined with non-integral type" );
204  static_assert( std::is_integral<T>::value, "Kokkos Error: tile defined with non-integral type" );
205  static_assert( inner_direction != Flat, "Kokkos Error: tiling not support with flat iteration" );
206 
207  // TODO check size of lists equal to rank
208  // static_asserts on initializer_list.size() require c++14
209  //static_assert( corner_a.size() == rank, "Kokkos Error: corner_a has incorrect rank" );
210  //static_assert( corner_b.size() == rank, "Kokkos Error: corner_b has incorrect rank" );
211  //static_assert( tile.size() == rank, "Kokkos Error: tile has incorrect rank" );
212 
213  using A = typename std::make_signed<IA>::type;
214  using B = typename std::make_signed<IB>::type;
215 
216  const auto a = [=](int i) { return static_cast<A>(corner_a.begin()[i]); };
217  const auto b = [=](int i) { return static_cast<B>(corner_b.begin()[i]); };
218  const auto t = tile.begin();
219 
220  m_num_tiles = 1;
221  for (int i=0; i<rank; ++i) {
222  m_offset[i] = static_cast<index_type>(a(i) <= b(i) ? a(i) : b(i));
223  m_dim[i] = static_cast<index_type>(a(i) <= b(i) ? b(i) - a(i) : a(i) - b(i));
224  m_tile[i] = static_cast<int>(t[i] > (T)0 ? t[i] : (T)1 );
225  m_tile_dim[i] = (m_dim[i] + (m_tile[i] - 1)) / m_tile[i];
226  m_num_tiles *= m_tile_dim[i];
227  }
228  }
229 
230  index_type m_offset[rank];
231  index_type m_dim[rank];
232  int m_tile[rank];
233  index_type m_tile_dim[rank];
234  size_type m_num_tiles; // product of tile dims
235 };
236 
237 namespace Impl {
238 
239 // Serial, Threads, OpenMP
240 // use enable_if to overload for Cuda
/// Adapter that turns a flat work index into calls of a user functor over a
/// multi-dimensional range.
///
/// operator() is overloaded (via enable_if) on rank (2 or 3), on whether the
/// inner direction is Flat, and on whether the range declares a work tag:
///   - Flat:     the flat index is decoded into one index tuple and the user
///               functor is called once.
///   - Not Flat: the flat index is decoded into a tile; the user functor is
///               called for every index tuple inside that tile.
/// When a work tag is declared, an instance of it is passed as the first
/// argument of the user functor.
///
/// \tparam MDRange  range description; must provide work_tag, index_type,
///                  size_type, rank, inner_direction, outer_direction, Flat,
///                  Right and the m_offset / m_dim / m_tile / m_tile_dim arrays
/// \tparam Functor  user callable
/// \tparam Enable   hook for enable_if based specialization
template < typename MDRange, typename Functor, typename Enable = void >
struct MDForFunctor
{
  using work_tag   = typename MDRange::work_tag;
  using index_type = typename MDRange::index_type;
  using size_type  = typename MDRange::size_type;

  MDRange m_range;
  Functor m_func;

  KOKKOS_INLINE_FUNCTION
  MDForFunctor( MDRange const& range, Functor const& f )
    : m_range(range)
    , m_func( f )
  {}

  // The && parameters below are plain rvalue references (MDRange and Functor
  // are class template parameters, not deduced), hence std::move.
  KOKKOS_INLINE_FUNCTION
  MDForFunctor( MDRange const& range, Functor && f )
    : m_range(range)
    , m_func( std::move(f) )
  {}

  KOKKOS_INLINE_FUNCTION
  MDForFunctor( MDRange && range, Functor const& f )
    : m_range( std::move(range) )
    , m_func( f )
  {}

  KOKKOS_INLINE_FUNCTION
  MDForFunctor( MDRange && range, Functor && f )
    : m_range( std::move(range) )
    , m_func( std::move(f) )
  {}


  KOKKOS_INLINE_FUNCTION
  MDForFunctor( MDForFunctor const& ) = default;

  KOKKOS_INLINE_FUNCTION
  MDForFunctor& operator=( MDForFunctor const& ) = default;

  KOKKOS_INLINE_FUNCTION
  MDForFunctor( MDForFunctor && ) = default;

  KOKKOS_INLINE_FUNCTION
  MDForFunctor& operator=( MDForFunctor && ) = default;

  // Rank-2, Flat, No Tag
  template <typename Idx>
  KOKKOS_FORCEINLINE_FUNCTION
  typename std::enable_if<(  std::is_integral<Idx>::value
                          && std::is_same<void, work_tag>::value
                          && MDRange::rank == 2
                          && MDRange::inner_direction == MDRange::Flat
                          )>::type
  operator()(Idx t) const
  {
    if ( MDRange::outer_direction == MDRange::Right ) {
      m_func( m_range.m_offset[0] + ( t / m_range.m_dim[1] )
            , m_range.m_offset[1] + ( t % m_range.m_dim[1] ) );
    } else {
      m_func( m_range.m_offset[0] + ( t % m_range.m_dim[0] )
            , m_range.m_offset[1] + ( t / m_range.m_dim[0] ) );
    }
  }

  // Rank-2, Flat, Tag
  template <typename Idx>
  KOKKOS_FORCEINLINE_FUNCTION
  typename std::enable_if<(  std::is_integral<Idx>::value
                          && !std::is_same<void, work_tag>::value
                          && MDRange::rank == 2
                          && MDRange::inner_direction == MDRange::Flat
                          )>::type
  operator()(Idx t) const
  {
    if ( MDRange::outer_direction == MDRange::Right ) {
      m_func( work_tag{}, m_range.m_offset[0] + ( t / m_range.m_dim[1] )
            , m_range.m_offset[1] + ( t % m_range.m_dim[1] ) );
    } else {
      m_func( work_tag{}, m_range.m_offset[0] + ( t % m_range.m_dim[0] )
            , m_range.m_offset[1] + ( t / m_range.m_dim[0] ) );
    }
  }

  // Rank-2, Not Flat, No Tag
  template <typename Idx>
  KOKKOS_FORCEINLINE_FUNCTION
  typename std::enable_if<(  std::is_integral<Idx>::value
                          && std::is_same<void, work_tag>::value
                          && MDRange::rank == 2
                          && MDRange::inner_direction != MDRange::Flat
                          )>::type
  operator()(Idx t) const
  {
    index_type t0, t1;
    tile_coords( t, t0, t1 );

    const index_type b0 = tile_begin( 0, t0 );
    const index_type b1 = tile_begin( 1, t1 );

    const index_type e0 = tile_end( 0, b0 );
    const index_type e1 = tile_end( 1, b1 );

    // Loop variables are index_type: a narrower int would truncate wide bounds.
    if ( MDRange::inner_direction == MDRange::Right ) {
      for (index_type i0=b0; i0<e0; ++i0) {
      #if defined(KOKKOS_MDRANGE_IVDEP)
      #pragma ivdep
      #endif
      for (index_type i1=b1; i1<e1; ++i1) {
        m_func( i0, i1 );
      }}
    } else {
      for (index_type i1=b1; i1<e1; ++i1) {
      #if defined(KOKKOS_MDRANGE_IVDEP)
      #pragma ivdep
      #endif
      for (index_type i0=b0; i0<e0; ++i0) {
        m_func( i0, i1 );
      }}
    }
  }

  // Rank-2, Not Flat, Tag
  template <typename Idx>
  KOKKOS_FORCEINLINE_FUNCTION
  typename std::enable_if<(  std::is_integral<Idx>::value
                          && !std::is_same<void, work_tag>::value
                          && MDRange::rank == 2
                          && MDRange::inner_direction != MDRange::Flat
                          )>::type
  operator()(Idx t) const
  {
    work_tag tag;

    index_type t0, t1;
    tile_coords( t, t0, t1 );

    const index_type b0 = tile_begin( 0, t0 );
    const index_type b1 = tile_begin( 1, t1 );

    const index_type e0 = tile_end( 0, b0 );
    const index_type e1 = tile_end( 1, b1 );

    if ( MDRange::inner_direction == MDRange::Right ) {
      for (index_type i0=b0; i0<e0; ++i0) {
      #if defined(KOKKOS_MDRANGE_IVDEP)
      #pragma ivdep
      #endif
      for (index_type i1=b1; i1<e1; ++i1) {
        m_func( tag, i0, i1 );
      }}
    } else {
      for (index_type i1=b1; i1<e1; ++i1) {
      #if defined(KOKKOS_MDRANGE_IVDEP)
      #pragma ivdep
      #endif
      for (index_type i0=b0; i0<e0; ++i0) {
        m_func( tag, i0, i1 );
      }}
    }
  }

  //---------------------------------------------------------------------------

  // Rank-3, Flat, No Tag
  template <typename Idx>
  KOKKOS_FORCEINLINE_FUNCTION
  typename std::enable_if<(  std::is_integral<Idx>::value
                          && std::is_same<void, work_tag>::value
                          && MDRange::rank == 3
                          && MDRange::inner_direction == MDRange::Flat
                          )>::type
  operator()(Idx t) const
  {
    // Widen before multiplying so a narrow index_type cannot overflow the product.
    if ( MDRange::outer_direction == MDRange::Right ) {
      const int64_t tmp_prod = static_cast<int64_t>(m_range.m_dim[1])*m_range.m_dim[2];
      m_func( m_range.m_offset[0] + (  t / tmp_prod )
            , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[2] )
            , m_range.m_offset[2] + ( (t % tmp_prod) % m_range.m_dim[2] )
            );
    } else {
      const int64_t tmp_prod = static_cast<int64_t>(m_range.m_dim[0])*m_range.m_dim[1];
      m_func( m_range.m_offset[0] + ( (t % tmp_prod) % m_range.m_dim[0] )
            , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[0] )
            , m_range.m_offset[2] + (  t / tmp_prod )
            );
    }
  }

  // Rank-3, Flat, Tag
  template <typename Idx>
  KOKKOS_FORCEINLINE_FUNCTION
  typename std::enable_if<(  std::is_integral<Idx>::value
                          && !std::is_same<void, work_tag>::value
                          && MDRange::rank == 3
                          && MDRange::inner_direction == MDRange::Flat
                          )>::type
  operator()(Idx t) const
  {
    // Widen before multiplying so a narrow index_type cannot overflow the product.
    if ( MDRange::outer_direction == MDRange::Right ) {
      const int64_t tmp_prod = static_cast<int64_t>(m_range.m_dim[1])*m_range.m_dim[2];
      m_func( work_tag{}
            , m_range.m_offset[0] + (  t / tmp_prod )
            , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[2] )
            , m_range.m_offset[2] + ( (t % tmp_prod) % m_range.m_dim[2] )
            );
    } else {
      const int64_t tmp_prod = static_cast<int64_t>(m_range.m_dim[0])*m_range.m_dim[1];
      m_func( work_tag{}
            , m_range.m_offset[0] + ( (t % tmp_prod) % m_range.m_dim[0] )
            , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[0] )
            , m_range.m_offset[2] + (  t / tmp_prod )
            );
    }
  }

  // Rank-3, Not Flat, No Tag
  template <typename Idx>
  KOKKOS_FORCEINLINE_FUNCTION
  typename std::enable_if<(  std::is_integral<Idx>::value
                          && std::is_same<void, work_tag>::value
                          && MDRange::rank == 3
                          && MDRange::inner_direction != MDRange::Flat
                          )>::type
  operator()(Idx t) const
  {
    index_type t0, t1, t2;
    tile_coords( t, t0, t1, t2 );

    const index_type b0 = tile_begin( 0, t0 );
    const index_type b1 = tile_begin( 1, t1 );
    const index_type b2 = tile_begin( 2, t2 );

    const index_type e0 = tile_end( 0, b0 );
    const index_type e1 = tile_end( 1, b1 );
    const index_type e2 = tile_end( 2, b2 );

    if ( MDRange::inner_direction == MDRange::Right ) {
      for (index_type i0=b0; i0<e0; ++i0) {
      for (index_type i1=b1; i1<e1; ++i1) {
      #if defined(KOKKOS_MDRANGE_IVDEP)
      #pragma ivdep
      #endif
      for (index_type i2=b2; i2<e2; ++i2) {
        m_func( i0, i1, i2 );
      }}}
    } else {
      for (index_type i2=b2; i2<e2; ++i2) {
      for (index_type i1=b1; i1<e1; ++i1) {
      #if defined(KOKKOS_MDRANGE_IVDEP)
      #pragma ivdep
      #endif
      for (index_type i0=b0; i0<e0; ++i0) {
        m_func( i0, i1, i2 );
      }}}
    }
  }

  // Rank-3, Not Flat, Tag
  template <typename Idx>
  KOKKOS_FORCEINLINE_FUNCTION
  typename std::enable_if<(  std::is_integral<Idx>::value
                          && !std::is_same<void, work_tag>::value
                          && MDRange::rank == 3
                          && MDRange::inner_direction != MDRange::Flat
                          )>::type
  operator()(Idx t) const
  {
    work_tag tag;

    index_type t0, t1, t2;
    tile_coords( t, t0, t1, t2 );

    const index_type b0 = tile_begin( 0, t0 );
    const index_type b1 = tile_begin( 1, t1 );
    const index_type b2 = tile_begin( 2, t2 );

    const index_type e0 = tile_end( 0, b0 );
    const index_type e1 = tile_end( 1, b1 );
    const index_type e2 = tile_end( 2, b2 );

    if ( MDRange::inner_direction == MDRange::Right ) {
      for (index_type i0=b0; i0<e0; ++i0) {
      for (index_type i1=b1; i1<e1; ++i1) {
      #if defined(KOKKOS_MDRANGE_IVDEP)
      #pragma ivdep
      #endif
      for (index_type i2=b2; i2<e2; ++i2) {
        m_func( tag, i0, i1, i2 );
      }}}
    } else {
      for (index_type i2=b2; i2<e2; ++i2) {
      for (index_type i1=b1; i1<e1; ++i1) {
      #if defined(KOKKOS_MDRANGE_IVDEP)
      #pragma ivdep
      #endif
      for (index_type i0=b0; i0<e0; ++i0) {
        m_func( tag, i0, i1, i2 );
      }}}
    }
  }

private:

  // Decodes flat tile index t into rank-2 tile coordinates; the outer
  // direction decides which coordinate varies fastest.
  template <typename Idx>
  KOKKOS_FORCEINLINE_FUNCTION
  void tile_coords( const Idx t, index_type & t0, index_type & t1 ) const
  {
    if ( MDRange::outer_direction == MDRange::Right ) {
      t0 = t / m_range.m_tile_dim[1];
      t1 = t % m_range.m_tile_dim[1];
    } else {
      t0 = t % m_range.m_tile_dim[0];
      t1 = t / m_range.m_tile_dim[0];
    }
  }

  // Decodes flat tile index t into rank-3 tile coordinates.
  template <typename Idx>
  KOKKOS_FORCEINLINE_FUNCTION
  void tile_coords( const Idx t, index_type & t0, index_type & t1, index_type & t2 ) const
  {
    if ( MDRange::outer_direction == MDRange::Right ) {
      const index_type tmp_prod = ( m_range.m_tile_dim[1]*m_range.m_tile_dim[2]);
      t0 = t / tmp_prod;
      t1 = ( t % tmp_prod ) / m_range.m_tile_dim[2];
      t2 = ( t % tmp_prod ) % m_range.m_tile_dim[2];
    } else {
      const index_type tmp_prod = ( m_range.m_tile_dim[0]*m_range.m_tile_dim[1]);
      t0 = ( t % tmp_prod ) % m_range.m_tile_dim[0];
      t1 = ( t % tmp_prod ) / m_range.m_tile_dim[0];
      t2 = t / tmp_prod;
    }
  }

  // First index covered by tile `tile_id` along dimension d.
  KOKKOS_FORCEINLINE_FUNCTION
  index_type tile_begin( const int d, const index_type tile_id ) const
  {
    return tile_id * m_range.m_tile[d] + m_range.m_offset[d];
  }

  // One-past-last index of the tile starting at `begin` along dimension d,
  // clipped to the end of the range so a partial last tile stays in bounds.
  KOKKOS_FORCEINLINE_FUNCTION
  index_type tile_end( const int d, const index_type begin ) const
  {
    const index_type range_end = m_range.m_dim[d] + m_range.m_offset[d];
    const index_type full_tile = begin + m_range.m_tile[d];
    return full_tile <= range_end ? full_tile : range_end;
  }
};
576 
577 
578 
579 } // namespace Impl
580 
581 
582 template <typename MDRange, typename Functor>
583 void md_parallel_for( MDRange const& range
584  , Functor const& f
585  , const std::string& str = ""
586  )
587 {
588  Impl::MDForFunctor<MDRange, Functor> g(range, f);
589 
590  using range_policy = typename MDRange::range_policy;
591 
592  Kokkos::parallel_for( range_policy(0, range.m_num_tiles).set_chunk_size(1), g, str );
593 }
594 
595 template <typename MDRange, typename Functor>
596 void md_parallel_for( const std::string& str
597  , MDRange const& range
598  , Functor const& f
599  )
600 {
601  Impl::MDForFunctor<MDRange, Functor> g(range, f);
602 
603  using range_policy = typename MDRange::range_policy;
604 
605  Kokkos::parallel_for( range_policy(0, range.m_num_tiles).set_chunk_size(1), g, str );
606 }
607 
608 }} // namespace Kokkos::Experimental
609 
610 #endif //KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
611 
Declaration of parallel operators.
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type *=0)
Execute functor in parallel according to the execution policy.
KOKKOS_INLINE_FUNCTION constexpr unsigned rank(const View< D, P... > &V)
Temporary free function rank() until rank() is implemented in the View.