Sacado Package Browser (Single Doxygen Collection)  Version of the Day
Sacado_DynamicArrayTraits.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Sacado Package
5 // Copyright (2006) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // This library is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as
12 // published by the Free Software Foundation; either version 2.1 of the
13 // License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23 // USA
24 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
25 // (etphipp@sandia.gov).
26 //
27 // ***********************************************************************
28 // @HEADER
29 
30 #ifndef SACADO_DYNAMICARRAYTRAITS_HPP
31 #define SACADO_DYNAMICARRAYTRAITS_HPP
32 
33 #include <new>
34 #include <cstring>
35 #include <stdint.h>
36 
37 #include "Sacado_Traits.hpp"
38 #if defined(HAVE_SACADO_KOKKOSCORE)
39 #include "Kokkos_Core.hpp"
40 #if defined(KOKKOS_ENABLE_CUDA)
41 #include "Cuda/Kokkos_Cuda_Vectorization.hpp"
42 #endif
43 #if !defined(SACADO_DISABLE_CUDA_IN_KOKKOS)
44 #include "Kokkos_MemoryPool.hpp"
45 #endif
46 #endif
47 
48 namespace Sacado {
49 
50  template <typename ExecSpace>
51  void createGlobalMemoryPool(const ExecSpace& space
52  , const size_t min_total_alloc_size
53  , const uint32_t min_block_alloc_size
54  , const uint32_t max_block_alloc_size
55  , const uint32_t min_superblock_size
56  ) {}
57 
58  template <typename ExecSpace>
59  void destroyGlobalMemoryPool(const ExecSpace& space) {}
60 
61 #if 0 && defined(HAVE_SACADO_KOKKOSCORE) && defined(KOKKOS_ENABLE_OPENMP)
62  namespace Impl {
63  extern const Kokkos::MemoryPool<Kokkos::OpenMP>* global_sacado_openmp_memory_pool;
64  }
65 
66  inline void
67  createGlobalMemoryPool(const ExecSpace& space
68  , const size_t min_total_alloc_size
69  , const uint32_t min_block_alloc_size
70  , const uint32_t max_block_alloc_size
71  , const uint32_t min_superblock_size
72  )
73  {
74  typedef Kokkos::MemoryPool<Kokkos::OpenMP> pool_t;
75  Impl::global_sacado_openmp_memory_pool =
76  new pool_t(typename Kokkos::OpenMP::memory_space(),
77  min_total_alloc_size,
78  min_block_alloc_size,
79  max_block_alloc_size,
80  min_superblock_size);
81  }
82 
83  inline void destroyGlobalMemoryPool(const Kokkos::OpenMP& space)
84  {
85  delete Impl::global_sacado_openmp_memory_pool;
86  }
87 #endif
88 
89 #if defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)
90 
91  namespace Impl {
92 
93  extern const Kokkos::MemoryPool<Kokkos::Cuda>* global_sacado_cuda_memory_pool_host;
94  extern const Kokkos::MemoryPool<Kokkos::Cuda>* global_sacado_cuda_memory_pool_device;
95 #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
96  extern __device__ const Kokkos::MemoryPool<Kokkos::Cuda>* global_sacado_cuda_memory_pool_on_device;
97 #else
98  __device__ const Kokkos::MemoryPool<Kokkos::Cuda>* global_sacado_cuda_memory_pool_on_device = 0;
99 #endif
100 
101  struct SetMemoryPoolPtr {
102  Kokkos::MemoryPool<Kokkos::Cuda>* pool_device;
103  __device__ inline void operator()(int) const {
104  global_sacado_cuda_memory_pool_on_device = pool_device;
105  };
106  };
107 
108  }
109 
110  // For some reason we get memory errors if these functions are defined in
111  // Sacado_DynamicArrayTraits.cpp
112  inline void
113  createGlobalMemoryPool(const Kokkos::Cuda& space
114  , const size_t min_total_alloc_size
115  , const uint32_t min_block_alloc_size
116  , const uint32_t max_block_alloc_size
117  , const uint32_t min_superblock_size
118  )
119  {
120  typedef Kokkos::MemoryPool<Kokkos::Cuda> pool_t;
121  pool_t* pool =
122  new pool_t(typename Kokkos::Cuda::memory_space(),
123  min_total_alloc_size,
124  min_block_alloc_size,
125  max_block_alloc_size,
126  min_superblock_size);
127  Impl::SetMemoryPoolPtr f;
128  CUDA_SAFE_CALL( cudaMalloc( &f.pool_device, sizeof(pool_t) ) );
129  CUDA_SAFE_CALL( cudaMemcpy( f.pool_device, pool,
130  sizeof(pool_t),
131  cudaMemcpyHostToDevice ) );
132  Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,1),f);
133  Impl::global_sacado_cuda_memory_pool_host = pool;
134  Impl::global_sacado_cuda_memory_pool_device = f.pool_device;
135  }
136 
137  inline void destroyGlobalMemoryPool(const Kokkos::Cuda& space)
138  {
139  CUDA_SAFE_CALL( cudaFree( (void*) Impl::global_sacado_cuda_memory_pool_device ) );
140  delete Impl::global_sacado_cuda_memory_pool_host;
141  }
142 
143 #endif
144 
145 #if !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)
146 
147  namespace Impl {
148 
149  // Compute warp lane/thread index
150  __device__ inline int warpLane(const int warp_size = 32) {
151  return ( threadIdx.x + (threadIdx.y + threadIdx.z*blockDim.y)*blockDim.x ) % warp_size;
152  }
153 
154  // Reduce y across the warp and broadcast to all lanes
155  template <typename T>
156  __device__ inline T warpReduce(T y, const int warp_size = 32) {
157  for (int i=1; i<warp_size; i*=2) {
158  y += Kokkos::shfl_down(y, i, warp_size);
159  }
160  y = Kokkos::shfl(y, 0, warp_size);
161  return y;
162  }
163 
164  // Non-inclusive plus-scan up the warp, replacing the first entry with 0
165  template <typename T>
166  __device__ inline int warpScan(T y, const int warp_size = 32) {
167  const int lane = warpLane();
168  y = Kokkos::shfl_up(y, 1, warp_size);
169  if (lane == 0)
170  y = T(0);
171  for (int i=1; i<warp_size; i*=2) {
172  T t = Kokkos::shfl_up(y, i, warp_size);
173  if (lane > i)
174  y += t;
175  }
176  return y;
177  }
178 
179  template <typename T>
180  __device__ inline T warpBcast(T y, int id, const int warp_size = 32) {
181  return Kokkos::shfl(y, id, warp_size);
182  }
183 
184  }
185 
186 #endif
187 
188  namespace Impl {
189 
190  template <typename T>
192  static T* ds_alloc(const int sz) {
193 #if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION ) && defined(KOKKOS_ENABLE_CUDA_UVM) && !defined( __CUDA_ARCH__ )
194  T* m = 0;
195  if (sz > 0)
196  CUDA_SAFE_CALL( cudaMallocManaged( (void**) &m, sz*sizeof(T), cudaMemAttachGlobal ) );
197 #elif defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(__CUDA_ARCH__)
198  // This code assumes all threads enter ds_alloc, even those with sz == 0
199  T* m = 0;
200  const int total_sz = warpReduce(sz);
201  const int lane = warpLane();
202  if (total_sz > 0 && lane == 0) {
203  m = static_cast<T*>(global_sacado_cuda_memory_pool_on_device->allocate(total_sz*sizeof(T)));
204  if (m == 0)
205  Kokkos::abort("Allocation failed. Kokkos memory pool is out of memory");
206  }
207  m = warpBcast(m,0);
208  m += warpScan(sz);
209 #elif 0 && defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && defined(KOKKOS_ENABLE_OPENMP)
210  T* m = 0;
211  if (sz > 0) {
212  if (global_sacado_openmp_memory_pool != 0) {
213  m = static_cast<T*>(global_sacado_openmp_memory_pool->allocate(sz*sizeof(T)));
214  if (m == 0)
215  Kokkos::abort("Allocation failed. Kokkos memory pool is out of memory");
216  }
217  else
218  m = static_cast<T* >(operator new(sz*sizeof(T)));
219  }
220 #else
221  T* m = 0;
222  if (sz > 0) {
223  m = static_cast<T* >(operator new(sz*sizeof(T)));
224 #if defined(HAVE_SACADO_KOKKOSCORE)
225  if (m == 0)
226  Kokkos::abort("Allocation failed.");
227 #endif
228  }
229 #endif
230  return m;
231  }
232 
233  template <typename T>
235  static void ds_free(T* m, int sz) {
236 #if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION ) && defined(KOKKOS_ENABLE_CUDA_UVM) && !defined( __CUDA_ARCH__ )
237  if (sz > 0)
238  CUDA_SAFE_CALL( cudaFree(m) );
239 #elif defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(__CUDA_ARCH__)
240  const int total_sz = warpReduce(sz);
241  const int lane = warpLane();
242  if (total_sz > 0 && lane == 0) {
243  global_sacado_cuda_memory_pool_on_device->deallocate((void*) m, total_sz*sizeof(T));
244  }
245 #elif 0 && defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && defined(KOKKOS_ENABLE_OPENMP)
246  if (sz > 0) {
247  if (global_sacado_openmp_memory_pool != 0)
248  global_sacado_openmp_memory_pool->deallocate((void*) m, sz*sizeof(T));
249  else
250  operator delete((void*) m);
251  }
252 #else
253  if (sz > 0)
254  operator delete((void*) m);
255 #endif
256  }
257 
258  }
259 
264  struct ds_array {
265 
268  static T* get(int sz) {
269  T* m = Impl::ds_alloc<T>(sz);
270  T* p = m;
271  for (int i=0; i<sz; ++i)
272  new (p++) T();
273  return m;
274  }
275 
278  static T* get_and_fill(int sz) {
279  T* m = Impl::ds_alloc<T>(sz);
280  T* p = m;
281  for (int i=0; i<sz; ++i)
282  new (p++) T(0.0);
283  return m;
284  }
285 
291  static T* get_and_fill(const T* src, int sz) {
292  T* m = Impl::ds_alloc<T>(sz);
293  T* p = m;
294  for (int i=0; i<sz; ++i)
295  new (p++) T(*(src++));
296  return m;
297  }
298 
304  static T* strided_get_and_fill(const T* src, int stride, int sz) {
305  T* m = Impl::ds_alloc<T>(sz);
306  T* p = m;
307  for (int i=0; i<sz; ++i) {
308  new (p++) T(*(src));
309  src += stride;
310  }
311  return m;
312  }
313 
316  static void copy(const T* src, T* dest, int sz) {
317  for (int i=0; i<sz; ++i)
318  *(dest++) = *(src++);
319  }
320 
323  static void strided_copy(const T* src, int src_stride,
324  T* dest, int dest_stride, int sz) {
325  for (int i=0; i<sz; ++i) {
326  *(dest) = *(src);
327  dest += dest_stride;
328  src += src_stride;
329  }
330  }
331 
334  static void zero(T* dest, int sz) {
335  for (int i=0; i<sz; ++i)
336  *(dest++) = T(0.);
337  }
338 
341  static void strided_zero(T* dest, int stride, int sz) {
342  for (int i=0; i<sz; ++i) {
343  *(dest) = T(0.);
344  dest += stride;
345  }
346  }
347 
350  static void destroy_and_release(T* m, int sz) {
351  T* e = m+sz;
352  for (T* b = m; b!=e; b++)
353  b->~T();
354  Impl::ds_free(m, sz);
355  }
356  };
357 
358 #if defined(SACADO_VIEW_CUDA_HIERARCHICAL_DFAD) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(__CUDA_ARCH__)
359 
360  namespace Impl {
361 
362  template <typename T>
364  static T* ds_strided_alloc(const int sz) {
365  T* m = 0;
366  // Only do strided memory allocations when we are doing hierarchical
367  // parallelism with a vector dimension of 32. The limitation on the
368  // memory pool allowing only a single thread in a warp to allocate
369  // makes it too difficult to do otherwise.
370  if (blockDim.x == 32) {
371  //const int lane = warpLane();
372  const int lane = threadIdx.x;
373  if (sz > 0 && lane == 0) {
374 #if defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL)
375  m = static_cast<T*>(global_sacado_cuda_memory_pool_on_device->allocate(sz*sizeof(T)));
376  if (m == 0)
377  Kokkos::abort("Allocation failed. Kokkos memory pool is out of memory");
378 #else
379  m = static_cast<T* >(operator new(sz*sizeof(T)));
380 #if defined(HAVE_SACADO_KOKKOSCORE)
381  if (m == 0)
382  Kokkos::abort("Allocation failed.");
383 #endif
384 #endif
385  }
386  m = warpBcast(m,0,blockDim.x);
387  }
388  else {
389  if (sz > 0) {
390  m = static_cast<T* >(operator new(sz*sizeof(T)));
391 #if defined(HAVE_SACADO_KOKKOSCORE)
392  if (m == 0)
393  Kokkos::abort("Allocation failed.");
394 #endif
395  }
396  }
397 
398  return m;
399  }
400 
401  template <typename T>
403  static void ds_strided_free(T* m, int sz) {
404  if (blockDim.x == 32) {
405  // const int lane = warpLane();
406  const int lane = threadIdx.x;
407  if (sz > 0 && lane == 0) {
408 #if defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL)
409  global_sacado_cuda_memory_pool_on_device->deallocate((void*) m, sz*sizeof(T));
410 #else
411  operator delete((void*) m);
412 #endif
413  }
414  }
415  else {
416  if (sz > 0)
417  operator delete((void*) m);
418  }
419 
420  }
421 
422  }
423 
428  template <typename T>
429  struct ds_array<T,true> {
430 
433  static T* get(int sz) {
434  T* m = Impl::ds_strided_alloc<T>(sz);
435  return m;
436  }
437 
440  static T* get_and_fill(int sz) {
441  T* m = Impl::ds_strided_alloc<T>(sz);
442  for (int i=threadIdx.x; i<sz; i+=blockDim.x)
443  m[i] = 0.0;
444  return m;
445  }
446 
452  static T* get_and_fill(const T* src, int sz) {
453  T* m = Impl::ds_strided_alloc<T>(sz);
454  for (int i=threadIdx.x; i<sz; i+=blockDim.x)
455  m[i] = src[i];
456  return m;
457  }
458 
464  static T* strided_get_and_fill(const T* src, int stride, int sz) {
465  T* m = Impl::ds_strided_alloc<T>(sz);
466  for (int i=threadIdx.x; i<sz; i+=blockDim.x)
467  m[i] = src[i*stride];
468  return m;
469  }
470 
473  static void copy(const T* src, T* dest, int sz) {
474  for (int i=threadIdx.x; i<sz; i+=blockDim.x)
475  dest[i] = src[i];
476  }
477 
480  static void strided_copy(const T* src, int src_stride,
481  T* dest, int dest_stride, int sz) {
482  for (int i=threadIdx.x; i<sz; i+=blockDim.x) {
483  dest[i*dest_stride] = src[i*src_stride];
484  }
485  }
486 
489  static void zero(T* dest, int sz) {
490  for (int i=threadIdx.x; i<sz; i+=blockDim.x)
491  dest[i] = T(0.);
492  }
493 
496  static void strided_zero(T* dest, int stride, int sz) {
497  for (int i=threadIdx.x; i<sz; i+=blockDim.x) {
498  dest[i*stride] = T(0.);
499  }
500  }
501 
504  static void destroy_and_release(T* m, int sz) {
505  Impl::ds_strided_free(m, sz);
506  }
507  };
508 
509 #elif defined(SACADO_VIEW_CUDA_HIERARCHICAL_DFAD_STRIDED) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(__CUDA_ARCH__)
510 
511  namespace Impl {
512 
513  template <typename T>
515  static T* ds_strided_alloc(const int sz) {
516  T* m = 0;
517  // Only do strided memory allocations when we are doing hierarchical
518  // parallelism with a vector dimension of 32. The limitation on the
519  // memory pool allowing only a single thread in a warp to allocate
520  // makes it too difficult to do otherwise.
521  if (blockDim.x == 32) {
522  // const int total_sz = warpReduce(sz);
523  // const int lane = warpLane();
524  const int total_sz = warpReduce(sz, blockDim.x);
525  const int lane = threadIdx.x;
526  if (total_sz > 0 && lane == 0) {
527 #if defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL)
528  m = static_cast<T*>(global_sacado_cuda_memory_pool_on_device->allocate(total_sz*sizeof(T)));
529  if (m == 0)
530  Kokkos::abort("Allocation failed. Kokkos memory pool is out of memory");
531 #else
532  m = static_cast<T* >(operator new(total_sz*sizeof(T)));
533 #if defined(HAVE_SACADO_KOKKOSCORE)
534  if (m == 0)
535  Kokkos::abort("Allocation failed.");
536 #endif
537 #endif
538  }
539  m = warpBcast(m,0,blockDim.x);
540  m += lane;
541  }
542  else {
543  if (sz > 0) {
544  m = static_cast<T* >(operator new(sz*sizeof(T)));
545 #if defined(HAVE_SACADO_KOKKOSCORE)
546  if (m == 0)
547  Kokkos::abort("Allocation failed.");
548 #endif
549  }
550  }
551 
552  return m;
553  }
554 
555  template <typename T>
557  static void ds_strided_free(T* m, int sz) {
558  if (blockDim.x == 32) {
559  // const int total_sz = warpReduce(sz);
560  // const int lane = warpLane();
561  const int total_sz = warpReduce(sz, blockDim.x);
562  const int lane = threadIdx.x;
563  if (total_sz > 0 && lane == 0) {
564 #if defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL)
565  global_sacado_cuda_memory_pool_on_device->deallocate((void*) m, total_sz*sizeof(T));
566 #else
567  operator delete((void*) m);
568 #endif
569  }
570  }
571  else {
572  if (sz > 0)
573  operator delete((void*) m);
574  }
575  }
576  }
577 
582  template <typename T>
583  struct ds_array<T,true> {
584 
587  static T* get(int sz) {
588  T* m = Impl::ds_strided_alloc<T>(sz);
589  return m;
590  }
591 
594  static T* get_and_fill(int sz) {
595  T* m = Impl::ds_strided_alloc<T>(sz);
596  for (int i=0; i<sz; ++i)
597  m[i*blockDim.x] = 0.0;
598  return m;
599  }
600 
606  static T* get_and_fill(const T* src, int sz) {
607  T* m = Impl::ds_strided_alloc<T>(sz);
608  for (int i=0; i<sz; ++i)
609  m[i*blockDim.x] = src[i*blockDim.x];
610  return m;
611  }
612 
618  static T* strided_get_and_fill(const T* src, int stride, int sz) {
619  T* m = Impl::ds_strided_alloc<T>(sz);
620  for (int i=0; i<sz; ++i)
621  m[i*blockDim.x] = src[i*stride];
622  return m;
623  }
624 
627  static void copy(const T* src, T* dest, int sz) {
628  for (int i=0; i<sz; ++i)
629  dest[i*blockDim.x] = src[i*blockDim.x];
630  }
631 
634  static void strided_copy(const T* src, int src_stride,
635  T* dest, int dest_stride, int sz) {
636  for (int i=0; i<sz; ++i) {
637  *(dest) = *(src);
638  dest += dest_stride;
639  src += src_stride;
640  }
641  }
642 
645  static void zero(T* dest, int sz) {
646  for (int i=0; i<sz; ++i)
647  dest[i*blockDim.x] = T(0.);
648  }
649 
652  static void strided_zero(T* dest, int stride, int sz) {
653  for (int i=0; i<sz; ++i) {
654  *(dest) = T(0.);
655  dest += stride;
656  }
657  }
658 
661  static void destroy_and_release(T* m, int sz) {
662  Impl::ds_strided_free(m, sz);
663  }
664  };
665 
666 #else
667 
672  template <typename T>
673  struct ds_array<T,true> {
674 
677  static T* get(int sz) {
678  T* m = Impl::ds_alloc<T>(sz);
679  return m;
680  }
681 
684  static T* get_and_fill(int sz) {
685  T* m = Impl::ds_alloc<T>(sz);
686 #ifdef __CUDACC__
687  for (int i=0; i<sz; ++i)
688  m[i] = 0.0;
689 #else
690  if (sz > 0)
691  std::memset(m,0,sz*sizeof(T));
692 #endif
693  return m;
694  }
695 
701  static T* get_and_fill(const T* src, int sz) {
702  T* m = Impl::ds_alloc<T>(sz);
703  for (int i=0; i<sz; ++i)
704  m[i] = src[i];
705  return m;
706  }
707 
713  static T* strided_get_and_fill(const T* src, int stride, int sz) {
714  T* m = Impl::ds_alloc<T>(sz);
715  for (int i=0; i<sz; ++i)
716  m[i] = src[i*stride];
717  return m;
718  }
719 
722  static void copy(const T* src, T* dest, int sz) {
723  if (sz > 0 && dest != NULL && src != NULL)
724 #ifdef __CUDACC__
725  for (int i=0; i<sz; ++i)
726  dest[i] = src[i];
727 #else
728  std::memcpy(dest,src,sz*sizeof(T));
729 #endif
730  }
731 
734  static void strided_copy(const T* src, int src_stride,
735  T* dest, int dest_stride, int sz) {
736  for (int i=0; i<sz; ++i) {
737  *(dest) = *(src);
738  dest += dest_stride;
739  src += src_stride;
740  }
741  }
742 
745  static void zero(T* dest, int sz) {
746  if (sz > 0 && dest != NULL)
747 #ifdef __CUDACC__
748  for (int i=0; i<sz; ++i)
749  dest[i] = T(0.);
750 #else
751  std::memset(dest,0,sz*sizeof(T));
752 #endif
753  }
754 
757  static void strided_zero(T* dest, int stride, int sz) {
758  for (int i=0; i<sz; ++i) {
759  *(dest) = T(0.);
760  dest += stride;
761  }
762  }
763 
766  static void destroy_and_release(T* m, int sz) {
767  Impl::ds_free(m, sz);
768  }
769  };
770 
771 #endif
772 
773 } // namespace Sacado
774 
775 #endif // SACADO_DYNAMICARRAY_HPP
const char * p
static SACADO_INLINE_FUNCTION void strided_copy(const T *src, int src_stride, T *dest, int dest_stride, int sz)
Copy array from src to dest of length sz.
static SACADO_INLINE_FUNCTION void strided_zero(T *dest, int stride, int sz)
Zero out array dest of length sz.
static SACADO_INLINE_FUNCTION void copy(const T *src, T *dest, int sz)
Copy array from src to dest of length sz.
void createGlobalMemoryPool(const ExecSpace &space, const size_t min_total_alloc_size, const uint32_t min_block_alloc_size, const uint32_t max_block_alloc_size, const uint32_t min_superblock_size)
static SACADO_INLINE_FUNCTION void zero(T *dest, int sz)
Zero out array dest of length sz.
static SACADO_INLINE_FUNCTION void destroy_and_release(T *m, int sz)
Destroy array elements and release memory.
static SACADO_INLINE_FUNCTION T * get_and_fill(const T *src, int sz)
Get memory for new array of length sz and fill with entries from src.
static SACADO_INLINE_FUNCTION void zero(T *dest, int sz)
Zero out array dest of length sz.
static SACADO_INLINE_FUNCTION T * strided_get_and_fill(const T *src, int stride, int sz)
Get memory for new array of length sz and fill with entries from src.
expr true
static SACADO_INLINE_FUNCTION void strided_copy(const T *src, int src_stride, T *dest, int dest_stride, int sz)
Copy array from src to dest of length sz.
#define T
Definition: Sacado_rad.hpp:573
static SACADO_INLINE_FUNCTION void ds_free(T *m, int sz)
static SACADO_INLINE_FUNCTION void destroy_and_release(T *m, int sz)
Destroy array elements and release memory.
int value
static SACADO_INLINE_FUNCTION T * ds_alloc(const int sz)
static SACADO_INLINE_FUNCTION void copy(const T *src, T *dest, int sz)
Copy array from src to dest of length sz.
static SACADO_INLINE_FUNCTION T * strided_get_and_fill(const T *src, int stride, int sz)
Get memory for new array of length sz and fill with entries from src.
void destroyGlobalMemoryPool(const ExecSpace &space)
#define SACADO_INLINE_FUNCTION
static SACADO_INLINE_FUNCTION T * get_and_fill(int sz)
Get memory for new array of length sz and fill with zeros.
static SACADO_INLINE_FUNCTION T * get_and_fill(const T *src, int sz)
Get memory for new array of length sz and fill with entries from src.
Dynamic array allocation class that works for any type.
static SACADO_INLINE_FUNCTION void strided_zero(T *dest, int stride, int sz)
Zero out array dest of length sz.
static SACADO_INLINE_FUNCTION T * get_and_fill(int sz)
Get memory for new array of length sz and fill with zeros.
const double y