Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
FadMPAssembly/fenl_functors.hpp
Go to the documentation of this file.
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos: Manycore Performance-Portable Multidimensional Arrays
6 // Copyright (2012) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
39 //
40 // ************************************************************************
41 //@HEADER
42 */
43 
44 #ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP
45 #define KOKKOS_EXAMPLE_FENLFUNCTORS_HPP
46 
47 #include <stdio.h>
48 
49 #include <iostream>
50 #include <fstream>
51 #include <iomanip>
52 #include <cstdlib>
53 #include <cmath>
54 #include <limits>
55 
56 #include <Kokkos_Pair.hpp>
57 #include <Kokkos_UnorderedMap.hpp>
58 
59 #include <impl/Kokkos_Timer.hpp>
60 
61 #include <BoxElemFixture.hpp>
62 #include <HexElement.hpp>
63 
65 #include "Sacado.hpp"
66 //#include "Fad/Sacado_Fad_SFad_MP_Vector.hpp"
68 //#include "Fad/Sacado_Fad_DFad_MP_Vector.hpp"
69 
70 //----------------------------------------------------------------------------
71 //----------------------------------------------------------------------------
72 
73 namespace Kokkos {
74 namespace Example {
75 namespace FENL {
76 
77 struct DeviceConfig {
78  struct Dim3 {
79  size_t x, y, z;
80  Dim3(const size_t x_, const size_t y_ = 1, const size_t z_ = 1) :
81  x(x_), y(y_), z(z_) {}
82  };
83 
85  size_t num_blocks;
87 
88  DeviceConfig(const size_t num_blocks_ = 0,
89  const size_t threads_per_block_x_ = 0,
90  const size_t threads_per_block_y_ = 0,
91  const size_t threads_per_block_z_ = 1) :
92  block_dim(threads_per_block_x_,threads_per_block_y_,threads_per_block_z_),
93  num_blocks(num_blocks_),
95  {}
96 };
97 
98 template< typename ValueType , class Space >
99 struct CrsMatrix {
100  typedef Kokkos::StaticCrsGraph< unsigned , Space , void , unsigned > StaticCrsGraphType ;
101  typedef View< ValueType * , Space > values_type ;
102 
105 
106  CrsMatrix() : graph(), values() {}
107 
108  CrsMatrix( const StaticCrsGraphType & arg_graph )
109  : graph( arg_graph )
110  , values( "crs_matrix_values" , arg_graph.entries.dimension_0() )
111  {}
112 };
113 
114 // Traits class for creating strided local views for embedded ensemble-based,
115 // specialized for ensemble UQ scalar type
116 template <typename ViewType, typename Enabled = void>
118  typedef ViewType view_type;
119  // typedef Kokkos::View<typename view_type::data_type,
120  // typename view_type::array_layout,
121  // typename view_type::execution_space,
122  // Kokkos::MemoryUnmanaged> local_view_type;
123  typedef const view_type& local_view_type;
125  static const bool use_team = false;
126  KOKKOS_INLINE_FUNCTION
128  const unsigned local_rank)
129  { return v; }
130 };
131 
132 #if defined( KOKKOS_HAVE_CUDA )
133 
134 template <typename ViewType>
135 struct LocalViewTraits<
136  ViewType,
137  typename std::enable_if< std::is_same<typename ViewType::execution_space,
138  Kokkos::Cuda>::value &&
139  Kokkos::is_view_mp_vector<ViewType>::value
140  >::type > {
141  typedef ViewType view_type;
144  static const bool use_team = true;
145 
146  KOKKOS_INLINE_FUNCTION
148  const unsigned local_rank)
149  {
150  return Kokkos::partition<1>(v, local_rank);
151  }
152 };
153 
154 #endif /* #if defined( KOKKOS_HAVE_CUDA ) */
155 
156 // Compute DeviceConfig struct's based on scalar type
157 template <typename ScalarType>
159  static void eval( Kokkos::Example::FENL::DeviceConfig& dev_config_elem,
160  Kokkos::Example::FENL::DeviceConfig& dev_config_bc ) {
161  dev_config_elem = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
162  dev_config_bc = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
163  }
164 };
165 
166 // Compute DeviceConfig struct's based on scalar type
167 template <typename StorageType>
168 struct CreateDeviceConfigs< Sacado::MP::Vector<StorageType> > {
170  static void eval( Kokkos::Example::FENL::DeviceConfig& dev_config_elem,
171  Kokkos::Example::FENL::DeviceConfig& dev_config_bc ) {
172  static const unsigned VectorSize = StorageType::static_size;
173 #if defined( KOKKOS_HAVE_CUDA )
174  enum { is_cuda = Kokkos::Impl::is_same< execution_space, Kokkos::Cuda >::value };
175 #else
176  enum { is_cuda = false };
177 #endif /* #if defined( KOKKOS_HAVE_CUDA ) */
178  if ( is_cuda ) {
179  dev_config_elem = Kokkos::Example::FENL::DeviceConfig( 0 , VectorSize , 64/VectorSize );
180  dev_config_bc = Kokkos::Example::FENL::DeviceConfig( 0 , VectorSize , 256/VectorSize );
181  }
182  else {
183  dev_config_elem = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
184  dev_config_bc = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
185  }
186  }
187 };
188 
189 template< class ElemNodeIdView , class CrsGraphType , unsigned ElemNode >
191 public:
192 
194  typedef pair<unsigned,unsigned> key_type ;
195 
196  typedef Kokkos::UnorderedMap< key_type, void , execution_space > SetType ;
197  typedef typename CrsGraphType::row_map_type::non_const_type RowMapType ;
198  typedef Kokkos::View< unsigned , execution_space > UnsignedValue ;
199 
200  // Static dimensions of 0 generate compiler warnings or errors.
201  typedef Kokkos::View< unsigned*[ElemNode][ElemNode] , execution_space >
203 
204 private:
205 
211 
212  const unsigned node_count ;
213  const ElemNodeIdView elem_node_id ;
219 
220 public:
221 
222  CrsGraphType graph ;
224 
225  struct Times
226  {
227  double ratio;
233  };
234 
235  NodeNodeGraph( const ElemNodeIdView & arg_elem_node_id ,
236  const unsigned arg_node_count,
237  Times & results
238  )
239  : node_count(arg_node_count)
240  , elem_node_id( arg_elem_node_id )
241  , row_total( "row_total" )
242  , row_count(Kokkos::ViewAllocateWithoutInitializing("row_count") , node_count ) // will deep_copy to 0 inside loop
243  , row_map( "graph_row_map" , node_count + 1 )
244  , node_node_set()
245  , phase( FILL_NODE_SET )
246  , graph()
247  , elem_graph()
248  {
249  //--------------------------------
250  // Guess at capacity required for the map:
251 
252  Kokkos::Impl::Timer wall_clock ;
253 
254  wall_clock.reset();
255  phase = FILL_NODE_SET ;
256 
257  // upper bound on the capacity
258  size_t set_capacity = (((28ull * node_count) / 2ull)*4ull)/3ull;
259 
260 
261  // Increase capacity until the (node,node) map is successfully filled.
262  {
263  // Zero the row count to restart the fill
265 
266  node_node_set = SetType( set_capacity );
267 
268  // May be larger that requested:
269  set_capacity = node_node_set.capacity();
270 
271  Kokkos::parallel_for( elem_node_id.dimension_0() , *this );
272  }
273 
274  execution_space::fence();
275  results.ratio = (double)node_node_set.size() / (double)node_node_set.capacity();
276  results.fill_node_set = wall_clock.seconds();
277  //--------------------------------
278 
279  wall_clock.reset();
281 
282  // Exclusive scan of row_count into row_map
283  // including the final total in the 'node_count + 1' position.
284  // Zero the 'row_count' values.
285  Kokkos::parallel_scan( node_count , *this );
286 
287  // Zero the row count for the fill:
289 
290  unsigned graph_entry_count = 0 ;
291 
292  Kokkos::deep_copy( graph_entry_count , row_total );
293 
294  // Assign graph's row_map and allocate graph's entries
295  graph.row_map = row_map ;
296  graph.entries = typename CrsGraphType::entries_type( "graph_entries" , graph_entry_count );
297 
298  //--------------------------------
299  // Fill graph's entries from the (node,node) set.
300 
301  execution_space::fence();
302  results.scan_node_count = wall_clock.seconds();
303 
304  wall_clock.reset();
306  Kokkos::parallel_for( node_node_set.capacity() , *this );
307 
308  execution_space::fence();
309  results.fill_graph_entries = wall_clock.seconds();
310 
311  //--------------------------------
312  // Done with the temporary sets and arrays
313  wall_clock.reset();
315 
317  row_count = RowMapType();
318  row_map = RowMapType();
319  node_node_set.clear();
320 
321  //--------------------------------
322 
323  Kokkos::parallel_for( node_count , *this );
324 
325  execution_space::fence();
326  results.sort_graph_entries = wall_clock.seconds();
327 
328  //--------------------------------
329  // Element-to-graph mapping:
330  wall_clock.reset();
332  elem_graph = ElemGraphType("elem_graph", elem_node_id.dimension_0() );
333  Kokkos::parallel_for( elem_node_id.dimension_0() , *this );
334 
335  execution_space::fence();
336  results.fill_element_graph = wall_clock.seconds();
337  }
338 
339  //------------------------------------
340  // parallel_for: create map and count row length
341 
342  KOKKOS_INLINE_FUNCTION
343  void fill_set( const unsigned ielem ) const
344  {
345  // Loop over element's (row_local_node,col_local_node) pairs:
346  for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.dimension_1() ; ++row_local_node ) {
347 
348  const unsigned row_node = elem_node_id( ielem , row_local_node );
349 
350  for ( unsigned col_local_node = row_local_node ; col_local_node < elem_node_id.dimension_1() ; ++col_local_node ) {
351 
352  const unsigned col_node = elem_node_id( ielem , col_local_node );
353 
354  // If either node is locally owned then insert the pair into the unordered map:
355 
356  if ( row_node < row_count.dimension_0() || col_node < row_count.dimension_0() ) {
357 
358  const key_type key = (row_node < col_node) ? make_pair( row_node, col_node ) : make_pair( col_node, row_node ) ;
359 
360  const typename SetType::insert_result result = node_node_set.insert( key );
361 
362  if ( result.success() ) {
363  if ( row_node < row_count.dimension_0() ) { atomic_fetch_add( & row_count( row_node ) , 1 ); }
364  if ( col_node < row_count.dimension_0() && col_node != row_node ) { atomic_fetch_add( & row_count( col_node ) , 1 ); }
365  }
366  }
367  }
368  }
369  }
370 
371  KOKKOS_INLINE_FUNCTION
372  void fill_graph_entries( const unsigned iset ) const
373  {
374  if ( node_node_set.valid_at(iset) ) {
375  const key_type key = node_node_set.key_at(iset) ;
376  const unsigned row_node = key.first ;
377  const unsigned col_node = key.second ;
378 
379  if ( row_node < row_count.dimension_0() ) {
380  const unsigned offset = graph.row_map( row_node ) + atomic_fetch_add( & row_count( row_node ) , 1 );
381  graph.entries( offset ) = col_node ;
382  }
383 
384  if ( col_node < row_count.dimension_0() && col_node != row_node ) {
385  const unsigned offset = graph.row_map( col_node ) + atomic_fetch_add( & row_count( col_node ) , 1 );
386  graph.entries( offset ) = row_node ;
387  }
388  }
389  }
390 
391  KOKKOS_INLINE_FUNCTION
392  void sort_graph_entries( const unsigned irow ) const
393  {
394  typedef typename CrsGraphType::size_type size_type;
395  const size_type row_beg = graph.row_map( irow );
396  const size_type row_end = graph.row_map( irow + 1 );
397  for ( size_type i = row_beg + 1 ; i < row_end ; ++i ) {
398  const typename CrsGraphType::data_type col = graph.entries(i);
399  size_type j = i ;
400  for ( ; row_beg < j && col < graph.entries(j-1) ; --j ) {
401  graph.entries(j) = graph.entries(j-1);
402  }
403  graph.entries(j) = col ;
404  }
405  }
406 
407  KOKKOS_INLINE_FUNCTION
408  void fill_elem_graph_map( const unsigned ielem ) const
409  {
410  typedef typename CrsGraphType::data_type entry_type;
411  for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.dimension_1() ; ++row_local_node ) {
412 
413  const unsigned row_node = elem_node_id( ielem , row_local_node );
414 
415  for ( unsigned col_local_node = 0 ; col_local_node < elem_node_id.dimension_1() ; ++col_local_node ) {
416 
417  const unsigned col_node = elem_node_id( ielem , col_local_node );
418 
419  entry_type entry = 0 ;
420 
421  if ( row_node + 1 < graph.row_map.dimension_0() ) {
422 
423  const entry_type entry_end = static_cast<entry_type> (graph.row_map( row_node + 1 ));
424 
425  entry = graph.row_map( row_node );
426 
427  for ( ; entry < entry_end && graph.entries(entry) != static_cast<entry_type> (col_node) ; ++entry );
428 
429  if ( entry == entry_end ) entry = ~0u ;
430  }
431 
432  elem_graph( ielem , row_local_node , col_local_node ) = entry ;
433  }
434  }
435  }
436 
437  KOKKOS_INLINE_FUNCTION
438  void operator()( const unsigned iwork ) const
439  {
440  if ( phase == FILL_NODE_SET ) {
441  fill_set( iwork );
442  }
443  else if ( phase == FILL_GRAPH_ENTRIES ) {
444  fill_graph_entries( iwork );
445  }
446  else if ( phase == SORT_GRAPH_ENTRIES ) {
447  sort_graph_entries( iwork );
448  }
449  else if ( phase == FILL_ELEMENT_GRAPH ) {
450  fill_elem_graph_map( iwork );
451  }
452  }
453 
454  //------------------------------------
455  // parallel_scan: row offsets
456 
457  typedef unsigned value_type ;
458 
459  KOKKOS_INLINE_FUNCTION
460  void operator()( const unsigned irow , unsigned & update , const bool final ) const
461  {
462  // exclusive scan
463  if ( final ) { row_map( irow ) = update ; }
464 
465  update += row_count( irow );
466 
467  if ( final ) {
468  if ( irow + 1 == row_count.dimension_0() ) {
469  row_map( irow + 1 ) = update ;
470  row_total() = update ;
471  }
472  }
473  }
474 
475  KOKKOS_INLINE_FUNCTION
476  void init( unsigned & update ) const { update = 0 ; }
477 
478  KOKKOS_INLINE_FUNCTION
479  void join( volatile unsigned & update , const volatile unsigned & input ) const { update += input ; }
480 
481  //------------------------------------
482 };
483 
484 } /* namespace FENL */
485 } /* namespace Example */
486 } /* namespace Kokkos */
487 
488 //----------------------------------------------------------------------------
489 //----------------------------------------------------------------------------
490 
491 namespace Kokkos {
492 namespace Example {
493 namespace FENL {
494 
496  enum { is_constant = false };
497 
498  const double coeff_k ;
499 
500  KOKKOS_INLINE_FUNCTION
501  double operator()( double pt[], unsigned ensemble_rank) const
502  { return coeff_k * std::sin(pt[0]) * std::sin(pt[1]) * std::sin(pt[2]); }
503 
505  : coeff_k( val ) {}
506 
508  : coeff_k( rhs.coeff_k ) {}
509 };
510 
511 // Exponential KL from Stokhos
512 template < typename Scalar, typename MeshScalar, typename Device >
514 public:
515 
516  // Turn into a meta-function class usable with Sacado::mpl::apply
517  template <typename T1, typename T2 = MeshScalar, typename T3 = Device>
518  struct apply {
520  };
521 
522  enum { is_constant = false };
523  typedef Kokkos::View<Scalar*, Kokkos::LayoutLeft, Device> RandomVariableView;
524  typedef typename RandomVariableView::size_type size_type;
525 
530 
531  rf_type m_rf; // Exponential random field
532  const MeshScalar m_mean; // Mean of random field
533  const MeshScalar m_variance; // Variance of random field
534  const MeshScalar m_corr_len; // Correlation length of random field
535  const size_type m_num_rv; // Number of random variables
536  RandomVariableView m_rv; // KL random variables
537 
538 public:
539 
541  const MeshScalar mean ,
542  const MeshScalar variance ,
543  const MeshScalar correlation_length ,
544  const size_type num_rv ) :
545  m_mean( mean ),
546  m_variance( variance ),
547  m_corr_len( correlation_length ),
548  m_num_rv( num_rv ),
549  m_rv( "KL Random Variables", m_num_rv )
550  {
551  Teuchos::ParameterList solverParams;
552  solverParams.set("Number of KL Terms", int(num_rv));
553  solverParams.set("Mean", mean);
554  solverParams.set("Standard Deviation", std::sqrt(variance));
555  int ndim = 3;
556  Teuchos::Array<double> domain_upper(ndim, 1.0), domain_lower(ndim, 0.0),
557  correlation_lengths(ndim, correlation_length);
558  solverParams.set("Domain Upper Bounds", domain_upper);
559  solverParams.set("Domain Lower Bounds", domain_lower);
560  solverParams.set("Correlation Lengths", correlation_lengths);
561 
562  m_rf = rf_type(solverParams);
563  }
564 
566  m_rf( rhs.m_rf ) ,
567  m_mean( rhs.m_mean ) ,
568  m_variance( rhs.m_variance ) ,
569  m_corr_len( rhs.m_corr_len ) ,
570  m_num_rv( rhs.m_num_rv ) ,
571  m_rv( rhs.m_rv ) {}
572 
573  KOKKOS_INLINE_FUNCTION
574  void setRandomVariables( const RandomVariableView& rv) { m_rv = rv; }
575 
576  KOKKOS_INLINE_FUNCTION
578 
579  KOKKOS_INLINE_FUNCTION
580  local_scalar_type operator() ( const MeshScalar point[],
581  const size_type ensemble_rank ) const
582  {
583  local_rv_view_type local_rv =
585 
586  local_scalar_type val = m_rf.evaluate(point, local_rv);
587 
588  return val;
589  }
590 };
591 
592 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
593  class CoordinateMap , typename ScalarType >
595 {
596 public:
597 
600 
601  //------------------------------------
602 
603  typedef ExecutionSpace execution_space ;
604  typedef ScalarType scalar_type ;
605 
608  typedef Kokkos::View< scalar_type* , Kokkos::LayoutLeft, execution_space > vector_type ;
609 
610  //------------------------------------
611 
613  static const unsigned TensorDim = SpatialDim * SpatialDim ;
617 
618  //------------------------------------
619 
622  typedef Kokkos::View< scalar_type*[FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
623  typedef Kokkos::View< scalar_type*[FunctionCount] , execution_space > elem_vectors_type ;
624 
626 
627  //------------------------------------
628 
629 
630  //------------------------------------
631  // Computational data:
632 
642 
644  : elem_data()
646  , node_coords( rhs.node_coords )
647  , elem_graph( rhs.elem_graph )
650  , solution( rhs.solution )
651  , residual( rhs.residual )
652  , jacobian( rhs.jacobian )
653  {}
654 
655  ElementComputationBase( const mesh_type & arg_mesh ,
656  const vector_type & arg_solution ,
657  const elem_graph_type & arg_elem_graph ,
658  const sparse_matrix_type & arg_jacobian ,
659  const vector_type & arg_residual )
660  : elem_data()
661  , elem_node_ids( arg_mesh.elem_node() )
662  , node_coords( arg_mesh.node_coord() )
663  , elem_graph( arg_elem_graph )
664  , elem_jacobians()
665  , elem_residuals()
666  , solution( arg_solution )
667  , residual( arg_residual )
668  , jacobian( arg_jacobian )
669  {}
670 
671  //------------------------------------
672 
673  KOKKOS_INLINE_FUNCTION
675  const double grad[][ FunctionCount ] , // Gradient of bases master element
676  const double x[] ,
677  const double y[] ,
678  const double z[] ,
679  double dpsidx[] ,
680  double dpsidy[] ,
681  double dpsidz[] ) const
682  {
683  enum { j11 = 0 , j12 = 1 , j13 = 2 ,
684  j21 = 3 , j22 = 4 , j23 = 5 ,
685  j31 = 6 , j32 = 7 , j33 = 8 };
686 
687  // Jacobian accumulation:
688 
689  double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
690 
691  for( unsigned i = 0; i < FunctionCount ; ++i ) {
692  const double x1 = x[i] ;
693  const double x2 = y[i] ;
694  const double x3 = z[i] ;
695 
696  const double g1 = grad[0][i] ;
697  const double g2 = grad[1][i] ;
698  const double g3 = grad[2][i] ;
699 
700  J[j11] += g1 * x1 ;
701  J[j12] += g1 * x2 ;
702  J[j13] += g1 * x3 ;
703 
704  J[j21] += g2 * x1 ;
705  J[j22] += g2 * x2 ;
706  J[j23] += g2 * x3 ;
707 
708  J[j31] += g3 * x1 ;
709  J[j32] += g3 * x2 ;
710  J[j33] += g3 * x3 ;
711  }
712 
713  // Inverse jacobian:
714 
715  double invJ[ TensorDim ] = {
716  static_cast<double>( J[j22] * J[j33] - J[j23] * J[j32] ) ,
717  static_cast<double>( J[j13] * J[j32] - J[j12] * J[j33] ) ,
718  static_cast<double>( J[j12] * J[j23] - J[j13] * J[j22] ) ,
719 
720  static_cast<double>( J[j23] * J[j31] - J[j21] * J[j33] ) ,
721  static_cast<double>( J[j11] * J[j33] - J[j13] * J[j31] ) ,
722  static_cast<double>( J[j13] * J[j21] - J[j11] * J[j23] ) ,
723 
724  static_cast<double>( J[j21] * J[j32] - J[j22] * J[j31] ) ,
725  static_cast<double>( J[j12] * J[j31] - J[j11] * J[j32] ) ,
726  static_cast<double>( J[j11] * J[j22] - J[j12] * J[j21] ) };
727 
728  const double detJ = J[j11] * invJ[j11] +
729  J[j21] * invJ[j12] +
730  J[j31] * invJ[j13] ;
731 
732  const double detJinv = 1.0 / detJ ;
733 
734  for ( unsigned i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; }
735 
736  // Transform gradients:
737 
738  for( unsigned i = 0; i < FunctionCount ; ++i ) {
739  const double g0 = grad[0][i];
740  const double g1 = grad[1][i];
741  const double g2 = grad[2][i];
742 
743  dpsidx[i] = g0 * invJ[j11] + g1 * invJ[j12] + g2 * invJ[j13];
744  dpsidy[i] = g0 * invJ[j21] + g1 * invJ[j22] + g2 * invJ[j23];
745  dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33];
746  }
747 
748  return detJ ;
749  }
750 
751 };
752 
758 };
759 
760 template< class FiniteElementMeshType ,
761  class SparseMatrixType ,
762  AssemblyMethod Method ,
763  class CoeffFunctionType = ElementComputationConstantCoefficient
764  >
766 
767 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
768  class CoordinateMap , typename ScalarType , class CoeffFunctionType >
770  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
771  CrsMatrix< ScalarType , ExecutionSpace > ,
772  Analytic , CoeffFunctionType > :
773  public ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
774  ScalarType> {
775 public:
776 
777  typedef ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
778  ScalarType> base_type;
779 
782 
783  static const unsigned FunctionCount = base_type::FunctionCount;
784  static const unsigned IntegrationCount = base_type::IntegrationCount;
785  static const unsigned ElemNodeCount = base_type::ElemNodeCount;
786 
787  const CoeffFunctionType coeff_function ;
789 
791  base_type(rhs) ,
792  coeff_function( rhs.coeff_function ) ,
793  dev_config( rhs.dev_config ) {}
794 
796  const typename base_type::mesh_type & arg_mesh ,
797  const CoeffFunctionType & arg_coeff_function ,
798  const typename base_type::vector_type & arg_solution ,
799  const typename base_type::elem_graph_type & arg_elem_graph ,
800  const typename base_type::sparse_matrix_type & arg_jacobian ,
801  const typename base_type::vector_type & arg_residual ,
802  const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
803  base_type(arg_mesh, arg_solution, arg_elem_graph, arg_jacobian,
804  arg_residual),
805  coeff_function( arg_coeff_function ) ,
806  dev_config( arg_dev_config ) {}
807 
808  //------------------------------------
809 
810  void apply() const
811  {
812  const size_t nelem = this->elem_node_ids.dimension_0();
813  parallel_for( nelem , *this );
814  }
815 
816  KOKKOS_INLINE_FUNCTION
817  void gatherSolution(const unsigned ielem,
818  scalar_type val[],
819  unsigned node_index[],
820  double x[], double y[], double z[],
821  scalar_type res[],
822  scalar_type mat[][FunctionCount]) const
823  {
824  for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
825  const unsigned ni = this->elem_node_ids( ielem , i );
826 
827  node_index[i] = ni ;
828 
829  x[i] = this->node_coords( ni , 0 );
830  y[i] = this->node_coords( ni , 1 );
831  z[i] = this->node_coords( ni , 2 );
832 
833  val[i] = this->solution( ni ) ;
834  res[i] = 0 ;
835 
836  for( unsigned j = 0; j < FunctionCount ; j++){
837  mat[i][j] = 0 ;
838  }
839  }
840  }
841 
842  KOKKOS_INLINE_FUNCTION
843  void scatterResidual(const unsigned ielem,
844  const unsigned node_index[],
845  const scalar_type res[],
846  const scalar_type mat[][FunctionCount]) const
847  {
848  for( unsigned i = 0 ; i < FunctionCount ; i++ ) {
849  const unsigned row = node_index[i] ;
850  if ( row < this->residual.dimension_0() ) {
851  atomic_add( & this->residual( row ) , res[i] );
852 
853  for( unsigned j = 0 ; j < FunctionCount ; j++ ) {
854  const unsigned entry = this->elem_graph( ielem , i , j );
855  if ( entry != ~0u ) {
856  atomic_add( & this->jacobian.values( entry ) , mat[i][j] );
857  }
858  }
859  }
860  }
861  }
862 
863  KOKKOS_INLINE_FUNCTION
865  const scalar_type dof_values[] ,
866  const double x[],
867  const double y[],
868  const double z[],
869  scalar_type elem_res[] ,
870  scalar_type elem_mat[][FunctionCount] ) const
871  {
872  scalar_type coeff_k;
873  double coeff_src = 1.234;
874  double advection[] = { 1.1, 1.2, 1.3 };
875  double dpsidx[ FunctionCount ] ;
876  double dpsidy[ FunctionCount ] ;
877  double dpsidz[ FunctionCount ] ;
878  double pt[] = {0.0, 0.0, 0.0};
879  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
880 
881  // If function is not constant
882  // then compute physical coordinates of integration point
883  if ( ! coeff_function.is_constant ) {
884  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
885  pt[0] += x[j] * this->elem_data.values[i][j] ;
886  pt[1] += y[j] * this->elem_data.values[i][j] ;
887  pt[2] += z[j] * this->elem_data.values[i][j] ;
888  }
889  }
890  coeff_k = coeff_function(pt, 0);
891 
892  const double integ_weight = this->elem_data.weights[i];
893  const double* bases_vals = this->elem_data.values[i];
894  const double detJ =
895  this->transform_gradients( this->elem_data.gradients[i] ,
896  x , y , z ,
897  dpsidx , dpsidy , dpsidz );
898  const double detJ_weight = detJ * integ_weight;
899  const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
900 
901  scalar_type value_at_pt = 0 ;
902  scalar_type gradx_at_pt = 0 ;
903  scalar_type grady_at_pt = 0 ;
904  scalar_type gradz_at_pt = 0 ;
905  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
906  value_at_pt += dof_values[m] * bases_vals[m] ;
907  gradx_at_pt += dof_values[m] * dpsidx[m] ;
908  grady_at_pt += dof_values[m] * dpsidy[m] ;
909  gradz_at_pt += dof_values[m] * dpsidz[m] ;
910  }
911 
912  const scalar_type source_term =
913  coeff_src * value_at_pt * value_at_pt ;
914  const scalar_type source_deriv =
915  2.0 * coeff_src * value_at_pt ;
916 
917  const scalar_type advection_x = advection[0];
918  const scalar_type advection_y = advection[1];
919  const scalar_type advection_z = advection[2];
920 
921  const scalar_type advection_term =
922  advection_x*gradx_at_pt +
923  advection_y*grady_at_pt +
924  advection_z*gradz_at_pt ;
925 
926  for ( unsigned m = 0; m < FunctionCount; ++m) {
927  scalar_type * const mat = elem_mat[m] ;
928  const double bases_val_m = bases_vals[m] * detJ_weight ;
929  const double dpsidx_m = dpsidx[m] ;
930  const double dpsidy_m = dpsidy[m] ;
931  const double dpsidz_m = dpsidz[m] ;
932 
933  elem_res[m] +=
934  detJ_weight_coeff_k * ( dpsidx_m * gradx_at_pt +
935  dpsidy_m * grady_at_pt +
936  dpsidz_m * gradz_at_pt ) +
937  bases_val_m * ( advection_term + source_term ) ;
938 
939  for( unsigned n = 0; n < FunctionCount; n++) {
940  const double dpsidx_n = dpsidx[n] ;
941  const double dpsidy_n = dpsidy[n] ;
942  const double dpsidz_n = dpsidz[n] ;
943  mat[n] +=
944  detJ_weight_coeff_k * ( dpsidx_m * dpsidx_n +
945  dpsidy_m * dpsidy_n +
946  dpsidz_m * dpsidz_n ) +
947  bases_val_m * ( advection_x * dpsidx_n +
948  advection_y * dpsidy_n +
949  advection_z * dpsidz_n +
950  source_deriv * bases_vals[n] ) ;
951  }
952  }
953  }
954  }
955 
956  KOKKOS_INLINE_FUNCTION
957  void operator()( const unsigned ielem ) const
958  {
959  double x[ FunctionCount ] ;
960  double y[ FunctionCount ] ;
961  double z[ FunctionCount ] ;
962  unsigned node_index[ ElemNodeCount ];
963 
964  scalar_type val[ FunctionCount ] ;
965  scalar_type elem_res[ FunctionCount ] ;
966  scalar_type elem_mat[ FunctionCount ][ FunctionCount ] ;
967 
968  // Gather nodal coordinates and solution vector:
969  gatherSolution(ielem, val, node_index, x, y, z, elem_res, elem_mat);
970 
971  // Compute nodal element residual vector and Jacobian matrix
972  computeElementResidualJacobian( val, x, y, z, elem_res , elem_mat );
973 
974  // Scatter nodal element residual and Jacobian in global vector and matrix:
975  scatterResidual( ielem, node_index, elem_res, elem_mat );
976  }
977 }; /* ElementComputation */
978 
979 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
980  class CoordinateMap , typename ScalarType , class CoeffFunctionType >
982  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
983  CrsMatrix< ScalarType , ExecutionSpace > ,
984  FadElement , CoeffFunctionType > :
985  public ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
986  ScalarType> {
987 public:
988 
989  typedef ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
990  ScalarType> base_type;
991 
994 
995  static const unsigned FunctionCount = base_type::FunctionCount;
996  static const unsigned IntegrationCount = base_type::IntegrationCount;
997  static const unsigned ElemNodeCount = base_type::ElemNodeCount;
998 
999  typedef Sacado::Fad::SLFad<scalar_type,FunctionCount> fad_scalar_type;
1000  //typedef Sacado::Fad::DFad<scalar_type> fad_scalar_type;
1001 
1002  const CoeffFunctionType coeff_function ;
1004 
1006  base_type(rhs) ,
1007  coeff_function( rhs.coeff_function ) ,
1008  dev_config( rhs.dev_config ) {}
1009 
1011  const typename base_type::mesh_type & arg_mesh ,
1012  const CoeffFunctionType & arg_coeff_function ,
1013  const typename base_type::vector_type & arg_solution ,
1014  const typename base_type::elem_graph_type & arg_elem_graph ,
1015  const typename base_type::sparse_matrix_type & arg_jacobian ,
1016  const typename base_type::vector_type & arg_residual ,
1017  const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
1018  base_type(arg_mesh, arg_solution, arg_elem_graph,
1019  arg_jacobian, arg_residual),
1020  coeff_function( arg_coeff_function ) ,
1021  dev_config( arg_dev_config ) {}
1022 
1023  //------------------------------------
1024 
1025  void apply() const
1026  {
1027  const size_t nelem = this->elem_node_ids.dimension_0();
1028  parallel_for( nelem , *this );
1029  }
1030 
1031  KOKKOS_INLINE_FUNCTION
1032  void gatherSolution(const unsigned ielem,
1033  fad_scalar_type val[],
1034  unsigned node_index[],
1035  double x[], double y[], double z[],
1036  fad_scalar_type res[]) const
1037  {
1038  for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
1039  const unsigned ni = this->elem_node_ids( ielem , i );
1040 
1041  node_index[i] = ni ;
1042 
1043  x[i] = this->node_coords( ni , 0 );
1044  y[i] = this->node_coords( ni , 1 );
1045  z[i] = this->node_coords( ni , 2 );
1046 
1047  val[i].val() = this->solution( ni );
1048  val[i].diff( i, FunctionCount );
1049  }
1050  }
1051 
1052  KOKKOS_INLINE_FUNCTION
1053  void scatterResidual(const unsigned ielem,
1054  const unsigned node_index[],
1055  fad_scalar_type res[]) const
1056  {
1057  for( unsigned i = 0 ; i < FunctionCount ; i++ ) {
1058  const unsigned row = node_index[i] ;
1059  if ( row < this->residual.dimension_0() ) {
1060  atomic_add( & this->residual( row ) , res[i].val() );
1061 
1062  for( unsigned j = 0 ; j < FunctionCount ; j++ ) {
1063  const unsigned entry = this->elem_graph( ielem , i , j );
1064  if ( entry != ~0u ) {
1065  atomic_add( & this->jacobian.values( entry ) ,
1066  res[i].fastAccessDx(j) );
1067  }
1068  }
1069  }
1070  }
1071  }
1072 
1073  template <typename local_scalar_type>
1074  KOKKOS_INLINE_FUNCTION
1075  void computeElementResidual(const local_scalar_type dof_values[] ,
1076  const double x[],
1077  const double y[],
1078  const double z[],
1079  local_scalar_type elem_res[] ) const
1080  {
1081  scalar_type coeff_k;
1082  double coeff_src = 1.234;
1083  double advection[] = { 1.1, 1.2, 1.3 };
1084  double dpsidx[ FunctionCount ] ;
1085  double dpsidy[ FunctionCount ] ;
1086  double dpsidz[ FunctionCount ] ;
1087  double pt[] = {0.0, 0.0, 0.0};
1088  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1089 
1090  // If function is not constant
1091  // then compute physical coordinates of integration point
1092  if ( ! coeff_function.is_constant ) {
1093  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1094  pt[0] += x[j] * this->elem_data.values[i][j] ;
1095  pt[1] += y[j] * this->elem_data.values[i][j] ;
1096  pt[2] += z[j] * this->elem_data.values[i][j] ;
1097  }
1098  }
1099  coeff_k = coeff_function(pt, 0);
1100 
1101  const double integ_weight = this->elem_data.weights[i];
1102  const double* bases_vals = this->elem_data.values[i];
1103  const double detJ =
1104  this->transform_gradients( this->elem_data.gradients[i] ,
1105  x , y , z ,
1106  dpsidx , dpsidy , dpsidz );
1107  const double detJ_weight = detJ * integ_weight;
1108  const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
1109 
1110  local_scalar_type value_at_pt = 0 ;
1111  local_scalar_type gradx_at_pt = 0 ;
1112  local_scalar_type grady_at_pt = 0 ;
1113  local_scalar_type gradz_at_pt = 0 ;
1114  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1115  value_at_pt += dof_values[m] * bases_vals[m] ;
1116  gradx_at_pt += dof_values[m] * dpsidx[m] ;
1117  grady_at_pt += dof_values[m] * dpsidy[m] ;
1118  gradz_at_pt += dof_values[m] * dpsidz[m] ;
1119  }
1120 
1121  const local_scalar_type source_term =
1122  coeff_src * value_at_pt * value_at_pt ;
1123 
1124  const local_scalar_type advection_term =
1125  advection[0]*gradx_at_pt +
1126  advection[1]*grady_at_pt +
1127  advection[2]*gradz_at_pt;
1128 
1129  for ( unsigned m = 0; m < FunctionCount; ++m) {
1130  const double bases_val_m = bases_vals[m] * detJ_weight ;
1131  const double dpsidx_m = dpsidx[m] ;
1132  const double dpsidy_m = dpsidy[m] ;
1133  const double dpsidz_m = dpsidz[m] ;
1134 
1135  elem_res[m] +=
1136  detJ_weight_coeff_k * ( dpsidx_m * gradx_at_pt +
1137  dpsidy_m * grady_at_pt +
1138  dpsidz_m * gradz_at_pt ) +
1139  bases_val_m * ( advection_term + source_term ) ;
1140  }
1141  }
1142  }
1143 
1144  KOKKOS_INLINE_FUNCTION
1145  void operator()( const unsigned ielem ) const
1146  {
1147  double x[ FunctionCount ] ;
1148  double y[ FunctionCount ] ;
1149  double z[ FunctionCount ] ;
1150  unsigned node_index[ ElemNodeCount ];
1151 
1152  fad_scalar_type val[ FunctionCount ] ;
1153  fad_scalar_type elem_res[ FunctionCount ] ; // this zeros elem_res
1154 
1155  // Gather nodal coordinates and solution vector:
1156  gatherSolution( ielem, val, node_index, x, y, z, elem_res );
1157 
1158  // Compute nodal element residual vector:
1159  computeElementResidual( val, x, y, z, elem_res );
1160 
1161  // Scatter nodal element residual in global vector:
1162  scatterResidual( ielem, node_index, elem_res );
1163  }
1164 }; /* ElementComputation */
1165 
1166 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
1167  class CoordinateMap , typename ScalarType , class CoeffFunctionType >
1169  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1170  CrsMatrix< ScalarType , ExecutionSpace > ,
1171  FadElementOptimized , CoeffFunctionType > :
1172  public ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1173  CrsMatrix< ScalarType , ExecutionSpace > ,
1174  FadElement, CoeffFunctionType > {
1175 public:
1176 
1179  FadElement , CoeffFunctionType > base_type;
1180 
1183 
1184  static const unsigned FunctionCount = base_type::FunctionCount;
1185  static const unsigned IntegrationCount = base_type::IntegrationCount;
1186  static const unsigned ElemNodeCount = base_type::ElemNodeCount;
1187 
1188  typedef Sacado::Fad::SLFad<scalar_type,FunctionCount> fad_scalar_type;
1189  //typedef Sacado::Fad::DFad<scalar_type> fad_scalar_type;
1190 
1192 
1194  const typename base_type::mesh_type & arg_mesh ,
1195  const CoeffFunctionType & arg_coeff_function ,
1196  const typename base_type::vector_type & arg_solution ,
1197  const typename base_type::elem_graph_type & arg_elem_graph ,
1198  const typename base_type::sparse_matrix_type & arg_jacobian ,
1199  const typename base_type::vector_type & arg_residual ,
1200  const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
1201  base_type(arg_mesh, arg_coeff_function, arg_solution, arg_elem_graph,
1202  arg_jacobian, arg_residual, arg_dev_config) {}
1203 
1204  //------------------------------------
1205 
1206  void apply() const
1207  {
1208  const size_t nelem = this->elem_node_ids.dimension_0();
1209  parallel_for( nelem , *this );
1210  }
1211 
1212  KOKKOS_INLINE_FUNCTION
1213  void gatherSolution(const unsigned ielem,
1214  scalar_type val[],
1215  unsigned node_index[],
1216  double x[], double y[], double z[],
1217  fad_scalar_type res[]) const
1218  {
1219  for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
1220  const unsigned ni = this->elem_node_ids( ielem , i );
1221 
1222  node_index[i] = ni ;
1223 
1224  x[i] = this->node_coords( ni , 0 );
1225  y[i] = this->node_coords( ni , 1 );
1226  z[i] = this->node_coords( ni , 2 );
1227 
1228  val[i] = this->solution( ni );
1229  }
1230  }
1231 
1232  template <typename local_scalar_type>
1233  KOKKOS_INLINE_FUNCTION
1234  void computeElementResidual(const scalar_type dof_values[] ,
1235  const double x[],
1236  const double y[],
1237  const double z[],
1238  local_scalar_type elem_res[] ) const
1239  {
1240  scalar_type coeff_k;
1241  double coeff_src = 1.234;
1242  double advection[] = { 1.1, 1.2, 1.3 };
1243  double dpsidx[ FunctionCount ] ;
1244  double dpsidy[ FunctionCount ] ;
1245  double dpsidz[ FunctionCount ] ;
1246  double pt[] = {0.0, 0.0, 0.0};
1247  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1248 
1249  // If function is not constant
1250  // then compute physical coordinates of integration point
1251  if ( ! this->coeff_function.is_constant ) {
1252  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1253  pt[0] += x[j] * this->elem_data.values[i][j] ;
1254  pt[1] += y[j] * this->elem_data.values[i][j] ;
1255  pt[2] += z[j] * this->elem_data.values[i][j] ;
1256  }
1257  }
1258  coeff_k = this->coeff_function(pt, 0);
1259 
1260  const double integ_weight = this->elem_data.weights[i];
1261  const double* bases_vals = this->elem_data.values[i];
1262  const double detJ =
1263  this->transform_gradients( this->elem_data.gradients[i] ,
1264  x , y , z ,
1265  dpsidx , dpsidy , dpsidz );
1266  const double detJ_weight = detJ * integ_weight;
1267  const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
1268 
1269  local_scalar_type value_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1270  local_scalar_type gradx_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1271  local_scalar_type grady_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1272  local_scalar_type gradz_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1273  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1274  value_at_pt.val() += dof_values[m] * bases_vals[m] ;
1275  value_at_pt.fastAccessDx(m) = bases_vals[m] ;
1276 
1277  gradx_at_pt.val() += dof_values[m] * dpsidx[m] ;
1278  gradx_at_pt.fastAccessDx(m) = dpsidx[m] ;
1279 
1280  grady_at_pt.val() += dof_values[m] * dpsidy[m] ;
1281  grady_at_pt.fastAccessDx(m) = dpsidy[m] ;
1282 
1283  gradz_at_pt.val() += dof_values[m] * dpsidz[m] ;
1284  gradz_at_pt.fastAccessDx(m) = dpsidz[m] ;
1285  }
1286 
1287  const local_scalar_type source_term =
1288  coeff_src * value_at_pt * value_at_pt ;
1289 
1290  const local_scalar_type advection_term =
1291  advection[0]*gradx_at_pt +
1292  advection[1]*grady_at_pt +
1293  advection[2]*gradz_at_pt;
1294 
1295  for ( unsigned m = 0; m < FunctionCount; ++m) {
1296  const double bases_val_m = bases_vals[m] * detJ_weight ;
1297  const double dpsidx_m = dpsidx[m] ;
1298  const double dpsidy_m = dpsidy[m] ;
1299  const double dpsidz_m = dpsidz[m] ;
1300 
1301  elem_res[m] +=
1302  detJ_weight_coeff_k * ( dpsidx_m * gradx_at_pt +
1303  dpsidy_m * grady_at_pt +
1304  dpsidz_m * gradz_at_pt ) +
1305  bases_val_m * ( advection_term + source_term ) ;
1306  }
1307  }
1308  }
1309 
1310  KOKKOS_INLINE_FUNCTION
1311  void operator()( const unsigned ielem ) const
1312  {
1313  double x[ FunctionCount ] ;
1314  double y[ FunctionCount ] ;
1315  double z[ FunctionCount ] ;
1316  unsigned node_index[ ElemNodeCount ];
1317 
1318  scalar_type val[ FunctionCount ] ;
1319  fad_scalar_type elem_res[ FunctionCount ] ;
1320 
1321  // Gather nodal coordinates and solution vector:
1322  gatherSolution( ielem, val, node_index, x, y, z, elem_res );
1323 
1324  // Compute nodal element residual vector:
1325  computeElementResidual( val, x, y, z, elem_res );
1326 
1327  // Scatter nodal element residual in global vector:
1328  this->scatterResidual( ielem, node_index, elem_res );
1329  }
1330 }; /* ElementComputation */
1331 
1332 template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
1333  class CoordinateMap , typename ScalarType , class CoeffFunctionType >
1335  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1336  CrsMatrix< ScalarType , ExecutionSpace > ,
1337  FadQuadPoint , CoeffFunctionType > :
1338  public ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1339  CrsMatrix< ScalarType , ExecutionSpace > ,
1340  Analytic , CoeffFunctionType > {
1341 public:
1342 
1345  Analytic , CoeffFunctionType > base_type;
1346 
1349 
1350  static const unsigned FunctionCount = base_type::FunctionCount;
1351  static const unsigned IntegrationCount = base_type::IntegrationCount;
1352  static const unsigned ElemNodeCount = base_type::ElemNodeCount;
1353 
1354  typedef Sacado::Fad::SLFad<scalar_type,4> fad_scalar_type;
1355  //typedef Sacado::Fad::DFad<scalar_type> fad_scalar_type;
1356 
1358 
1360  const typename base_type::mesh_type & arg_mesh ,
1361  const CoeffFunctionType & arg_coeff_function ,
1362  const typename base_type::vector_type & arg_solution ,
1363  const typename base_type::elem_graph_type & arg_elem_graph ,
1364  const typename base_type::sparse_matrix_type & arg_jacobian ,
1365  const typename base_type::vector_type & arg_residual ,
1366  const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
1367  base_type(arg_mesh, arg_coeff_function, arg_solution, arg_elem_graph,
1368  arg_jacobian, arg_residual, arg_dev_config) {}
1369 
1370  //------------------------------------
1371 
1372  void apply() const
1373  {
1374  const size_t nelem = this->elem_node_ids.dimension_0();
1375  parallel_for( nelem , *this );
1376  }
1377 
1378  KOKKOS_INLINE_FUNCTION
1380  const scalar_type dof_values[] ,
1381  const double x[],
1382  const double y[],
1383  const double z[],
1384  scalar_type elem_res[] ,
1385  scalar_type elem_mat[][FunctionCount] ) const
1386  {
1387  scalar_type coeff_k;
1388  double coeff_src = 1.234;
1389  double advection[] = { 1.1, 1.2, 1.3 };
1390  double dpsidx[ FunctionCount ] ;
1391  double dpsidy[ FunctionCount ] ;
1392  double dpsidz[ FunctionCount ] ;
1393  double pt[] = {0.0, 0.0, 0.0};
1394 
1395  fad_scalar_type value_at_pt(4, 0, 0.0) ;
1396  fad_scalar_type gradx_at_pt(4, 1, 0.0) ;
1397  fad_scalar_type grady_at_pt(4, 2, 0.0) ;
1398  fad_scalar_type gradz_at_pt(4, 3, 0.0) ;
1399  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1400 
1401  // If function is not constant
1402  // then compute physical coordinates of integration point
1403  if ( ! this->coeff_function.is_constant ) {
1404  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1405  pt[0] += x[j] * this->elem_data.values[i][j] ;
1406  pt[1] += y[j] * this->elem_data.values[i][j] ;
1407  pt[2] += z[j] * this->elem_data.values[i][j] ;
1408  }
1409  }
1410  coeff_k = this->coeff_function(pt, 0);
1411 
1412  const double integ_weight = this->elem_data.weights[i];
1413  const double* bases_vals = this->elem_data.values[i];
1414  const double detJ =
1415  this->transform_gradients( this->elem_data.gradients[i] ,
1416  x , y , z ,
1417  dpsidx , dpsidy , dpsidz );
1418  const double detJ_weight = detJ * integ_weight;
1419  const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
1420 
1421  value_at_pt.val() = 0.0 ;
1422  gradx_at_pt.val() = 0.0 ;
1423  grady_at_pt.val() = 0.0 ;
1424  gradz_at_pt.val() = 0.0 ;
1425  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1426  value_at_pt.val() += dof_values[m] * bases_vals[m] ;
1427  gradx_at_pt.val() += dof_values[m] * dpsidx[m] ;
1428  grady_at_pt.val() += dof_values[m] * dpsidy[m] ;
1429  gradz_at_pt.val() += dof_values[m] * dpsidz[m] ;
1430  }
1431 
1432  const fad_scalar_type source_term =
1433  coeff_src * value_at_pt * value_at_pt ;
1434 
1435  const fad_scalar_type advection_term =
1436  advection[0]*gradx_at_pt +
1437  advection[1]*grady_at_pt +
1438  advection[2]*gradz_at_pt;
1439 
1440  for ( unsigned m = 0; m < FunctionCount; ++m) {
1441  const double bases_val_m = bases_vals[m] * detJ_weight ;
1442  fad_scalar_type res =
1443  detJ_weight_coeff_k * ( dpsidx[m] * gradx_at_pt +
1444  dpsidy[m] * grady_at_pt +
1445  dpsidz[m] * gradz_at_pt ) +
1446  bases_val_m * ( advection_term + source_term ) ;
1447 
1448  elem_res[m] += res.val();
1449 
1450  scalar_type * const mat = elem_mat[m] ;
1451  for( unsigned n = 0; n < FunctionCount; n++) {
1452  mat[n] += res.fastAccessDx(0) * bases_vals[n] +
1453  res.fastAccessDx(1) * dpsidx[n] +
1454  res.fastAccessDx(2) * dpsidy[n] +
1455  res.fastAccessDx(3) * dpsidz[n];
1456  }
1457  }
1458  }
1459  }
1460 
1461  KOKKOS_INLINE_FUNCTION
1462  void operator()( const unsigned ielem ) const
1463  {
1464  double x[ FunctionCount ] ;
1465  double y[ FunctionCount ] ;
1466  double z[ FunctionCount ] ;
1467  unsigned node_index[ ElemNodeCount ];
1468 
1469  scalar_type val[ FunctionCount ] ;
1470  scalar_type elem_res[ FunctionCount ] ;
1471  scalar_type elem_mat[ FunctionCount ][ FunctionCount ] ;
1472 
1473  // Gather nodal coordinates and solution vector:
1474  this->gatherSolution( ielem, val, node_index, x, y, z, elem_res, elem_mat );
1475 
1476  // Compute nodal element residual vector and Jacobian matrix:
1477  computeElementResidualJacobian( val, x, y, z, elem_res, elem_mat );
1478 
1479  // Scatter nodal element residual and Jacobian in global vector and matrix:
1480  this->scatterResidual( ielem, node_index, elem_res, elem_mat );
1481  }
1482 }; /* ElementComputation */
1483 
1484 #if 0
1485 template< class FiniteElementMeshType , class SparseMatrixType
1486  , class CoeffFunctionType = ElementComputationConstantCoefficient
1487  >
1488 class ElementComputation ;
1489 
1490 
1491 template< class ExecutionSpace , BoxElemPart::ElemOrder Order , class CoordinateMap ,
1492  typename ScalarType , class CoeffFunctionType >
1493 class ElementComputation
1494  < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap >
1495  , Kokkos::Example::FENL::CrsMatrix< ScalarType , ExecutionSpace >
1496  , CoeffFunctionType >
1497 {
1498 public:
1499 
1502 
1503  //------------------------------------
1504 
1505  typedef ExecutionSpace execution_space ;
1506  typedef ScalarType scalar_type ;
1507 
1509  typedef typename sparse_matrix_type::StaticCrsGraphType sparse_graph_type ;
1510  typedef typename sparse_matrix_type::values_type matrix_values_type ;
1511  typedef Kokkos::View< scalar_type* , Kokkos::LayoutLeft, execution_space > vector_type ;
1512 
1513  //------------------------------------
1514 
1515  typedef LocalViewTraits< vector_type > local_vector_view_traits;
1516  typedef LocalViewTraits< matrix_values_type> local_matrix_view_traits;
1517  typedef typename local_vector_view_traits::local_view_type local_vector_type;
1518  typedef typename local_matrix_view_traits::local_view_type local_matrix_type;
1519  typedef typename local_vector_view_traits::local_value_type local_scalar_type;
1520  static const bool use_team = local_vector_view_traits::use_team;
1521 
1522  static const unsigned SpatialDim = element_data_type::spatial_dimension ;
1523  static const unsigned TensorDim = SpatialDim * SpatialDim ;
1524  static const unsigned ElemNodeCount = element_data_type::element_node_count ;
1525  static const unsigned FunctionCount = element_data_type::function_count ;
1526  static const unsigned IntegrationCount = element_data_type::integration_count ;
1527 
1528  //------------------------------------
1529 
1530  typedef typename mesh_type::node_coord_type node_coord_type ;
1531  typedef typename mesh_type::elem_node_type elem_node_type ;
1532  typedef Kokkos::View< scalar_type*[FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
1533  typedef Kokkos::View< scalar_type*[FunctionCount] , execution_space > elem_vectors_type ;
1534 
1535  typedef LocalViewTraits< elem_matrices_type > local_elem_matrices_traits;
1536  typedef LocalViewTraits< elem_vectors_type > local_elem_vectors_traits;
1537  typedef typename local_elem_matrices_traits::local_view_type local_elem_matrices_type;
1538  typedef typename local_elem_vectors_traits::local_view_type local_elem_vectors_type;
1539 
1541 
1542  //------------------------------------
1543 
1544 
1545  //------------------------------------
1546  // Computational data:
1547 
1548  const element_data_type elem_data ;
1549  const elem_node_type elem_node_ids ;
1550  const node_coord_type node_coords ;
1551  const elem_graph_type elem_graph ;
1552  const elem_matrices_type elem_jacobians ;
1553  const elem_vectors_type elem_residuals ;
1554  const vector_type solution ;
1555  const vector_type residual ;
1556  const sparse_matrix_type jacobian ;
1557  const CoeffFunctionType coeff_function ;
1558  const Kokkos::Example::FENL::DeviceConfig dev_config ;
1559 
1560  ElementComputation( const ElementComputation & rhs )
1561  : elem_data()
1562  , elem_node_ids( rhs.elem_node_ids )
1563  , node_coords( rhs.node_coords )
1564  , elem_graph( rhs.elem_graph )
1565  , elem_jacobians( rhs.elem_jacobians )
1566  , elem_residuals( rhs.elem_residuals )
1567  , solution( rhs.solution )
1568  , residual( rhs.residual )
1569  , jacobian( rhs.jacobian )
1570  , coeff_function( rhs.coeff_function )
1571  , dev_config( rhs.dev_config )
1572  {}
1573 
1574  // If the element->sparse_matrix graph is provided then perform atomic updates
1575  // Otherwise fill per-element contributions for subequent gather-add into a residual and jacobian.
1576  ElementComputation( const mesh_type & arg_mesh ,
1577  const CoeffFunctionType & arg_coeff_function ,
1578  const vector_type & arg_solution ,
1579  const elem_graph_type & arg_elem_graph ,
1580  const sparse_matrix_type & arg_jacobian ,
1581  const vector_type & arg_residual ,
1582  const Kokkos::Example::FENL::DeviceConfig arg_dev_config )
1583  : elem_data()
1584  , elem_node_ids( arg_mesh.elem_node() )
1585  , node_coords( arg_mesh.node_coord() )
1586  , elem_graph( arg_elem_graph )
1587  , elem_jacobians()
1588  , elem_residuals()
1589  , solution( arg_solution )
1590  , residual( arg_residual )
1591  , jacobian( arg_jacobian )
1592  , coeff_function( arg_coeff_function )
1593  , dev_config( arg_dev_config )
1594  {}
1595 
1596  //------------------------------------
1597 
1598  void apply() const
1599  {
1600  const size_t nelem = elem_node_ids.dimension_0();
1601  if ( use_team ) {
1602  const size_t team_size = dev_config.block_dim.x * dev_config.block_dim.y;
1603  const size_t league_size =
1604  (nelem + dev_config.block_dim.y-1) / dev_config.block_dim.y;
1605  Kokkos::TeamPolicy< execution_space > config( league_size, team_size );
1606  parallel_for( config , *this );
1607  }
1608  else {
1609  parallel_for( nelem , *this );
1610  }
1611  }
1612 
1613  //------------------------------------
1614 
1615  static const unsigned FLOPS_transform_gradients =
1616  /* Jacobian */ FunctionCount * TensorDim * 2 +
1617  /* Inverse jacobian */ TensorDim * 6 + 6 +
1618  /* Gradient transform */ FunctionCount * 15 ;
1619 
1620  KOKKOS_INLINE_FUNCTION
1621  double transform_gradients(
1622  const double grad[][ FunctionCount ] , // Gradient of bases master element
1623  const double x[] ,
1624  const double y[] ,
1625  const double z[] ,
1626  double dpsidx[] ,
1627  double dpsidy[] ,
1628  double dpsidz[] ) const
1629  {
1630  enum { j11 = 0 , j12 = 1 , j13 = 2 ,
1631  j21 = 3 , j22 = 4 , j23 = 5 ,
1632  j31 = 6 , j32 = 7 , j33 = 8 };
1633 
1634  // Jacobian accumulation:
1635 
1636  double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1637 
1638  for( unsigned i = 0; i < FunctionCount ; ++i ) {
1639  const double x1 = x[i] ;
1640  const double x2 = y[i] ;
1641  const double x3 = z[i] ;
1642 
1643  const double g1 = grad[0][i] ;
1644  const double g2 = grad[1][i] ;
1645  const double g3 = grad[2][i] ;
1646 
1647  J[j11] += g1 * x1 ;
1648  J[j12] += g1 * x2 ;
1649  J[j13] += g1 * x3 ;
1650 
1651  J[j21] += g2 * x1 ;
1652  J[j22] += g2 * x2 ;
1653  J[j23] += g2 * x3 ;
1654 
1655  J[j31] += g3 * x1 ;
1656  J[j32] += g3 * x2 ;
1657  J[j33] += g3 * x3 ;
1658  }
1659 
1660  // Inverse jacobian:
1661 
1662  double invJ[ TensorDim ] = {
1663  static_cast<double>( J[j22] * J[j33] - J[j23] * J[j32] ) ,
1664  static_cast<double>( J[j13] * J[j32] - J[j12] * J[j33] ) ,
1665  static_cast<double>( J[j12] * J[j23] - J[j13] * J[j22] ) ,
1666 
1667  static_cast<double>( J[j23] * J[j31] - J[j21] * J[j33] ) ,
1668  static_cast<double>( J[j11] * J[j33] - J[j13] * J[j31] ) ,
1669  static_cast<double>( J[j13] * J[j21] - J[j11] * J[j23] ) ,
1670 
1671  static_cast<double>( J[j21] * J[j32] - J[j22] * J[j31] ) ,
1672  static_cast<double>( J[j12] * J[j31] - J[j11] * J[j32] ) ,
1673  static_cast<double>( J[j11] * J[j22] - J[j12] * J[j21] ) };
1674 
1675  const double detJ = J[j11] * invJ[j11] +
1676  J[j21] * invJ[j12] +
1677  J[j31] * invJ[j13] ;
1678 
1679  const double detJinv = 1.0 / detJ ;
1680 
1681  for ( unsigned i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; }
1682 
1683  // Transform gradients:
1684 
1685  for( unsigned i = 0; i < FunctionCount ; ++i ) {
1686  const double g0 = grad[0][i];
1687  const double g1 = grad[1][i];
1688  const double g2 = grad[2][i];
1689 
1690  dpsidx[i] = g0 * invJ[j11] + g1 * invJ[j12] + g2 * invJ[j13];
1691  dpsidy[i] = g0 * invJ[j21] + g1 * invJ[j22] + g2 * invJ[j23];
1692  dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33];
1693  }
1694 
1695  return detJ ;
1696  }
1697 
1698  KOKKOS_INLINE_FUNCTION
1699  void contributeResidualJacobian(
1700  const local_scalar_type dof_values[] ,
1701  const double dpsidx[] ,
1702  const double dpsidy[] ,
1703  const double dpsidz[] ,
1704  const double detJ ,
1705  const local_scalar_type coeff_k ,
1706  const double integ_weight ,
1707  const double bases_vals[] ,
1708  local_scalar_type elem_res[] ,
1709  local_scalar_type elem_mat[][ FunctionCount ] ) const
1710  {
1711  local_scalar_type value_at_pt = 0 ;
1712  local_scalar_type gradx_at_pt = 0 ;
1713  local_scalar_type grady_at_pt = 0 ;
1714  local_scalar_type gradz_at_pt = 0 ;
1715 
1716  for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1717  value_at_pt += dof_values[m] * bases_vals[m] ;
1718  gradx_at_pt += dof_values[m] * dpsidx[m] ;
1719  grady_at_pt += dof_values[m] * dpsidy[m] ;
1720  gradz_at_pt += dof_values[m] * dpsidz[m] ;
1721  }
1722 
1723  const local_scalar_type k_detJ_weight = coeff_k * detJ * integ_weight ;
1724  const local_scalar_type res_val = value_at_pt * value_at_pt * detJ * integ_weight ;
1725  const local_scalar_type mat_val = 2.0 * value_at_pt * detJ * integ_weight ;
1726 
1727  // $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$
1728  // $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$
1729 
1730  for ( unsigned m = 0; m < FunctionCount; ++m) {
1731  local_scalar_type * const mat = elem_mat[m] ;
1732  const double bases_val_m = bases_vals[m];
1733  const double dpsidx_m = dpsidx[m] ;
1734  const double dpsidy_m = dpsidy[m] ;
1735  const double dpsidz_m = dpsidz[m] ;
1736 
1737  elem_res[m] += k_detJ_weight * ( dpsidx_m * gradx_at_pt +
1738  dpsidy_m * grady_at_pt +
1739  dpsidz_m * gradz_at_pt ) +
1740  res_val * bases_val_m ;
1741 
1742  for( unsigned n = 0; n < FunctionCount; n++) {
1743 
1744  mat[n] += k_detJ_weight * ( dpsidx_m * dpsidx[n] +
1745  dpsidy_m * dpsidy[n] +
1746  dpsidz_m * dpsidz[n] ) +
1747  mat_val * bases_val_m * bases_vals[n];
1748  }
1749  }
1750  }
1751 
1752  KOKKOS_INLINE_FUNCTION
1753  void operator()( const typename TeamPolicy< execution_space >::member_type & dev ) const
1754  {
1755 
1756  const unsigned num_ensemble_threads = dev_config.block_dim.x ;
1757  const unsigned num_element_threads = dev_config.block_dim.y ;
1758  const unsigned element_rank = dev.team_rank() / num_ensemble_threads ;
1759  const unsigned ensemble_rank = dev.team_rank() % num_ensemble_threads ;
1760 
1761  const unsigned ielem =
1762  dev.league_rank() * num_element_threads + element_rank;
1763 
1764  if (ielem >= elem_node_ids.dimension_0())
1765  return;
1766 
1767  (*this)( ielem, ensemble_rank );
1768 
1769  }
1770 
1771  KOKKOS_INLINE_FUNCTION
1772  void operator()( const unsigned ielem ,
1773  const unsigned ensemble_rank = 0 ) const
1774  {
1775  local_vector_type local_solution =
1776  local_vector_view_traits::create_local_view(solution,
1777  ensemble_rank);
1778  local_vector_type local_residual =
1779  local_vector_view_traits::create_local_view(residual,
1780  ensemble_rank);
1781  local_matrix_type local_jacobian_values =
1782  local_matrix_view_traits::create_local_view(jacobian.values,
1783  ensemble_rank);
1784 
1785  // Gather nodal coordinates and solution vector:
1786 
1787  double x[ FunctionCount ] ;
1788  double y[ FunctionCount ] ;
1789  double z[ FunctionCount ] ;
1790  local_scalar_type val[ FunctionCount ] ;
1791  unsigned node_index[ ElemNodeCount ];
1792 
1793  for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
1794  const unsigned ni = elem_node_ids( ielem , i );
1795 
1796  node_index[i] = ni ;
1797 
1798  x[i] = node_coords( ni , 0 );
1799  y[i] = node_coords( ni , 1 );
1800  z[i] = node_coords( ni , 2 );
1801 
1802  val[i] = local_solution( ni );
1803  }
1804 
1805 
1806  local_scalar_type elem_vec[ FunctionCount ] ;
1807  local_scalar_type elem_mat[ FunctionCount ][ FunctionCount ] ;
1808 
1809  for( unsigned i = 0; i < FunctionCount ; i++ ) {
1810  elem_vec[i] = 0 ;
1811  for( unsigned j = 0; j < FunctionCount ; j++){
1812  elem_mat[i][j] = 0 ;
1813  }
1814  }
1815 
1816 
1817  for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1818  double dpsidx[ FunctionCount ] ;
1819  double dpsidy[ FunctionCount ] ;
1820  double dpsidz[ FunctionCount ] ;
1821 
1822  local_scalar_type coeff_k = 0 ;
1823 
1824  {
1825  double pt[] = {0.0, 0.0, 0.0};
1826 
1827  // If function is not constant
1828  // then compute physical coordinates of integration point
1829  if ( ! coeff_function.is_constant ) {
1830  for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1831  pt[0] += x[j] * elem_data.values[i][j] ;
1832  pt[1] += y[j] * elem_data.values[i][j] ;
1833  pt[2] += z[j] * elem_data.values[i][j] ;
1834  }
1835  }
1836 
1837  // Need to fix this for local_scalar_type!!!!!!
1838  coeff_k = coeff_function(pt, ensemble_rank);
1839  }
1840 
1841  const double detJ =
1842  transform_gradients( elem_data.gradients[i] , x , y , z ,
1843  dpsidx , dpsidy , dpsidz );
1844 
1845  contributeResidualJacobian( val , dpsidx , dpsidy , dpsidz ,
1846  detJ , coeff_k ,
1847  elem_data.weights[i] ,
1848  elem_data.values[i] ,
1849  elem_vec , elem_mat );
1850  }
1851 
1852  for( unsigned i = 0 ; i < FunctionCount ; i++ ) {
1853  const unsigned row = node_index[i] ;
1854  if ( row < residual.dimension_0() ) {
1855  atomic_add( & local_residual( row ) , elem_vec[i] );
1856 
1857  for( unsigned j = 0 ; j < FunctionCount ; j++ ) {
1858  const unsigned entry = elem_graph( ielem , i , j );
1859  if ( entry != ~0u ) {
1860  atomic_add( & local_jacobian_values( entry ) , elem_mat[i][j] );
1861  }
1862  }
1863  }
1864  }
1865  }
1866 }; /* ElementComputation */
1867 #endif
1868 
1869 //----------------------------------------------------------------------------
1870 
1871 template< class FixtureType , class SparseMatrixType >
1873 
1874 template< class ExecutionSpace , BoxElemPart::ElemOrder Order , class CoordinateMap ,
1875  typename ScalarType >
1877  Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1878  Kokkos::Example::FENL::CrsMatrix< ScalarType , ExecutionSpace > >
1879 {
1880 public:
1881 
1885 
1886  typedef ExecutionSpace execution_space ;
1887  typedef ScalarType scalar_type ;
1888 
1892  typedef Kokkos::View< scalar_type* , execution_space > vector_type ;
1893 
1894  //------------------------------------
1895 
1900  static const bool use_team = local_vector_view_traits::use_team;
1901 
1902  typedef double bc_scalar_type ;
1903 
1904  //------------------------------------
1905  // Computational data:
1906 
1915  const unsigned bc_plane ;
1916  const unsigned node_count ;
1917  bool init ;
1919 
1920 
1921  DirichletComputation( const mesh_type & arg_mesh ,
1922  const vector_type & arg_solution ,
1923  const sparse_matrix_type & arg_jacobian ,
1924  const vector_type & arg_residual ,
1925  const unsigned arg_bc_plane ,
1926  const bc_scalar_type arg_bc_lower_value ,
1927  const bc_scalar_type arg_bc_upper_value ,
1928  const Kokkos::Example::FENL::DeviceConfig arg_dev_config )
1929  : node_coords( arg_mesh.node_coord() )
1930  , solution( arg_solution )
1931  , jacobian( arg_jacobian )
1932  , residual( arg_residual )
1933  , bc_lower_value( arg_bc_lower_value )
1934  , bc_upper_value( arg_bc_upper_value )
1935  , bc_lower_limit( std::numeric_limits<scalar_coord_type>::epsilon() )
1936  , bc_upper_limit( scalar_coord_type(1) - std::numeric_limits<scalar_coord_type>::epsilon() )
1937  , bc_plane( arg_bc_plane )
1938  , node_count( arg_mesh.node_count_owned() )
1939  , init( false )
1940  , dev_config( arg_dev_config )
1941  {
1942  parallel_for( node_count , *this );
1943  init = true ;
1944  }
1945 
1946  void apply() const
1947  {
1948  if ( use_team ) {
1949  const size_t team_size = dev_config.block_dim.x * dev_config.block_dim.y;
1950  const size_t league_size =
1951  (node_count + dev_config.block_dim.y-1) / dev_config.block_dim.y;
1952  Kokkos::TeamPolicy< execution_space > config( league_size, team_size );
1953  parallel_for( config , *this );
1954  }
1955  else
1956  parallel_for( node_count , *this );
1957  }
1958 
1959  //------------------------------------
1960 
1961  KOKKOS_INLINE_FUNCTION
1962  void operator()( const typename TeamPolicy< execution_space >::member_type & dev ) const
1963  {
1964 
1965  const unsigned num_ensemble_threads = dev_config.block_dim.x ;
1966  const unsigned num_node_threads = dev_config.block_dim.y ;
1967  const unsigned node_rank = dev.team_rank() / num_ensemble_threads ;
1968  const unsigned ensemble_rank = dev.team_rank() % num_ensemble_threads ;
1969 
1970  const unsigned inode = dev.league_rank() * num_node_threads + node_rank;
1971 
1972  if (inode >= node_count)
1973  return;
1974 
1975  (*this)( inode, ensemble_rank );
1976 
1977  }
1978 
1979  KOKKOS_INLINE_FUNCTION
1980  void operator()( const unsigned inode ,
1981  const unsigned ensemble_rank = 0) const
1982  {
1983  local_vector_type local_residual =
1984  local_vector_view_traits::create_local_view(residual,
1985  ensemble_rank);
1986  local_matrix_type local_jacobian_values =
1987  local_matrix_view_traits::create_local_view(jacobian.values,
1988  ensemble_rank);
1989 
1990  // Apply dirichlet boundary condition on the Solution and Residual vectors.
1991  // To maintain the symmetry of the original global stiffness matrix,
1992  // zero out the columns that correspond to boundary conditions, and
1993  // update the residual vector accordingly
1994 
1995  const unsigned iBeg = jacobian.graph.row_map[inode];
1996  const unsigned iEnd = jacobian.graph.row_map[inode+1];
1997 
1998  const scalar_coord_type c = node_coords(inode,bc_plane);
1999  const bool bc_lower = c <= bc_lower_limit ;
2000  const bool bc_upper = bc_upper_limit <= c ;
2001 
2002  if ( ! init ) {
2003  solution(inode) = bc_lower ? bc_lower_value : (
2004  bc_upper ? bc_upper_value : 0 );
2005  }
2006  else {
2007  if ( bc_lower || bc_upper ) {
2008 
2009  local_residual(inode) = 0 ;
2010 
2011  // zero each value on the row, and leave a one
2012  // on the diagonal
2013 
2014  for( unsigned i = iBeg ; i < iEnd ; ++i ) {
2015  local_jacobian_values(i) = int(inode) == int(jacobian.graph.entries(i)) ? 1 : 0 ;
2016  }
2017  }
2018  else {
2019 
2020  // Find any columns that are boundary conditions.
2021  // Clear them and adjust the residual vector
2022 
2023  for( unsigned i = iBeg ; i < iEnd ; ++i ) {
2024  const unsigned cnode = jacobian.graph.entries(i) ;
2025  const scalar_coord_type cc = node_coords(cnode,bc_plane);
2026 
2027  if ( ( cc <= bc_lower_limit ) || ( bc_upper_limit <= cc ) ) {
2028  local_jacobian_values(i) = 0 ;
2029  }
2030  }
2031  }
2032  }
2033  }
2034 };
2035 
2036 } /* namespace FENL */
2037 } /* namespace Example */
2038 } /* namespace Kokkos */
2039 
2040 //----------------------------------------------------------------------------
2041 
2042 /* A Cuda-specific specialization for the element computation functor. */
2043 #if defined( __CUDACC__ )
2044 // #include <NonlinearElement_Cuda.hpp>
2045 #endif
2046 
2047 //----------------------------------------------------------------------------
2048 
2049 #endif /* #ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP */
static KOKKOS_INLINE_FUNCTION local_view_type create_local_view(const view_type &v, const unsigned local_rank)
KOKKOS_INLINE_FUNCTION void join(volatile unsigned &update, const volatile unsigned &input) const
KOKKOS_INLINE_FUNCTION PCE< Storage > sqrt(const PCE< Storage > &a)
KOKKOS_INLINE_FUNCTION void operator()(const unsigned iwork) const
KOKKOS_INLINE_FUNCTION void computeElementResidualJacobian(const scalar_type dof_values[], const double x[], const double y[], const double z[], scalar_type elem_res[], scalar_type elem_mat[][FunctionCount]) const
ExponentialKLCoefficient(const ExponentialKLCoefficient &rhs)
KOKKOS_INLINE_FUNCTION void setRandomVariables(const RandomVariableView &rv)
LocalViewTraits< RandomVariableView > local_rv_view_traits
KOKKOS_INLINE_FUNCTION RandomVariableView getRandomVariables() const
Kokkos::View< scalar_type *[FunctionCount], execution_space > elem_vectors_type
expr val()
CrsMatrix< ScalarType, ExecutionSpace > sparse_matrix_type
static void eval(Kokkos::Example::FENL::DeviceConfig &dev_config_elem, Kokkos::Example::FENL::DeviceConfig &dev_config_bc)
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
Kokkos::View< const double *[SpaceDim], Device > node_coord_type
Kokkos::DefaultExecutionSpace execution_space
Kokkos::View< const unsigned *[ElemNode], Device > elem_node_type
static void eval(Kokkos::Example::FENL::DeviceConfig &dev_config_elem, Kokkos::Example::FENL::DeviceConfig &dev_config_bc)
KOKKOS_INLINE_FUNCTION double operator()(double pt[], unsigned ensemble_rank) const
KOKKOS_INLINE_FUNCTION void gatherSolution(const unsigned ielem, fad_scalar_type val[], unsigned node_index[], double x[], double y[], double z[], fad_scalar_type res[]) const
Kokkos::View< scalar_type *, Kokkos::LayoutLeft, execution_space > vector_type
KOKKOS_INLINE_FUNCTION void atomic_add(volatile Sacado::UQ::PCE< Storage > *const dest, const Sacado::UQ::PCE< Storage > &src)
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
Kokkos::View< Scalar *, Kokkos::LayoutLeft, Device > RandomVariableView
KOKKOS_INLINE_FUNCTION local_scalar_type operator()(const MeshScalar point[], const size_type ensemble_rank) const
KOKKOS_INLINE_FUNCTION void fill_graph_entries(const unsigned iset) const
KOKKOS_INLINE_FUNCTION double transform_gradients(const double grad[][FunctionCount], const double x[], const double y[], const double z[], double dpsidx[], double dpsidy[], double dpsidz[]) const
NodeNodeGraph< elem_node_type, sparse_graph_type, ElemNodeCount >::ElemGraphType elem_graph_type
local_rv_view_traits::local_view_type local_rv_view_type
ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap >, CrsMatrix< ScalarType, ExecutionSpace >, Analytic, CoeffFunctionType > base_type
ElemNodeIdView::execution_space execution_space
KOKKOS_INLINE_FUNCTION void computeElementResidual(const scalar_type dof_values[], const double x[], const double y[], const double z[], local_scalar_type elem_res[]) const
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
Definition: csr_vector.h:260
ElementComputationConstantCoefficient(const ElementComputationConstantCoefficient &rhs)
KOKKOS_INLINE_FUNCTION void gatherSolution(const unsigned ielem, scalar_type val[], unsigned node_index[], double x[], double y[], double z[], fad_scalar_type res[]) const
KOKKOS_INLINE_FUNCTION void operator()(const unsigned irow, unsigned &update, const bool final) const
Dim3(const size_t x_, const size_t y_=1, const size_t z_=1)
Kokkos::Example::HexElement_Data< mesh_type::ElemNode > element_data_type
CrsGraphType::row_map_type::non_const_type RowMapType
KOKKOS_INLINE_FUNCTION void sort_graph_entries(const unsigned irow) const
ElementComputationBase(const mesh_type &arg_mesh, const vector_type &arg_solution, const elem_graph_type &arg_elem_graph, const sparse_matrix_type &arg_jacobian, const vector_type &arg_residual)
CrsMatrix(const StaticCrsGraphType &arg_graph)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
KOKKOS_INLINE_FUNCTION Teuchos::PromotionTraits< typename rv_type::value_type, value_type >::promote evaluate(const point_type &point, const rv_type &random_variables) const
Evaluate random field at a point.
expr expr expr expr j
KOKKOS_INLINE_FUNCTION void fill_set(const unsigned ielem) const
Kokkos::StaticCrsGraph< unsigned, Space, void, unsigned > StaticCrsGraphType
KOKKOS_INLINE_FUNCTION void gatherSolution(const unsigned ielem, scalar_type val[], unsigned node_index[], double x[], double y[], double z[], scalar_type res[], scalar_type mat[][FunctionCount]) const
KOKKOS_INLINE_FUNCTION void computeElementResidualJacobian(const scalar_type dof_values[], const double x[], const double y[], const double z[], scalar_type elem_res[], scalar_type elem_mat[][FunctionCount]) const
Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap > mesh_type
NodeNodeGraph(const ElemNodeIdView &arg_elem_node_id, const unsigned arg_node_count, Times &results)
ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap >, CrsMatrix< ScalarType, ExecutionSpace >, FadElement, CoeffFunctionType > base_type
KOKKOS_INLINE_FUNCTION void fill_elem_graph_map(const unsigned ielem) const
sparse_matrix_type::StaticCrsGraphType sparse_graph_type
Kokkos::View< unsigned, execution_space > UnsignedValue
KOKKOS_INLINE_FUNCTION void scatterResidual(const unsigned ielem, const unsigned node_index[], const scalar_type res[], const scalar_type mat[][FunctionCount]) const
DeviceConfig(const size_t num_blocks_=0, const size_t threads_per_block_x_=0, const size_t threads_per_block_y_=0, const size_t threads_per_block_z_=1)
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
local_rv_view_traits::local_value_type local_scalar_type
DirichletComputation(const mesh_type &arg_mesh, const vector_type &arg_solution, const sparse_matrix_type &arg_jacobian, const vector_type &arg_residual, const unsigned arg_bc_plane, const bc_scalar_type arg_bc_lower_value, const bc_scalar_type arg_bc_upper_value, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
KOKKOS_INLINE_FUNCTION PCE< Storage > sin(const PCE< Storage > &a)
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
KOKKOS_INLINE_FUNCTION void computeElementResidual(const local_scalar_type dof_values[], const double x[], const double y[], const double z[], local_scalar_type elem_res[]) const
Kokkos::View< scalar_type *[FunctionCount][FunctionCount], execution_space > elem_matrices_type
Kokkos::View< unsigned *[ElemNode][ElemNode], execution_space > ElemGraphType
Generate a distributed unstructured finite element mesh from a partitioned NX*NY*NZ box of elements...
Stokhos::KL::ExponentialRandomField< MeshScalar, Device > rf_type
ExponentialKLCoefficient(const MeshScalar mean, const MeshScalar variance, const MeshScalar correlation_length, const size_type num_rv)
KOKKOS_INLINE_FUNCTION void init(unsigned &update) const
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
Definition: csr_vector.h:267
void update(const ValueType &alpha, VectorType &x, const ValueType &beta, const VectorType &y)
Kokkos::UnorderedMap< key_type, void, execution_space > SetType