Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
Sacado_Fad_Exp_MP_Vector.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef SACADO_FAD_EXP_MP_VECTOR_HPP
43 #define SACADO_FAD_EXP_MP_VECTOR_HPP
44 
45 #include "Sacado_MP_Vector.hpp"
46 
47 namespace Sacado {
48 
49  namespace Fad {
50  namespace Exp {
51 
53  class ExprSpecMPVector {};
54 
56 
61  template <typename T>
62  class Extender<
63  T,
64  typename std::enable_if<
65  Sacado::is_mp_vector<typename T::value_type>::value >::type
66  > : public T
67  {
68  public:
69 
70  typedef typename T::value_type value_type;
71  typedef typename value_type::value_type val_type;
72 
73  // Define expression template specialization
75 
76  // Bring in constructors
77  using T::T;
78 
79  // Bring in methods we are overloading
80  using T::val;
81  using T::dx;
82  using T::fastAccessDx;
83 
85  KOKKOS_INLINE_FUNCTION
86  const val_type& val(int j) const { return T::val().fastAccessCoeff(j); }
87 
89  KOKKOS_INLINE_FUNCTION
90  val_type& val(int j) { return T::val().fastAccessCoeff(j); }
91 
93  KOKKOS_INLINE_FUNCTION
94  val_type dx(int i, int j) const {
95  return this->size() ? this->dx_[i].fastAccessCoeff(j) : val_type(0.0);
96  }
97 
99  KOKKOS_INLINE_FUNCTION
100  val_type& fastAccessDx(int i, int j) {
101  return this->dx_[i].fastAccessCoeff(j);
102  }
103 
105  KOKKOS_INLINE_FUNCTION
106  const val_type& fastAccessDx(int i, int j) const {
107  return this->dx_[i].fastAccessCoeff(j);
108  }
109 
110  };
111 
113  template <typename DstType>
114  class ExprAssign<
115  DstType,
116  typename std::enable_if<
117  std::is_same< typename DstType::expr_spec_type, ExprSpecMPVector >::value
118  >::type
119  > {
120  public:
121 
123  typedef typename DstType::value_type value_type;
124 
125  // MP::Vector size (assuming static, because that's all we care about)
126  static const int VecNum = Sacado::StaticSize<value_type>::value;
127 
129  template <typename SrcType>
130  KOKKOS_INLINE_FUNCTION
131  static void assign_equal(DstType& dst, const SrcType& x)
132  {
133  const int xsz = x.size();
134 
135  if (xsz != dst.size())
136  dst.resizeAndZero(xsz);
137 
138  const int sz = dst.size();
139 
140  // For ViewStorage, the resize above may not in fact resize the
141  // derivative array, so it is possible that sz != xsz at this point.
142  // The only valid use case here is sz > xsz == 0, so we use sz in the
143  // assignment below
144 
145  if (sz) {
146  if (x.hasFastAccess()) {
147  SACADO_FAD_DERIV_LOOP(i,sz)
148  for (int j=0; j<VecNum; ++j)
149  dst.fastAccessDx(i,j) = x.fastAccessDx(i,j);
150  }
151  else
152  SACADO_FAD_DERIV_LOOP(i,sz)
153  for (int j=0; j<VecNum; ++j)
154  dst.fastAccessDx(i,j) = x.dx(i,j);
155  }
156 
157  for (int j=0; j<VecNum; ++j)
158  dst.val(j) = x.val(j);
159  }
160 
162  template <typename SrcType>
163  KOKKOS_INLINE_FUNCTION
164  static void assign_plus_equal(DstType& dst, const SrcType& x)
165  {
166  const int xsz = x.size(), sz = dst.size();
167 
168 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
169  if ((xsz != sz) && (xsz != 0) && (sz != 0))
170  throw "Fad Error: Attempt to assign with incompatible sizes";
171 #endif
172 
173  if (xsz) {
174  if (sz) {
175  if (x.hasFastAccess())
176  SACADO_FAD_DERIV_LOOP(i,sz)
177  for (int j=0; j<VecNum; ++j)
178  dst.fastAccessDx(i,j) += x.fastAccessDx(i,j);
179  else
180  for (int i=0; i<sz; ++i)
181  for (int j=0; j<VecNum; ++j)
182  dst.fastAccessDx(i,j) += x.dx(i,j);
183  }
184  else {
185  dst.resizeAndZero(xsz);
186  if (x.hasFastAccess())
187  SACADO_FAD_DERIV_LOOP(i,xsz)
188  for (int j=0; j<VecNum; ++j)
189  dst.fastAccessDx(i,j) = x.fastAccessDx(i,j);
190  else
191  SACADO_FAD_DERIV_LOOP(i,xsz)
192  for (int j=0; j<VecNum; ++j)
193  dst.fastAccessDx(i,j) = x.dx(i,j);
194  }
195  }
196 
197  for (int j=0; j<VecNum; ++j)
198  dst.val(j) += x.val(j);
199  }
200 
202  template <typename SrcType>
203  KOKKOS_INLINE_FUNCTION
204  static void assign_minus_equal(DstType& dst, const SrcType& x)
205  {
206  const int xsz = x.size(), sz = dst.size();
207 
208 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
209  if ((xsz != sz) && (xsz != 0) && (sz != 0))
210  throw "Fad Error: Attempt to assign with incompatible sizes";
211 #endif
212 
213  if (xsz) {
214  if (sz) {
215  if (x.hasFastAccess())
216  SACADO_FAD_DERIV_LOOP(i,sz)
217  for (int j=0; j<VecNum; ++j)
218  dst.fastAccessDx(i,j) -= x.fastAccessDx(i,j);
219  else
220  SACADO_FAD_DERIV_LOOP(i,sz)
221  for (int j=0; j<VecNum; ++j)
222  dst.fastAccessDx(i,j) -= x.dx(i,j);
223  }
224  else {
225  dst.resizeAndZero(xsz);
226  if (x.hasFastAccess())
227  SACADO_FAD_DERIV_LOOP(i,xsz)
228  for (int j=0; j<VecNum; ++j)
229  dst.fastAccessDx(i,j) = -x.fastAccessDx(i,j);
230  else
231  SACADO_FAD_DERIV_LOOP(i,xsz)
232  for (int j=0; j<VecNum; ++j)
233  dst.fastAccessDx(i,j) = -x.dx(i,j);
234  }
235  }
236 
237  for (int j=0; j<VecNum; ++j)
238  dst.val(j) -= x.val(j);
239  }
240 
242  template <typename SrcType>
243  KOKKOS_INLINE_FUNCTION
244  static void assign_times_equal(DstType& dst, const SrcType& x)
245  {
246  const int xsz = x.size(), sz = dst.size();
247  const value_type xval = x.val();
248  const value_type v = dst.val();
249 
250 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
251  if ((xsz != sz) && (xsz != 0) && (sz != 0))
252  throw "Fad Error: Attempt to assign with incompatible sizes";
253 #endif
254 
255  if (xsz) {
256  if (sz) {
257  if (x.hasFastAccess())
258  SACADO_FAD_DERIV_LOOP(i,sz)
259  for (int j=0; j<VecNum; ++j)
260  dst.fastAccessDx(i) = v.fastAccessCoeff(j)*x.fastAccessDx(i,j) + dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j);
261  else
262  SACADO_FAD_DERIV_LOOP(i,sz)
263  for (int j=0; j<VecNum; ++j)
264  dst.fastAccessDx(i) = v.fastAccessCoeff(j)*x.dx(i,j) + dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j);
265  }
266  else {
267  dst.resizeAndZero(xsz);
268  if (x.hasFastAccess())
269  SACADO_FAD_DERIV_LOOP(i,xsz)
270  for (int j=0; j<VecNum; ++j)
271  dst.fastAccessDx(i,j) = v.fastAccessCoeff(j)*x.fastAccessDx(i,j);
272  else
273  SACADO_FAD_DERIV_LOOP(i,xsz)
274  for (int j=0; j<VecNum; ++j)
275  dst.fastAccessDx(i,j) = v.fastAccessCoeff(j)*x.dx(i,j);
276  }
277  }
278  else {
279  if (sz) {
280  SACADO_FAD_DERIV_LOOP(i,sz)
281  for (int j=0; j<VecNum; ++j)
282  dst.fastAccessDx(i,j) *= xval.fastAccessCoeff(j);
283  }
284  }
285 
286  for (int j=0; j<VecNum; ++j)
287  dst.val(j) *= xval.fastAccessCoeff(j);
288  }
289 
291  template <typename SrcType>
292  KOKKOS_INLINE_FUNCTION
293  static void assign_divide_equal(DstType& dst, const SrcType& x)
294  {
295  const int xsz = x.size(), sz = dst.size();
296  const value_type xval = x.val();
297  const value_type v = dst.val();
298 
299 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
300  if ((xsz != sz) && (xsz != 0) && (sz != 0))
301  throw "Fad Error: Attempt to assign with incompatible sizes";
302 #endif
303 
304  if (xsz) {
305  const value_type xval2 = xval*xval;
306  if (sz) {
307  if (x.hasFastAccess())
308  SACADO_FAD_DERIV_LOOP(i,sz)
309  for (int j=0; j<VecNum; ++j)
310  dst.fastAccessDx(i,j) =
311  ( dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j) - v.fastAccessCoeff(j)*x.fastAccessDx(i,j) ) / xval2.fastAccessCoeff(j);
312  else
313  SACADO_FAD_DERIV_LOOP(i,sz)
314  for (int j=0; j<VecNum; ++j)
315  dst.fastAccessDx(i,j) =
316  ( dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j) - v.fastAccessCoeff(j)*x.dx(i,j) ) / xval2.fastAccessCoeff(j);
317  }
318  else {
319  dst.resizeAndZero(xsz);
320  if (x.hasFastAccess())
321  SACADO_FAD_DERIV_LOOP(i,xsz)
322  for (int j=0; j<VecNum; ++j)
323  dst.fastAccessDx(i,j) = - v.fastAccessCoeff(j)*x.fastAccessDx(i,j) / xval2.fastAccessCoeff(j);
324  else
325  SACADO_FAD_DERIV_LOOP(i,xsz)
326  for (int j=0; j<VecNum; ++j)
327  dst.fastAccessDx(i,j) = -v.fastAccessCoeff(j)*x.dx(i,j) / xval2.fastAccessCoeff(j);
328  }
329  }
330  else {
331  if (sz) {
332  SACADO_FAD_DERIV_LOOP(i,sz)
333  for (int j=0; j<VecNum; ++j)
334  dst.fastAccessDx(i,j) /= xval.fastAccessCoeff(j);
335  }
336  }
337 
338  for (int j=0; j<VecNum; ++j)
339  dst.val(j) /= xval.fastAccessCoeff(j);
340  }
341 
342  };
343 
348  template <typename DstType>
349  class ExprAssign<
350  DstType,
351  typename std::enable_if<
352  Sacado::IsStaticallySized<DstType>::value &&
353  std::is_same< typename DstType::expr_spec_type, ExprSpecMPVector >::value
354  >::type
355  > {
356  public:
357 
359  typedef typename DstType::value_type value_type;
360 
361  // MP::Vector size (assuming static, because that's all we care about)
362  static const int VecNum = Sacado::StaticSize<value_type>::value;
363 
365  template <typename SrcType>
366  KOKKOS_INLINE_FUNCTION
367  static void assign_equal(DstType& dst, const SrcType& x)
368  {
369  const int sz = dst.size();
370  SACADO_FAD_DERIV_LOOP(i,sz)
371  for (int j=0; j<VecNum; ++j)
372  dst.fastAccessDx(i,j) = x.fastAccessDx(i,j);
373  for (int j=0; j<VecNum; ++j)
374  dst.val(j) = x.val(j);
375  }
376 
378  template <typename SrcType>
379  KOKKOS_INLINE_FUNCTION
380  static void assign_plus_equal(DstType& dst, const SrcType& x)
381  {
382  const int sz = dst.size();
383  SACADO_FAD_DERIV_LOOP(i,sz)
384  for (int j=0; j<VecNum; ++j)
385  dst.fastAccessDx(i,j) += x.fastAccessDx(i,j);
386  for (int j=0; j<VecNum; ++j)
387  dst.val(j) += x.val(j);
388  }
389 
391  template <typename SrcType>
392  KOKKOS_INLINE_FUNCTION
393  static void assign_minus_equal(DstType& dst, const SrcType& x)
394  {
395  const int sz = dst.size();
396  SACADO_FAD_DERIV_LOOP(i,sz)
397  for (int j=0; j<VecNum; ++j)
398  dst.fastAccessDx(i,j) -= x.fastAccessDx(i,j);
399  for (int j=0; j<VecNum; ++j)
400  dst.val(j) -= x.val(j);
401  }
402 
404  template <typename SrcType>
405  KOKKOS_INLINE_FUNCTION
406  static void assign_times_equal(DstType& dst, const SrcType& x)
407  {
408  const int sz = dst.size();
409  const value_type xval = x.val();
410  const value_type v = dst.val();
411  SACADO_FAD_DERIV_LOOP(i,sz)
412  for (int j=0; j<VecNum; ++j)
413  dst.fastAccessDx(i,j) = v.fastAccessCoeff(j)*x.fastAccessDx(i,j) + dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j);
414  for (int j=0; j<VecNum; ++j)
415  dst.val(j) *= xval.fastAccessCoeff(j);
416  }
417 
419  template <typename SrcType>
420  KOKKOS_INLINE_FUNCTION
421  static void assign_divide_equal(DstType& dst, const SrcType& x)
422  {
423  const int sz = dst.size();
424  const value_type xval = x.val();
425  const value_type xval2 = xval*xval;
426  const value_type v = dst.val();
427  SACADO_FAD_DERIV_LOOP(i,sz)
428  for (int j=0; j<VecNum; ++j)
429  dst.fastAccessDx(i,j) =
430  ( dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j) - v.fastAccessCoeff(j)*x.fastAccessDx(i,j) )/ xval2.fastAccessCoeff(j);
431  for (int j=0; j<VecNum; ++j)
432  dst.val(j) /= xval.fastAccessCoeff(j);
433  }
434 
435  };
436 
437  } // namespace Exp
438  } // namespace Fad
439 
440 } // namespace Sacado
441 
442 // Specialize expression template operators to add similar extensions
443 #include "Sacado_Fad_Exp_Ops.hpp"
444 
445 #define FAD_UNARYOP_MACRO(OPNAME,OP,USING,MPVALUE,VALUE,DX,FASTACCESSDX) \
446 namespace Sacado { \
447  namespace Fad { \
448  namespace Exp { \
449  \
450  template <typename T> \
451  class OP< T,ExprSpecMPVector > : \
452  public Expr< OP< T,ExprSpecMPVector > > { \
453  public: \
454  \
455  typedef typename std::remove_cv<T>::type ExprT; \
456  typedef typename ExprT::value_type value_type; \
457  typedef typename ExprT::scalar_type scalar_type; \
458  \
459  typedef typename value_type::value_type val_type; \
460  \
461  typedef ExprSpecMPVector expr_spec_type; \
462  \
463  KOKKOS_INLINE_FUNCTION \
464  OP(const T& expr_) : expr(expr_) {} \
465  \
466  KOKKOS_INLINE_FUNCTION \
467  int size() const { return expr.size(); } \
468  \
469  KOKKOS_INLINE_FUNCTION \
470  bool hasFastAccess() const { \
471  return expr.hasFastAccess(); \
472  } \
473  \
474  KOKKOS_INLINE_FUNCTION \
475  value_type val() const { \
476  USING \
477  return MPVALUE; \
478  } \
479  \
480  KOKKOS_INLINE_FUNCTION \
481  val_type val(int j) const { \
482  USING \
483  return VALUE; \
484  } \
485  \
486  KOKKOS_INLINE_FUNCTION \
487  val_type dx(int i, int j) const { \
488  USING \
489  return DX; \
490  } \
491  \
492  KOKKOS_INLINE_FUNCTION \
493  val_type fastAccessDx(int i, int j) const { \
494  USING \
495  return FASTACCESSDX; \
496  } \
497  \
498  protected: \
499  \
500  const T& expr; \
501  }; \
502  \
503  } \
504  } \
505  \
506 }
507 
508 FAD_UNARYOP_MACRO(operator+,
509  UnaryPlusOp,
510  ;,
511  expr.val(),
512  expr.val(j),
513  expr.dx(i,j),
514  expr.fastAccessDx(i,j))
515 FAD_UNARYOP_MACRO(operator-,
517  ;,
518  -expr.val(),
519  -expr.val(j),
520  -expr.dx(i,j),
521  -expr.fastAccessDx(i,j))
524  using std::exp;,
525  exp(expr.val()),
526  exp(expr.val(j)),
527  exp(expr.val(j))*expr.dx(i,j),
528  exp(expr.val(j))*expr.fastAccessDx(i,j))
530  LogOp,
531  using std::log;,
532  log(expr.val()),
533  log(expr.val(j)),
534  expr.dx(i,j)/expr.val(j),
535  expr.fastAccessDx(i,j)/expr.val(j))
538  using std::log10; using std::log;,
539  log10(expr.val()),
540  log10(expr.val(j)),
541  expr.dx(i,j)/( log(value_type(10))*expr.val()),
542  expr.fastAccessDx(i,j) / ( log(value_type(10))*expr.val()))
545  using std::sqrt;,
546  sqrt(expr.val()),
547  sqrt(expr.val(j)),
548  expr.dx(i,j)/(value_type(2)* sqrt(expr.val())),
549  expr.fastAccessDx(i,j)/(value_type(2)* sqrt(expr.val())))
552  using std::cos; using std::sin;,
553  cos(expr.val()),
554  cos(expr.val(j)),
555  -expr.dx(i,j)* sin(expr.val()),
556  -expr.fastAccessDx(i,j)* sin(expr.val()))
559  using std::cos; using std::sin;,
560  sin(expr.val()),
561  sin(expr.val(j)),
562  expr.dx(i,j)* cos(expr.val()),
563  expr.fastAccessDx(i,j)* cos(expr.val()))
566  using std::tan;,
567  tan(expr.val()),
568  tan(expr.val(j)),
569  expr.dx(i,j)*
570  (value_type(1)+ tan(expr.val())* tan(expr.val())),
571  expr.fastAccessDx(i,j)*
572  (value_type(1)+ tan(expr.val())* tan(expr.val())))
575  using std::acos; using std::sqrt;,
576  acos(expr.val()),
577  acos(expr.val(j)),
578  -expr.dx(i,j)/ sqrt(value_type(1)-expr.val()*expr.val()),
579  -expr.fastAccessDx(i,j) /
580  sqrt(value_type(1)-expr.val()*expr.val()))
583  using std::asin; using std::sqrt;,
584  asin(expr.val()),
585  asin(expr.val(j)),
586  expr.dx(i,j)/ sqrt(value_type(1)-expr.val()*expr.val()),
587  expr.fastAccessDx(i,j) /
588  sqrt(value_type(1)-expr.val()*expr.val()))
591  using std::atan;,
592  atan(expr.val()),
593  atan(expr.val(j)),
594  expr.dx(i,j)/(value_type(1)+expr.val()*expr.val()),
595  expr.fastAccessDx(i,j)/(value_type(1)+expr.val()*expr.val()))
598  using std::cosh; using std::sinh;,
599  cosh(expr.val()),
600  cosh(expr.val(j)),
601  expr.dx(i,j)* sinh(expr.val()),
602  expr.fastAccessDx(i,j)* sinh(expr.val()))
605  using std::cosh; using std::sinh;,
606  sinh(expr.val()),
607  sinh(expr.val(j)),
608  expr.dx(i,j)* cosh(expr.val()),
609  expr.fastAccessDx(i,j)* cosh(expr.val()))
612  using std::tanh; using std::cosh;,
613  tanh(expr.val()),
614  tanh(expr.val(j)),
615  expr.dx(i,j)/( cosh(expr.val())* cosh(expr.val())),
616  expr.fastAccessDx(i,j) /
617  ( cosh(expr.val())* cosh(expr.val())))
620  using std::acosh; using std::sqrt;,
621  acosh(expr.val()),
622  acosh(expr.val(j)),
623  expr.dx(i,j)/ sqrt((expr.val()-value_type(1)) *
624  (expr.val()+value_type(1))),
625  expr.fastAccessDx(i,j)/ sqrt((expr.val()-value_type(1)) *
626  (expr.val()+value_type(1))))
629  using std::asinh; using std::sqrt;,
630  asinh(expr.val()),
631  asinh(expr.val(j)),
632  expr.dx(i,j)/ sqrt(value_type(1)+expr.val()*expr.val()),
633  expr.fastAccessDx(i,j)/ sqrt(value_type(1)+
634  expr.val()*expr.val()))
637  using std::atanh;,
638  atanh(expr.val()),
639  atanh(expr.val(j)),
640  expr.dx(i,j)/(value_type(1)-expr.val()*expr.val()),
641  expr.fastAccessDx(i,j)/(value_type(1)-
642  expr.val()*expr.val()))
645  using std::abs;,
646  abs(expr.val()),
647  abs(expr.val(j)),
648  if_then_else( expr.val() >= 0, expr.dx(i,j), value_type(-expr.dx(i,j)) ),
649  if_then_else( expr.val() >= 0, expr.fastAccessDx(i,j), value_type(-expr.fastAccessDx(i,j)) ) )
652  using std::fabs;,
653  fabs(expr.val()),
654  fabs(expr.val(j)),
655  if_then_else( expr.val() >= 0, expr.dx(i,j), value_type(-expr.dx(i,j)) ),
656  if_then_else( expr.val() >= 0, expr.fastAccessDx(i,j), value_type(-expr.fastAccessDx(i,j)) ) )
659  using std::cbrt;,
660  cbrt(expr.val()),
661  cbrt(expr.val(j)),
662  expr.dx(i,j)/(value_type(3)*cbrt(expr.val()*expr.val())),
663  expr.fastAccessDx(i,j)/(value_type(3)*cbrt(expr.val()*expr.val())))
664 
665 #undef FAD_UNARYOP_MACRO
666 
667 namespace Sacado {
668  namespace Fad {
669  namespace Exp {
670 
671  // For MP::Vector scalar type, promote constants up to expression's value
672  // type. If the constant type is the same as the value type, we can store
673  // the constant as a reference. If it isn't, we must copy it into a new
674  // value type object. We do this so that we can always access the constant
675  // as a value type.
676  template <typename ConstType, typename ValueType>
677  struct ConstTypeRef {
678  typedef ValueType type;
679  };
680 
681  template <typename ValueType>
682  struct ConstTypeRef<ValueType, ValueType> {
683  typedef ValueType& type;
684  };
685  }
686  }
687 }
688 
689 #define FAD_BINARYOP_MACRO(OPNAME,OP,USING,MPVALUE,VALUE,DX,CDX1,CDX2,FASTACCESSDX,MPVAL_CONST_DX_1,MPVAL_CONST_DX_2,VAL_CONST_DX_1,VAL_CONST_DX_2,CONST_DX_1,CONST_DX_2,CONST_FASTACCESSDX_1,CONST_FASTACCESSDX_2) \
690 namespace Sacado { \
691  namespace Fad { \
692  namespace Exp { \
693  \
694  template <typename T1, typename T2 > \
695  class OP< T1, T2, false, false, ExprSpecMPVector > : \
696  public Expr< OP< T1, T2, false, false, ExprSpecMPVector > > { \
697  public: \
698  \
699  typedef typename std::remove_cv<T1>::type ExprT1; \
700  typedef typename std::remove_cv<T2>::type ExprT2; \
701  typedef typename ExprT1::value_type value_type_1; \
702  typedef typename ExprT2::value_type value_type_2; \
703  typedef typename Sacado::Promote<value_type_1, \
704  value_type_2>::type value_type; \
705  \
706  typedef typename ExprT1::scalar_type scalar_type_1; \
707  typedef typename ExprT2::scalar_type scalar_type_2; \
708  typedef typename Sacado::Promote<scalar_type_1, \
709  scalar_type_2>::type scalar_type; \
710  \
711  typedef typename value_type::value_type val_type; \
712  \
713  typedef ExprSpecMPVector expr_spec_type; \
714  \
715  KOKKOS_INLINE_FUNCTION \
716  OP(const T1& expr1_, const T2& expr2_) : \
717  expr1(expr1_), expr2(expr2_) {} \
718  \
719  KOKKOS_INLINE_FUNCTION \
720  int size() const { \
721  const int sz1 = expr1.size(), sz2 = expr2.size(); \
722  return sz1 > sz2 ? sz1 : sz2; \
723  } \
724  \
725  KOKKOS_INLINE_FUNCTION \
726  bool hasFastAccess() const { \
727  return expr1.hasFastAccess() && expr2.hasFastAccess(); \
728  } \
729  \
730  KOKKOS_INLINE_FUNCTION \
731  value_type val() const { \
732  USING \
733  return MPVALUE; \
734  } \
735  \
736  KOKKOS_INLINE_FUNCTION \
737  val_type val(int j) const { \
738  USING \
739  return VALUE; \
740  } \
741  \
742  KOKKOS_INLINE_FUNCTION \
743  val_type dx(int i, int j) const { \
744  USING \
745  const int sz1 = expr1.size(), sz2 = expr2.size(); \
746  if (sz1 > 0 && sz2 > 0) \
747  return DX; \
748  else if (sz1 > 0) \
749  return CDX2; \
750  else \
751  return CDX1; \
752  } \
753  \
754  KOKKOS_INLINE_FUNCTION \
755  val_type fastAccessDx(int i, int j) const { \
756  USING \
757  return FASTACCESSDX; \
758  } \
759  \
760  protected: \
761  \
762  const T1& expr1; \
763  const T2& expr2; \
764  \
765  }; \
766  \
767  template <typename T1, typename T2> \
768  class OP< T1, T2, false, true, ExprSpecMPVector > : \
769  public Expr< OP< T1, T2, false, true, ExprSpecMPVector > > { \
770  public: \
771  \
772  typedef typename std::remove_cv<T1>::type ExprT1; \
773  typedef T2 ConstT; \
774  typedef typename ExprT1::value_type value_type; \
775  typedef typename ExprT1::scalar_type scalar_type; \
776  \
777  typedef typename value_type::value_type val_type; \
778  \
779  typedef ExprSpecMPVector expr_spec_type; \
780  \
781  KOKKOS_INLINE_FUNCTION \
782  OP(const T1& expr1_, const ConstT& c_) : \
783  expr1(expr1_), c(c_) {} \
784  \
785  KOKKOS_INLINE_FUNCTION \
786  int size() const { \
787  return expr1.size(); \
788  } \
789  \
790  KOKKOS_INLINE_FUNCTION \
791  bool hasFastAccess() const { \
792  return expr1.hasFastAccess(); \
793  } \
794  \
795  KOKKOS_INLINE_FUNCTION \
796  value_type val() const { \
797  USING \
798  return MPVAL_CONST_DX_2; \
799  } \
800  \
801  KOKKOS_INLINE_FUNCTION \
802  val_type val(int j) const { \
803  USING \
804  return VAL_CONST_DX_2; \
805  } \
806  \
807  KOKKOS_INLINE_FUNCTION \
808  val_type dx(int i, int j) const { \
809  USING \
810  return CONST_DX_2; \
811  } \
812  \
813  KOKKOS_INLINE_FUNCTION \
814  val_type fastAccessDx(int i, int j) const { \
815  USING \
816  return CONST_FASTACCESSDX_2; \
817  } \
818  \
819  protected: \
820  \
821  const T1& expr1; \
822  const typename ConstTypeRef<ConstT,value_type>::type c; \
823  }; \
824  \
825  template <typename T1, typename T2> \
826  class OP< T1, T2, true, false,ExprSpecMPVector > : \
827  public Expr< OP< T1, T2, true, false, ExprSpecMPVector > > { \
828  public: \
829  \
830  typedef typename std::remove_cv<T2>::type ExprT2; \
831  typedef T1 ConstT; \
832  typedef typename ExprT2::value_type value_type; \
833  typedef typename ExprT2::scalar_type scalar_type; \
834  \
835  typedef typename value_type::value_type val_type; \
836  \
837  typedef ExprSpecMPVector expr_spec_type; \
838  \
839  KOKKOS_INLINE_FUNCTION \
840  OP(const ConstT& c_, const T2& expr2_) : \
841  c(c_), expr2(expr2_) {} \
842  \
843  KOKKOS_INLINE_FUNCTION \
844  int size() const { \
845  return expr2.size(); \
846  } \
847  \
848  KOKKOS_INLINE_FUNCTION \
849  bool hasFastAccess() const { \
850  return expr2.hasFastAccess(); \
851  } \
852  \
853  KOKKOS_INLINE_FUNCTION \
854  value_type val() const { \
855  USING \
856  return MPVAL_CONST_DX_1; \
857  } \
858  \
859  KOKKOS_INLINE_FUNCTION \
860  val_type val(int j) const { \
861  USING \
862  return VAL_CONST_DX_1; \
863  } \
864  \
865  KOKKOS_INLINE_FUNCTION \
866  val_type dx(int i, int j) const { \
867  USING \
868  return CONST_DX_1; \
869  } \
870  \
871  KOKKOS_INLINE_FUNCTION \
872  val_type fastAccessDx(int i, int j) const { \
873  USING \
874  return CONST_FASTACCESSDX_1; \
875  } \
876  \
877  protected: \
878  \
879  const typename ConstTypeRef<ConstT,value_type>::type c; \
880  const T2& expr2; \
881  }; \
882  \
883  } \
884  } \
885  \
886 }
887 
888 
889 FAD_BINARYOP_MACRO(operator+,
890  AdditionOp,
891  ;,
892  expr1.val() + expr2.val(),
893  expr1.val(j) + expr2.val(j),
894  expr1.dx(i,j) + expr2.dx(i,j),
895  expr2.dx(i,j),
896  expr1.dx(i,j),
897  expr1.fastAccessDx(i,j) + expr2.fastAccessDx(i,j),
898  c + expr2.val(),
899  expr1.val() + c,
900  c.fastAccessCoeff(j) + expr2.val(j),
901  expr1.val(j) + c.fastAccessCoeff(j),
902  expr2.dx(i,j),
903  expr1.dx(i,j),
904  expr2.fastAccessDx(i,j),
905  expr1.fastAccessDx(i,j))
906 FAD_BINARYOP_MACRO(operator-,
908  ;,
909  expr1.val() - expr2.val(),
910  expr1.val(j) - expr2.val(j),
911  expr1.dx(i,j) - expr2.dx(i,j),
912  -expr2.dx(i,j),
913  expr1.dx(i,j),
914  expr1.fastAccessDx(i,j) - expr2.fastAccessDx(i,j),
915  c - expr2.val(),
916  expr1.val() - c,
917  c.fastAccessCoeff(j) - expr2.val(j),
918  expr1.val(j) - c.fastAccessCoeff(j),
919  -expr2.dx(i,j),
920  expr1.dx(i,j),
921  -expr2.fastAccessDx(i,j),
922  expr1.fastAccessDx(i,j))
923 FAD_BINARYOP_MACRO(operator*,
925  ;,
926  expr1.val() * expr2.val(),
927  expr1.val(j) * expr2.val(j),
928  expr1.val(j)*expr2.dx(i,j) + expr1.dx(i,j)*expr2.val(j),
929  expr1.val(j)*expr2.dx(i,j),
930  expr1.dx(i,j)*expr2.val(j),
931  expr1.val(j)*expr2.fastAccessDx(i,j) +
932  expr1.fastAccessDx(i,j)*expr2.val(j),
933  c * expr2.val(),
934  expr1.val() * c,
935  c.fastAccessCoeff(j) * expr2.val(j),
936  expr1.val(j) * c.fastAccessCoeff(j),
937  c.fastAccessCoeff(j)*expr2.dx(i,j),
938  expr1.dx(i,j)*c.fastAccessCoeff(j),
939  c.fastAccessCoeff(j)*expr2.fastAccessDx(i,j),
940  expr1.fastAccessDx(i,j)*c.fastAccessCoeff(j))
941 FAD_BINARYOP_MACRO(operator/,
943  ;,
944  expr1.val() / expr2.val(),
945  expr1.val(j) / expr2.val(j),
946  (expr1.dx(i,j)*expr2.val(j) - expr2.dx(i,j)*expr1.val(j)) /
947  (expr2.val(j)*expr2.val(j)),
948  -expr2.dx(i,j)*expr1.val(j) / (expr2.val(j)*expr2.val(j)),
949  expr1.dx(i,j)/expr2.val(j),
950  (expr1.fastAccessDx(i,j)*expr2.val(j) -
951  expr2.fastAccessDx(i,j)*expr1.val(j)) /
952  (expr2.val(j)*expr2.val(j)),
953  c / expr2.val(),
954  expr1.val() / c,
955  c.fastAccessCoeff(j) / expr2.val(j),
956  expr1.val(j) / c.fastAccessCoeff(j),
957  -expr2.dx(i,j)*c.fastAccessCoeff(j) / (expr2.val(j)*expr2.val(j)),
958  expr1.dx(i,j)/c.fastAccessCoeff(j),
959  -expr2.fastAccessDx(i,j)*c.fastAccessCoeff(j) / (expr2.val(j)*expr2.val(j)),
960  expr1.fastAccessDx(i,j)/c.fastAccessCoeff(j))
963  using std::atan2;,
964  atan2(expr1.val(), expr2.val()),
965  atan2(expr1.val(j), expr2.val(j)),
966  (expr2.val(j)*expr1.dx(i,j) - expr1.val(j)*expr2.dx(i,j))/
967  (expr1.val(j)*expr1.val(j) + expr2.val(j)*expr2.val(j)),
968  -expr1.val(j)*expr2.dx(i,j)/
969  (expr1.val(j)*expr1.val(j) + expr2.val(j)*expr2.val(j)),
970  expr2.val(j)*expr1.dx(i,j)/
971  (expr1.val(j)*expr1.val(j) + expr2.val(j)*expr2.val(j)),
972  (expr2.val(j)*expr1.fastAccessDx(i,j) - expr1.val(j)*expr2.fastAccessDx(i,j))/
973  (expr1.val(j)*expr1.val(j) + expr2.val(j)*expr2.val(j)),
974  atan2(c, expr2.val()),
975  atan2(expr1.val(), c),
976  atan2(c.fastAccessCoeff(j), expr2.val(j)),
977  atan2(expr1.val(j), c.fastAccessCoeff(j)),
978  (-c.fastAccessCoeff(j)*expr2.dx(i,j)) / (c.fastAccessCoeff(j)*c.fastAccessCoeff(j) + expr2.val(j)*expr2.val(j)),
979  (c.fastAccessCoeff(j)*expr1.dx(i,j))/ (expr1.val(j)*expr1.val(j) + c.fastAccessCoeff(j)*c.fastAccessCoeff(j)),
980  (-c.fastAccessCoeff(j)*expr2.fastAccessDx(i,j))/ (c.fastAccessCoeff(j)*c.fastAccessCoeff(j) + expr2.val(j)*expr2.val(j)),
981  (c.fastAccessCoeff(j)*expr1.fastAccessDx(i,j))/ (expr1.val(j)*expr1.val(j) + c.fastAccessCoeff(j)*c.fastAccessCoeff(j)))
982 // FAD_BINARYOP_MACRO(pow,
983 // PowerOp,
984 // using std::pow; using std::log;,
985 // pow(expr1.val(), expr2.val()),
986 // pow(expr1.val(j), expr2.val(j)),
987 // if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type((expr2.dx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.dx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j))) ),
988 // if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(expr2.dx(i,j)*log(expr1.val(j))*pow(expr1.val(j),expr2.val(j))) ),
989 // if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(expr2.val(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),expr2.val(j))) ),
990 // if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type((expr2.fastAccessDx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.fastAccessDx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j))) ),
991 // pow(c, expr2.val()),
992 // pow(expr1.val(), c),
993 // pow(c.fastAccessCoeff(j), expr2.val(j)),
994 // pow(expr1.val(j), c.fastAccessCoeff(j)),
995 // if_then_else( c.fastAccessCoeff(j) == val_type(0.0), val_type(0.0), val_type(expr2.dx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j))) ),
996 // if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(c.fastAccessCoeff(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j))) ),
997 // if_then_else( c.fastAccessCoeff(j) == val_type(0.0), val_type(0.0), val_type(expr2.fastAccessDx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j))) ),
998 // if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(c.fastAccessCoeff(j)*expr1.fastAccessDx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j)))) )
1001  ;,
1002  if_then_else( expr1.val() >= expr2.val(), expr1.val(), expr2.val() ),
1003  if_then_else( expr1.val(j) >= expr2.val(j), expr1.val(j), expr2.val(j) ),
1004  if_then_else( expr1.val(j) >= expr2.val(j), expr1.dx(i,j), expr2.dx(i,j) ),
1005  if_then_else( expr1.val(j) >= expr2.val(j), val_type(0.0), expr2.dx(i,j) ),
1006  if_then_else( expr1.val(j) >= expr2.val(j), expr1.dx(i,j), val_type(0.0) ),
1007  if_then_else( expr1.val(j) >= expr2.val(j), expr1.fastAccessDx(i,j), expr2.fastAccessDx(i,j) ),
1008  if_then_else( c >= expr2.val(), c, expr2.val() ),
1009  if_then_else( expr1.val() >= c, expr1.val(), c ),
1010  if_then_else( c.fastAccessCoeff(j) >= expr2.val(j), c.fastAccessCoeff(j), expr2.val(j) ),
1011  if_then_else( expr1.val(j) >= c.fastAccessCoeff(j), expr1.val(j), c.fastAccessCoeff(j) ),
1012  if_then_else( c.fastAccessCoeff(j) >= expr2.val(j), val_type(0.0), expr2.dx(i,j) ),
1013  if_then_else( expr1.val(j) >= c.fastAccessCoeff(j), expr1.dx(i,j), val_type(0.0) ),
1014  if_then_else( c.fastAccessCoeff(j) >= expr2.val(j), val_type(0.0), expr2.fastAccessDx(i,j) ),
1015  if_then_else( expr1.val(j) >= c.fastAccessCoeff(j), expr1.fastAccessDx(i,j), val_type(0.0) ) )
1018  ;,
1019  if_then_else( expr1.val() <= expr2.val(), expr1.val(), expr2.val() ),
1020  if_then_else( expr1.val(j) <= expr2.val(j), expr1.val(j), expr2.val(j) ),
1021  if_then_else( expr1.val(j) <= expr2.val(j), expr1.dx(i,j), expr2.dx(i,j) ),
1022  if_then_else( expr1.val(j) <= expr2.val(j), val_type(0.0), expr2.dx(i,j) ),
1023  if_then_else( expr1.val(j) <= expr2.val(j), expr1.dx(i,j), val_type(0.0) ),
1024  if_then_else( expr1.val(j) <= expr2.val(j), expr1.fastAccessDx(i,j), expr2.fastAccessDx(i,j) ),
1025  if_then_else( c <= expr2.val(), c, expr2.val() ),
1026  if_then_else( expr1.val() <= c, expr1.val(), c ),
1027  if_then_else( c.fastAccessCoeff(j) <= expr2.val(j), c.fastAccessCoeff(j), expr2.val(j) ),
1028  if_then_else( expr1.val(j) <= c.fastAccessCoeff(j), expr1.val(j), c.fastAccessCoeff(j) ),
1029  if_then_else( c.fastAccessCoeff(j) <= expr2.val(j), val_type(0), expr2.dx(i,j) ),
1030  if_then_else( expr1.val(j) <= c.fastAccessCoeff(j), expr1.dx(i,j), val_type(0) ),
1031  if_then_else( c.fastAccessCoeff(j) <= expr2.val(j), val_type(0), expr2.fastAccessDx(i,j) ),
1032  if_then_else( expr1.val(j) <= c.fastAccessCoeff(j), expr1.fastAccessDx(i,j), val_type(0) ) )
1033 
1034 // Special handling for std::pow() to provide specializations of PowerOp for
1035 // "simd" value types that use if_then_else(). The only reason for not using
1036 // if_then_else() always is to avoid evaluating the derivative if the value is
1037 // zero to avoid throwing FPEs.
1038 namespace Sacado {
1039  namespace Fad {
1040  namespace Exp {
1041 
1042  //
1043  // Implementation for simd type using if_then_else()
1044  //
1045  template <typename T1, typename T2>
1046  class PowerOp< T1, T2, false, false, ExprSpecMPVector, PowerImpl::Simd > :
1047  public Expr< PowerOp< T1, T2, false, false, ExprSpecMPVector,
1048  PowerImpl::Simd > > {
1049  public:
1050 
1051  typedef typename std::remove_cv<T1>::type ExprT1;
1052  typedef typename std::remove_cv<T2>::type ExprT2;
1053  typedef typename ExprT1::value_type value_type_1;
1054  typedef typename ExprT2::value_type value_type_2;
1055  typedef typename Sacado::Promote<value_type_1,
1056  value_type_2>::type value_type;
1057 
1058  typedef typename ExprT1::scalar_type scalar_type_1;
1059  typedef typename ExprT2::scalar_type scalar_type_2;
1060  typedef typename Sacado::Promote<scalar_type_1,
1061  scalar_type_2>::type scalar_type;
1062 
1063  typedef typename value_type::value_type val_type;
1064 
1065  typedef ExprSpecMPVector expr_spec_type;
1066 
1067  KOKKOS_INLINE_FUNCTION
1068  PowerOp(const T1& expr1_, const T2& expr2_) :
1069  expr1(expr1_), expr2(expr2_) {}
1070 
1071  KOKKOS_INLINE_FUNCTION
1072  int size() const {
1073  const int sz1 = expr1.size(), sz2 = expr2.size();
1074  return sz1 > sz2 ? sz1 : sz2;
1075  }
1076 
1077  KOKKOS_INLINE_FUNCTION
1078  bool hasFastAccess() const {
1079  return expr1.hasFastAccess() && expr2.hasFastAccess();
1080  }
1081 
1082  KOKKOS_INLINE_FUNCTION
1083  value_type val() const {
1084  using std::pow;
1085  return pow(expr1.val(), expr2.val());
1086  }
1087 
1088  KOKKOS_INLINE_FUNCTION
1089  val_type val(int j) const {
1090  using std::pow;
1091  return pow(expr1.val(j), expr2.val(j));
1092  }
1093 
1094  KOKKOS_INLINE_FUNCTION
1095  val_type dx(int i, int j) const {
1096  using std::pow; using std::log;
1097  const int sz1 = expr1.size(), sz2 = expr2.size();
1098  if (sz1 > 0 && sz2 > 0)
1099  return if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type((expr2.dx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.dx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j))) );
1100  else if (sz1 > 0)
1101  // Don't use formula (a(x)^b)' = b*a(x)^{b-1}*a'(x)
1102  // It seems less accurate and caused convergence problems in some codes
1103  return if_then_else( expr2.val(j) == scalar_type(1.0), expr1.dx(i,j), if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(expr2.val(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),expr2.val(j))) ));
1104  else
1105  return if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(expr2.dx(i,j)*log(expr1.val(j))*pow(expr1.val(j),expr2.val(j))) );
1106  }
1107 
1108  KOKKOS_INLINE_FUNCTION
1109  val_type fastAccessDx(int i, int j) const {
1110  using std::pow; using std::log;
1111  return if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type((expr2.fastAccessDx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.fastAccessDx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j))) );
1112  }
1113 
1114  protected:
1115 
1116  const T1& expr1;
1117  const T2& expr2;
1118 
1119  };
1120 
1121  template <typename T1, typename T2>
1122  class PowerOp< T1, T2, false, true, ExprSpecMPVector, PowerImpl::Simd >
1123  : public Expr< PowerOp< T1, T2, false, true, ExprSpecMPVector,
1124  PowerImpl::Simd > > {
1125  public:
1126 
1127  typedef typename std::remove_cv<T1>::type ExprT1;
1128  typedef T2 ConstT;
1129  typedef typename ExprT1::value_type value_type;
1130  typedef typename ExprT1::scalar_type scalar_type;
1131 
1132  typedef typename value_type::value_type val_type;
1133 
1134  typedef ExprSpecMPVector expr_spec_type;
1135 
1136  KOKKOS_INLINE_FUNCTION
1137  PowerOp(const T1& expr1_, const ConstT& c_) :
1138  expr1(expr1_), c(c_) {}
1139 
1140  KOKKOS_INLINE_FUNCTION
1141  int size() const {
1142  return expr1.size();
1143  }
1144 
1145  KOKKOS_INLINE_FUNCTION
1146  bool hasFastAccess() const {
1147  return expr1.hasFastAccess();
1148  }
1149 
1150  KOKKOS_INLINE_FUNCTION
1151  value_type val() const {
1152  using std::pow;
1153  return pow(expr1.val(), c);
1154  }
1155 
1156  KOKKOS_INLINE_FUNCTION
1157  val_type val(int j) const {
1158  using std::pow;
1159  return pow(expr1.val(j), c.fastAccessCoeff(j));
1160  }
1161 
1162  KOKKOS_INLINE_FUNCTION
1163  val_type dx(int i, int j) const {
1164  using std::pow;
1165  // Don't use formula (a(x)^b)' = b*a(x)^{b-1}*a'(x)
1166  // It seems less accurate and caused convergence problems in some codes
1167  return if_then_else( c.fastAccessCoeff(j) == scalar_type(1.0), expr1.dx(i,j), if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(c.fastAccessCoeff(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j))) ));
1168  }
1169 
1170  KOKKOS_INLINE_FUNCTION
1171  val_type fastAccessDx(int i, int j) const {
1172  using std::pow;
1173  // Don't use formula (a(x)^b)' = b*a(x)^{b-1}*a'(x)
1174  // It seems less accurate and caused convergence problems in some codes
1175  return if_then_else( c.fastAccessCoeff(j) == scalar_type(1.0), expr1.fastAccessDx(i,j), if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(c.fastAccessCoeff(j)*expr1.fastAccessDx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j))) ));
1176  }
1177 
1178  protected:
1179 
1180  const T1& expr1;
1181  const ConstT& c;
1182  };
1183 
1184  template <typename T1, typename T2>
1185  class PowerOp< T1, T2, true, false, ExprSpecMPVector, PowerImpl::Simd >
1186  : public Expr< PowerOp< T1, T2, true, false, ExprSpecMPVector,
1187  PowerImpl::Simd> > {
1188  public:
1189 
1190  typedef typename std::remove_cv<T2>::type ExprT2;
1191  typedef T1 ConstT;
1192  typedef typename ExprT2::value_type value_type;
1193  typedef typename ExprT2::scalar_type scalar_type;
1194 
1195  typedef typename value_type::value_type val_type;
1196 
1197  typedef ExprSpecMPVector expr_spec_type;
1198 
1199  KOKKOS_INLINE_FUNCTION
1200  PowerOp(const ConstT& c_, const T2& expr2_) :
1201  c(c_), expr2(expr2_) {}
1202 
1203  KOKKOS_INLINE_FUNCTION
1204  int size() const {
1205  return expr2.size();
1206  }
1207 
1208  KOKKOS_INLINE_FUNCTION
1209  bool hasFastAccess() const {
1210  return expr2.hasFastAccess();
1211  }
1212 
1213  KOKKOS_INLINE_FUNCTION
1214  value_type val() const {
1215  using std::pow;
1216  return pow(c, expr2.val());
1217  }
1218 
1219  KOKKOS_INLINE_FUNCTION
1220  val_type val(int j) const {
1221  using std::pow;
1222  return pow(c.fastAccessCoeff(j), expr2.val(j));
1223  }
1224 
1225  KOKKOS_INLINE_FUNCTION
1226  val_type dx(int i, int j) const {
1227  using std::pow; using std::log;
1228  return if_then_else( c.fastAccessCoeff(j) == val_type(0.0), val_type(0.0), val_type(expr2.dx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j))) );
1229  }
1230 
1231  KOKKOS_INLINE_FUNCTION
1232  val_type fastAccessDx(int i, int j) const {
1233  using std::pow; using std::log;
1234  return if_then_else( c.fastAccessCoeff(j) == val_type(0.0), val_type(0.0), val_type(expr2.fastAccessDx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j))) );
1235  }
1236 
1237  protected:
1238 
1239  const ConstT& c;
1240  const T2& expr2;
1241  };
1242 
1243  //
1244  // Specialization for nested derivatives. This version does not use
1245  // if_then_else/ternary-operator on the base so that nested derivatives
1246  // are correct.
1247  //
1248  template <typename T1, typename T2>
1249  class PowerOp< T1, T2, false, false, ExprSpecMPVector,
1250  PowerImpl::NestedSimd > :
1251  public Expr< PowerOp< T1, T2, false, false, ExprSpecMPVector,
1252  PowerImpl::NestedSimd > > {
1253  public:
1254 
1255  typedef typename std::remove_cv<T1>::type ExprT1;
1256  typedef typename std::remove_cv<T2>::type ExprT2;
1257  typedef typename ExprT1::value_type value_type_1;
1258  typedef typename ExprT2::value_type value_type_2;
1259  typedef typename Sacado::Promote<value_type_1,
1260  value_type_2>::type value_type;
1261 
1262  typedef typename ExprT1::scalar_type scalar_type_1;
1263  typedef typename ExprT2::scalar_type scalar_type_2;
1264  typedef typename Sacado::Promote<scalar_type_1,
1265  scalar_type_2>::type scalar_type;
1266 
1267  typedef typename value_type::value_type val_type;
1268 
1269  typedef ExprSpecMPVector expr_spec_type;
1270 
1271  KOKKOS_INLINE_FUNCTION
1272  PowerOp(const T1& expr1_, const T2& expr2_) :
1273  expr1(expr1_), expr2(expr2_) {}
1274 
1275  KOKKOS_INLINE_FUNCTION
1276  int size() const {
1277  const int sz1 = expr1.size(), sz2 = expr2.size();
1278  return sz1 > sz2 ? sz1 : sz2;
1279  }
1280 
1281  KOKKOS_INLINE_FUNCTION
1282  bool hasFastAccess() const {
1283  return expr1.hasFastAccess() && expr2.hasFastAccess();
1284  }
1285 
1286  KOKKOS_INLINE_FUNCTION
1287  value_type val() const {
1288  using std::pow;
1289  return pow(expr1.val(), expr2.val());
1290  }
1291 
1292  KOKKOS_INLINE_FUNCTION
1293  val_type val(int j) const {
1294  using std::pow;
1295  return pow(expr1.val(j), expr2.val(j));
1296  }
1297 
1298  KOKKOS_INLINE_FUNCTION
1299  value_type dx(int i, int j) const {
1300  using std::pow; using std::log;
1301  const int sz1 = expr1.size(), sz2 = expr2.size();
1302  if (sz1 > 0 && sz2 > 0)
1303  return (expr2.dx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.dx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j));
1304  else if (sz1 > 0)
1305  return if_then_else( expr2.val(j) == scalar_type(0.0), value_type(0.0), value_type((expr2.val(j)*expr1.dx(i,j))*pow(expr1.val(j),expr2.val(j)-scalar_type(1.0))));
1306  else
1307  return expr2.dx(i,j)*log(expr1.val(j))*pow(expr1.val(j),expr2.val(j));
1308  }
1309 
1310  KOKKOS_INLINE_FUNCTION
1311  value_type fastAccessDx(int i, int j) const {
1312  using std::pow; using std::log;
1313  return (expr2.fastAccessDx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.fastAccessDx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j));
1314  }
1315 
1316  protected:
1317 
1318  const T1& expr1;
1319  const T2& expr2;
1320 
1321  };
1322 
1323  template <typename T1, typename T2>
1324  class PowerOp< T1, T2, false, true, ExprSpecMPVector,
1325  PowerImpl::NestedSimd > :
1326  public Expr< PowerOp< T1, T2, false, true, ExprSpecMPVector,
1327  PowerImpl::NestedSimd > > {
1328  public:
1329 
1330  typedef typename std::remove_cv<T1>::type ExprT1;
1331  typedef T2 ConstT;
1332  typedef typename ExprT1::value_type value_type;
1333  typedef typename ExprT1::scalar_type scalar_type;
1334 
1335  typedef typename value_type::value_type val_type;
1336 
1337  typedef ExprSpecMPVector expr_spec_type;
1338 
1339  KOKKOS_INLINE_FUNCTION
1340  PowerOp(const T1& expr1_, const ConstT& c_) :
1341  expr1(expr1_), c(c_) {}
1342 
1343  KOKKOS_INLINE_FUNCTION
1344  int size() const {
1345  return expr1.size();
1346  }
1347 
1348  KOKKOS_INLINE_FUNCTION
1349  bool hasFastAccess() const {
1350  return expr1.hasFastAccess();
1351  }
1352 
1353  KOKKOS_INLINE_FUNCTION
1354  value_type val() const {
1355  using std::pow;
1356  return pow(expr1.val(), c);
1357  }
1358 
1359  KOKKOS_INLINE_FUNCTION
1360  val_type val(int j) const {
1361  using std::pow;
1362  return pow(expr1.val(j), c.fastAccessCoeff(j));
1363  }
1364 
1365  KOKKOS_INLINE_FUNCTION
1366  value_type dx(int i, int j) const {
1367  using std::pow;
1368  return if_then_else( c.fastAccessCoeff(j) == scalar_type(0.0), value_type(0.0), value_type(c.fastAccessCoeff(j)*expr1.dx(i,j)*pow(expr1.val(j),c.fastAccessCoeff(j)-scalar_type(1.0))));
1369  }
1370 
1371  KOKKOS_INLINE_FUNCTION
1372  value_type fastAccessDx(int i, int j) const {
1373  using std::pow;
1374  return if_then_else( c.fastAccessCoeff(j) == scalar_type(0.0), value_type(0.0), value_type(c.fastAccessCoeff(j)*expr1.fastAccessDx(i,j)*pow(expr1.val(j),c.fastAccessCoeff(j)-scalar_type(1.0))));
1375  }
1376 
1377  protected:
1378 
1379  const T1& expr1;
1380  const ConstT& c;
1381  };
1382 
1383  template <typename T1, typename T2>
1384  class PowerOp<T1, T2, true, false, ExprSpecMPVector,
1385  PowerImpl::NestedSimd > :
1386  public Expr< PowerOp< T1, T2, true, false, ExprSpecMPVector,
1387  PowerImpl::NestedSimd > > {
1388  public:
1389 
1390  typedef typename std::remove_cv<T2>::type ExprT2;
1391  typedef T1 ConstT;
1392  typedef typename ExprT2::value_type value_type;
1393  typedef typename ExprT2::scalar_type scalar_type;
1394 
1395  typedef typename value_type::value_type val_type;
1396 
1397  typedef ExprSpecMPVector expr_spec_type;
1398 
1399  KOKKOS_INLINE_FUNCTION
1400  PowerOp(const ConstT& c_, const T2& expr2_) :
1401  c(c_), expr2(expr2_) {}
1402 
1403  KOKKOS_INLINE_FUNCTION
1404  int size() const {
1405  return expr2.size();
1406  }
1407 
1408  KOKKOS_INLINE_FUNCTION
1409  bool hasFastAccess() const {
1410  return expr2.hasFastAccess();
1411  }
1412 
1413  KOKKOS_INLINE_FUNCTION
1414  value_type val() const {
1415  using std::pow;
1416  return pow(c, expr2.val());
1417  }
1418 
1419  KOKKOS_INLINE_FUNCTION
1420  val_type val(int j) const {
1421  using std::pow;
1422  return pow(c.fastAccessCoeff(j), expr2.val(j));
1423  }
1424 
1425  KOKKOS_INLINE_FUNCTION
1426  value_type dx(int i, int j) const {
1427  using std::pow; using std::log;
1428  return expr2.dx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j));
1429  }
1430 
1431  KOKKOS_INLINE_FUNCTION
1432  value_type fastAccessDx(int i, int j) const {
1433  using std::pow; using std::log;
1434  return expr2.fastAccessDx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j));
1435  }
1436 
1437  protected:
1438 
1439  const ConstT& c;
1440  const T2& expr2;
1441  };
1442 
1443  }
1444  }
1445 }
1446 
1447 //--------------------------if_then_else operator -----------------------
1448 // Can't use the above macros because it is a ternary operator (sort of).
1449 // Also, relies on C++11
1450 
1451 namespace Sacado {
1452  namespace Fad {
1453  namespace Exp {
1454 
1455  template <typename CondT, typename T1, typename T2>
1456  class IfThenElseOp< CondT,T1,T2,false,false,ExprSpecMPVector > :
1457  public Expr< IfThenElseOp< CondT, T1, T2, false, false, ExprSpecMPVector > > {
1458 
1459  public:
1460 
1461  typedef typename std::remove_cv<T1>::type ExprT1;
1462  typedef typename std::remove_cv<T2>::type ExprT2;
1465  typedef typename Sacado::Promote<value_type_1,
1467 
1470  typedef typename Sacado::Promote<scalar_type_1,
1472 
1474 
1476 
1477  KOKKOS_INLINE_FUNCTION
1478  IfThenElseOp(const CondT& cond_, const T1& expr1_, const T2& expr2_) :
1479  cond(cond_), expr1(expr1_), expr2(expr2_) {}
1480 
1481  KOKKOS_INLINE_FUNCTION
1482  int size() const {
1483  int sz1 = expr1.size(), sz2 = expr2.size();
1484  return sz1 > sz2 ? sz1 : sz2;
1485  }
1486 
1487  KOKKOS_INLINE_FUNCTION
1488  bool hasFastAccess() const {
1489  return expr1.hasFastAccess() && expr2.hasFastAccess();
1490  }
1491 
1492  KOKKOS_INLINE_FUNCTION
1493  value_type val() const {
1494  return if_then_else( cond, expr1.val(), expr2.val() );
1495  }
1496 
1497  KOKKOS_INLINE_FUNCTION
1498  val_type val(int j) const {
1499  return if_then_else( cond, expr1.val(j), expr2.val(j) );
1500  }
1501 
1502  KOKKOS_INLINE_FUNCTION
1503  val_type dx(int i, int j) const {
1504  return if_then_else( cond, expr1.dx(i,j), expr2.dx(i,j) );
1505  }
1506 
1507  KOKKOS_INLINE_FUNCTION
1508  val_type fastAccessDx(int i, int j) const {
1509  return if_then_else( cond, expr1.fastAccessDx(i,j), expr2.fastAccessDx(i,j) );
1510  }
1511 
1512  protected:
1513 
1514  const CondT& cond;
1515  const T1& expr1;
1516  const T2& expr2;
1517 
1518  };
1519 
1520  template <typename CondT, typename T1, typename T2>
1521  class IfThenElseOp< CondT, T1, T2, false, true, ExprSpecMPVector> :
1522  public Expr< IfThenElseOp< CondT, T1, T2, false, true, ExprSpecMPVector > > {
1523 
1524  public:
1525 
1526  typedef typename std::remove_cv<T1>::type ExprT1;
1527  typedef T2 ConstT;
1528  typedef typename ExprT1::value_type value_type;
1530 
1532 
1533  KOKKOS_INLINE_FUNCTION
1534  IfThenElseOp(const CondT& cond_, const T1& expr1_, const ConstT& c_) :
1535  cond(cond_), expr1(expr1_), c(c_) {}
1536 
1537  KOKKOS_INLINE_FUNCTION
1538  int size() const {
1539  return expr1.size();
1540  }
1541 
1542  KOKKOS_INLINE_FUNCTION
1543  bool hasFastAccess() const {
1544  return expr1.hasFastAccess();
1545  }
1546 
1547  KOKKOS_INLINE_FUNCTION
1548  value_type val() const {
1549  return if_then_else( cond, expr1.val(), c );
1550  }
1551 
1552  KOKKOS_INLINE_FUNCTION
1553  val_type val(int j) const {
1554  return if_then_else( cond, expr1.val(j), c.fastAccessCoeff(j) );
1555  }
1556 
1557  KOKKOS_INLINE_FUNCTION
1558  val_type dx(int i, int j) const {
1559  return if_then_else( cond, expr1.dx(i,j), val_type(0.0) );
1560  }
1561 
1562  KOKKOS_INLINE_FUNCTION
1563  val_type fastAccessDx(int i, int j) const {
1564  return if_then_else( cond, expr1.fastAccessDx(i,j), val_type(0.0) );
1565  }
1566 
1567  protected:
1568 
1569  const CondT& cond;
1570  const T1& expr1;
1571  const typename ConstTypeRef<ConstT,value_type>::type c;
1572  };
1573 
1574  template <typename CondT, typename T1, typename T2>
1575  class IfThenElseOp< CondT, T1, T2, true, false, ExprSpecMPVector> :
1576  public Expr< IfThenElseOp< CondT, T1, T2, true, false, ExprSpecMPVector > > {
1577 
1578  public:
1579 
1580  typedef typename std::remove_cv<T2>::type ExprT2;
1581  typedef T1 ConstT;
1582  typedef typename ExprT2::value_type value_type;
1584 
1586 
1588 
1589  KOKKOS_INLINE_FUNCTION
1590  IfThenElseOp(const CondT& cond_, const ConstT& c_, const T2& expr2_) :
1591  cond(cond_), c(c_), expr2(expr2_) {}
1592 
1593  KOKKOS_INLINE_FUNCTION
1594  int size() const {
1595  return expr2.size();
1596  }
1597 
1598  KOKKOS_INLINE_FUNCTION
1599  bool hasFastAccess() const {
1600  return expr2.hasFastAccess();
1601  }
1602 
1603  KOKKOS_INLINE_FUNCTION
1604  value_type val() const {
1605  return if_then_else( cond, c, expr2.val() );
1606  }
1607 
1608  KOKKOS_INLINE_FUNCTION
1609  val_type val(int j) const {
1610  return if_then_else( cond, c.fastAccessCoeff(j), expr2.val(j) );
1611  }
1612 
1613  KOKKOS_INLINE_FUNCTION
1614  val_type dx(int i, int j) const {
1615  return if_then_else( cond, val_type(0.0), expr2.dx(i,j) );
1616  }
1617 
1618  KOKKOS_INLINE_FUNCTION
1619  val_type fastAccessDx(int i, int j) const {
1620  return if_then_else( cond, val_type(0.0), expr2.fastAccessDx(i,j) );
1621  }
1622 
1623  protected:
1624 
1625  const CondT& cond;
1626  const typename ConstTypeRef<ConstT,value_type>::type c;
1627  const T2& expr2;
1628  };
1629 
1630  }
1631  }
1632 }
1633 
1634 #undef FAD_BINARYOP_MACRO
1635 
1636 #endif // SACADO_FAD_EXP_MP_VECTOR_HPP
expr expr ASinhOp
expr expr TanhOp
KOKKOS_INLINE_FUNCTION IfThenElseOp(const CondT &cond_, const ConstT &c_, const T2 &expr2_)
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c expr1 expr2 expr1 expr2 expr1 MultiplicationOp
expr expr ATanhOp
atanh(expr.val())
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c expr1 expr2 expr1 expr2 expr1 expr1 expr1 expr1 j expr1 expr1 expr1 expr1 j expr1 c *expr2 expr1 c expr1 c expr1 c expr1 DivisionOp
KOKKOS_INLINE_FUNCTION const val_type & fastAccessDx(int i, int j) const
Returns derivative component i without bounds checking.
KOKKOS_INLINE_FUNCTION PCE< Storage > pow(const PCE< Storage > &a, const PCE< Storage > &b)
log10(expr.val())
cosh(expr.val())
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c fastAccessCoeff(j) - expr2.val(j)
abs(expr.val())
KOKKOS_INLINE_FUNCTION IfThenElseOp(const CondT &cond_, const T1 &expr1_, const ConstT &c_)
expr expr ASinOp
atan(expr.val())
sqrt(expr.val())
tan(expr.val())
sinh(expr.val())
#define FAD_BINARYOP_MACRO(OPNAME, OP, USING, MPVALUE, VALUE, DX, CDX1, CDX2, FASTACCESSDX, MPVAL_CONST_DX_1, MPVAL_CONST_DX_2, VAL_CONST_DX_1, VAL_CONST_DX_2, CONST_DX_1, CONST_DX_2, CONST_FASTACCESSDX_1, CONST_FASTACCESSDX_2)
asin(expr.val())
expr expr TanOp
atan2(expr1.val(), expr2.val())
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
fabs(expr.val())
asinh(expr.val())
expr2 j expr1 expr1 expr2 expr2 j expr1 c c c c MaxOp
acos(expr.val())
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
Definition: csr_vector.h:260
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c expr1 expr2 expr1 expr2 expr1 expr1 expr1 expr1 j expr1 expr1 expr1 expr1 j expr1 c *expr2 expr1 c expr1 c expr1 c expr1 expr1 expr1 expr1 j *expr1 expr2 expr1 expr1 j *expr1 c expr2 expr1 c expr1 expr2 expr1 expr2 expr1 Atan2Op
expr expr CoshOp
KOKKOS_INLINE_FUNCTION PCE< Storage > max(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
expr expr expr expr fastAccessDx(i, j)) FAD_UNARYOP_MACRO(exp
tanh(expr.val())
#define FAD_UNARYOP_MACRO(OPNAME, OP, USING, MPVALUE, VALUE, DX, FASTACCESSDX)
cos(expr.val())
if_then_else(expr.val() >=0, expr.dx(i, j), value_type(-expr.dx(i, j)))
expr expr CosOp
expr expr expr dx(i, j)
expr2 j expr1 expr1 expr2 expr2 j expr1 c c c c MinOp
expr expr SinhOp
expr val()
sin(expr.val())
exp(expr.val())
acosh(expr.val())
expr expr SinOp
KOKKOS_INLINE_FUNCTION val_type & fastAccessDx(int i, int j)
Returns derivative component i without bounds checking.
expr expr expr expr ExpOp
expr expr SqrtOp
expr expr AbsOp
expr expr ACoshOp
expr expr ATanOp
KOKKOS_INLINE_FUNCTION PCE< Storage > log(const PCE< Storage > &a)
Expression template specialization tag for Fad< MP::Vector >
KOKKOS_INLINE_FUNCTION IfThenElseOp(const CondT &cond_, const T1 &expr1_, const T2 &expr2_)
cbrt(expr.val())
KOKKOS_INLINE_FUNCTION val_type dx(int i, int j) const
Returns derivative component i with bounds checking.
expr expr ACosOp