32 #ifndef FADBLASUNITTESTS_HPP 33 #define FADBLASUNITTESTS_HPP 41 #include <cppunit/extensions/HelperMacros.h> 43 #define COMPARE_VALUES(a, b) \ 44 CPPUNIT_ASSERT( std::abs(a-b) < this->tol_a + this->tol_r*std::abs(a) ); 46 #define COMPARE_FADS(a, b) \ 47 CPPUNIT_ASSERT(a.size() == b.size()); \ 48 CPPUNIT_ASSERT(a.hasFastAccess() == b.hasFastAccess()); \ 49 COMPARE_VALUES(a.val(), b.val()); \ 50 for (int k=0; k<a.size(); k++) { \ 51 COMPARE_VALUES(a.dx(k), b.dx(k)); \ 52 COMPARE_VALUES(a.fastAccessDx(k), b.fastAccessDx(k)); \ 56 #define COMPARE_FAD_VECTORS(X1, X2, n) \ 57 CPPUNIT_ASSERT(X1.size() == std::size_t(n)); \ 58 CPPUNIT_ASSERT(X2.size() == std::size_t(n)); \ 59 for (unsigned int i=0; i<n; i++) { \ 60 COMPARE_FADS(X1[i], X2[i]); \ 65 template <
class FadType,
class ScalarType>
162 double absolute_tolerance,
double relative_tolerance);
275 template <
class FadType,
class ScalarType>
278 urand(), real_urand(), m(5), n(6), l(4), ndot(7), tol_a(1.0e-11), tol_r(1.0e-11) {}
280 template <
class FadType,
class ScalarType>
283 double relative_tolerance) :
290 tol_a(absolute_tolerance),
291 tol_r(relative_tolerance) {}
293 template <
class FadType,
class ScalarType>
297 template <
class FadType,
class ScalarType>
302 template <
class FadType,
class ScalarType>
306 VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
307 for (
unsigned int i=0; i<m; i++) {
308 ScalarType
val = urand.number();
312 for (
unsigned int k=0; k<ndot; k++) {
313 val = urand.number();
314 x1[i].fastAccessDx(k) =
val;
315 x2[i].fastAccessDx(k) =
val;
316 x3[i].fastAccessDx(k) =
val;
319 FadType alpha(ndot, urand.number());
320 for (
unsigned int k=0; k<ndot; k++) {
321 alpha.fastAccessDx(k) = urand.number();
324 Teuchos::BLAS<int,FadType> teuchos_blas;
325 teuchos_blas.SCAL(m, alpha, &x1[0], 1);
327 Teuchos::BLAS<int,FadType> sacado_blas(
false);
328 sacado_blas.SCAL(m, alpha, &x2[0], 1);
332 unsigned int sz = m*(1+ndot);
333 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
334 sacado_blas2.SCAL(m, alpha, &x3[0], 1);
340 template <
class FadType,
class ScalarType>
344 unsigned int incx = 2;
345 VectorType x1(m*incx,ndot), x2(m*incx,ndot), x3(m*incx,ndot);
346 for (
unsigned int i=0; i<m*incx; i++) {
347 ScalarType
val = urand.number();
351 for (
unsigned int k=0; k<ndot; k++) {
352 val = urand.number();
353 x1[i].fastAccessDx(k) =
val;
354 x2[i].fastAccessDx(k) =
val;
355 x3[i].fastAccessDx(k) =
val;
358 FadType alpha(ndot, urand.number());
359 for (
unsigned int k=0; k<ndot; k++) {
360 alpha.fastAccessDx(k) = urand.number();
363 Teuchos::BLAS<int,FadType> teuchos_blas;
364 teuchos_blas.SCAL(m, alpha, &x1[0], incx);
366 Teuchos::BLAS<int,FadType> sacado_blas(
false);
367 sacado_blas.SCAL(m, alpha, &x2[0], incx);
371 unsigned int sz = m*(1+ndot);
372 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
373 sacado_blas2.SCAL(m, alpha, &x3[0], incx);
379 template <
class FadType,
class ScalarType>
383 VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
384 for (
unsigned int i=0; i<m; i++) {
385 ScalarType
val = urand.number();
389 for (
unsigned int k=0; k<ndot; k++) {
390 val = urand.number();
391 x1[i].fastAccessDx(k) =
val;
392 x2[i].fastAccessDx(k) =
val;
393 x3[i].fastAccessDx(k) =
val;
396 ScalarType alpha = urand.number();
398 Teuchos::BLAS<int,FadType> teuchos_blas;
399 teuchos_blas.SCAL(m, alpha, &x1[0], 1);
401 Teuchos::BLAS<int,FadType> sacado_blas(
false);
402 sacado_blas.SCAL(m, alpha, &x2[0], 1);
406 unsigned int sz = m*(1+ndot);
407 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
408 sacado_blas2.SCAL(m, alpha, &x3[0], 1);
414 template <
class FadType,
class ScalarType>
418 VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
419 for (
unsigned int i=0; i<m; i++) {
420 ScalarType
val = urand.number();
426 for (
unsigned int k=0; k<ndot; k++)
427 alpha.fastAccessDx(k) = urand.number();
429 Teuchos::BLAS<int,FadType> teuchos_blas;
430 teuchos_blas.SCAL(m, alpha, &x1[0], 1);
432 Teuchos::BLAS<int,FadType> sacado_blas(
false);
433 sacado_blas.SCAL(m, alpha, &x2[0], 1);
437 unsigned int sz = m*(1+ndot);
438 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
439 sacado_blas2.SCAL(m, alpha, &x3[0], 1);
445 template <
class FadType,
class ScalarType>
449 VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
450 for (
unsigned int i=0; i<m; i++) {
451 x[i] =
FadType(ndot, urand.number());
452 ScalarType
val = urand.number();
456 for (
unsigned int k=0; k<ndot; k++) {
457 x[i].fastAccessDx(k) = urand.number();
458 val = urand.number();
459 y1[i].fastAccessDx(k) =
val;
460 y2[i].fastAccessDx(k) =
val;
461 y3[i].fastAccessDx(k) =
val;
465 Teuchos::BLAS<int,FadType> teuchos_blas;
466 teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
468 Teuchos::BLAS<int,FadType> sacado_blas(
false);
469 sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
473 unsigned int sz = 2*m*(1+ndot);
474 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
475 sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
481 template <
class FadType,
class ScalarType>
485 unsigned int incx = 2;
486 unsigned int incy = 3;
487 VectorType x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
488 for (
unsigned int i=0; i<m*incx; i++) {
489 x[i] =
FadType(ndot, urand.number());
490 for (
unsigned int k=0; k<ndot; k++) {
491 x[i].fastAccessDx(k) = urand.number();
494 for (
unsigned int i=0; i<m*incy; i++) {
495 ScalarType
val = urand.number();
499 for (
unsigned int k=0; k<ndot; k++) {
500 val = urand.number();
501 y1[i].fastAccessDx(k) =
val;
502 y2[i].fastAccessDx(k) =
val;
503 y3[i].fastAccessDx(k) =
val;
507 Teuchos::BLAS<int,FadType> teuchos_blas;
508 teuchos_blas.COPY(m, &x[0], incx, &y1[0], incy);
510 Teuchos::BLAS<int,FadType> sacado_blas(
false);
511 sacado_blas.COPY(m, &x[0], incx, &y2[0], incy);
515 unsigned int sz = 2*m*(1+ndot);
516 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
517 sacado_blas2.COPY(m, &x[0], incx, &y3[0], incy);
523 template <
class FadType,
class ScalarType>
527 VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
528 for (
unsigned int i=0; i<m; i++) {
529 x[i] = urand.number();
531 for (
unsigned int i=0; i<m; i++) {
532 ScalarType
val = urand.number();
536 for (
unsigned int k=0; k<ndot; k++) {
537 val = urand.number();
538 y1[i].fastAccessDx(k) =
val;
539 y2[i].fastAccessDx(k) =
val;
540 y3[i].fastAccessDx(k) =
val;
544 Teuchos::BLAS<int,FadType> teuchos_blas;
545 teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
547 Teuchos::BLAS<int,FadType> sacado_blas(
false);
548 sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
552 unsigned int sz = 2*m*(1+ndot);
553 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
554 sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
560 template <
class FadType,
class ScalarType>
564 VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
565 for (
unsigned int i=0; i<m; i++) {
566 x[i] =
FadType(ndot, urand.number());
567 ScalarType
val = urand.number();
571 for (
unsigned int k=0; k<ndot; k++) {
572 x[i].fastAccessDx(k) = urand.number();
576 Teuchos::BLAS<int,FadType> teuchos_blas;
577 teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
579 Teuchos::BLAS<int,FadType> sacado_blas(
false);
580 sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
584 unsigned int sz = 2*m*(1+ndot);
585 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
586 sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
592 template <
class FadType,
class ScalarType>
596 VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
597 for (
unsigned int i=0; i<m; i++) {
598 x[i] =
FadType(ndot, urand.number());
599 ScalarType
val = urand.number();
603 for (
unsigned int k=0; k<ndot; k++) {
604 x[i].fastAccessDx(k) = urand.number();
605 val = urand.number();
606 y1[i].fastAccessDx(k) =
val;
607 y2[i].fastAccessDx(k) =
val;
608 y3[i].fastAccessDx(k) =
val;
611 FadType alpha(ndot, urand.number());
612 for (
unsigned int k=0; k<ndot; k++)
613 alpha.fastAccessDx(k) = urand.number();
615 Teuchos::BLAS<int,FadType> teuchos_blas;
616 teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
618 Teuchos::BLAS<int,FadType> sacado_blas(
false);
619 sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
623 unsigned int sz = 2*m*(1+ndot);
624 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
625 sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
631 template <
class FadType,
class ScalarType>
635 unsigned int incx = 2;
636 unsigned int incy = 3;
637 VectorType x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
638 for (
unsigned int i=0; i<m*incx; i++) {
639 x[i] =
FadType(ndot, urand.number());
640 for (
unsigned int k=0; k<ndot; k++) {
641 x[i].fastAccessDx(k) = urand.number();
644 for (
unsigned int i=0; i<m*incy; i++) {
645 ScalarType
val = urand.number();
649 for (
unsigned int k=0; k<ndot; k++) {
650 val = urand.number();
651 y1[i].fastAccessDx(k) =
val;
652 y2[i].fastAccessDx(k) =
val;
653 y3[i].fastAccessDx(k) =
val;
656 FadType alpha(ndot, urand.number());
657 for (
unsigned int k=0; k<ndot; k++)
658 alpha.fastAccessDx(k) = urand.number();
660 Teuchos::BLAS<int,FadType> teuchos_blas;
661 teuchos_blas.AXPY(m, alpha, &x[0], incx, &y1[0], incy);
663 Teuchos::BLAS<int,FadType> sacado_blas(
false);
664 sacado_blas.AXPY(m, alpha, &x[0], incx, &y2[0], incy);
668 unsigned int sz = 2*m*(1+ndot);
669 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
670 sacado_blas2.AXPY(m, alpha, &x[0], incx, &y3[0], incy);
676 template <
class FadType,
class ScalarType>
680 VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot), y4(m,ndot);
681 std::vector<ScalarType> xx(m);
682 for (
unsigned int i=0; i<m; i++) {
683 xx[i] = urand.number();
685 ScalarType
val = urand.number();
690 for (
unsigned int k=0; k<ndot; k++) {
691 val = urand.number();
692 y1[i].fastAccessDx(k) =
val;
693 y2[i].fastAccessDx(k) =
val;
694 y3[i].fastAccessDx(k) =
val;
695 y4[i].fastAccessDx(k) =
val;
698 FadType alpha(ndot, urand.number());
699 for (
unsigned int k=0; k<ndot; k++)
700 alpha.fastAccessDx(k) = urand.number();
702 Teuchos::BLAS<int,FadType> teuchos_blas;
703 teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
705 Teuchos::BLAS<int,FadType> sacado_blas(
false);
706 sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
710 unsigned int sz = m*(1+ndot)+m;
711 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
712 sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
716 sacado_blas.AXPY(m, alpha, &xx[0], 1, &y4[0], 1);
722 template <
class FadType,
class ScalarType>
726 VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
727 for (
unsigned int i=0; i<m; i++) {
728 x[i] =
FadType(ndot, urand.number());
729 ScalarType
val = urand.number();
733 for (
unsigned int k=0; k<ndot; k++) {
734 x[i].fastAccessDx(k) = urand.number();
737 FadType alpha(ndot, urand.number());
738 for (
unsigned int k=0; k<ndot; k++)
739 alpha.fastAccessDx(k) = urand.number();
741 Teuchos::BLAS<int,FadType> teuchos_blas;
742 teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
744 Teuchos::BLAS<int,FadType> sacado_blas(
false);
745 sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
749 unsigned int sz = 2*m*(1+ndot);
750 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
751 sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
757 template <
class FadType,
class ScalarType>
762 for (
unsigned int i=0; i<m; i++) {
763 X[i] =
FadType(ndot, real_urand.number());
764 Y[i] =
FadType(ndot, real_urand.number());
765 for (
unsigned int k=0; k<ndot; k++) {
766 X[i].fastAccessDx(k) = real_urand.number();
767 Y[i].fastAccessDx(k) = real_urand.number();
771 Teuchos::BLAS<int,FadType> teuchos_blas;
772 FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
774 Teuchos::BLAS<int,FadType> sacado_blas(
false);
775 FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
779 unsigned int sz = 2*m*(1+ndot);
780 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
781 FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
787 template <
class FadType,
class ScalarType>
791 unsigned int incx = 2;
792 unsigned int incy = 3;
794 for (
unsigned int i=0; i<m*incx; i++) {
795 X[i] =
FadType(ndot, real_urand.number());
796 for (
unsigned int k=0; k<ndot; k++) {
797 X[i].fastAccessDx(k) = real_urand.number();
800 for (
unsigned int i=0; i<m*incy; i++) {
801 Y[i] =
FadType(ndot, real_urand.number());
802 for (
unsigned int k=0; k<ndot; k++) {
803 Y[i].fastAccessDx(k) = real_urand.number();
807 Teuchos::BLAS<int,FadType> teuchos_blas;
808 FadType z1 = teuchos_blas.DOT(m, &X[0], incx, &Y[0], incy);
810 Teuchos::BLAS<int,FadType> sacado_blas(
false);
811 FadType z2 = sacado_blas.DOT(m, &X[0], incx, &Y[0], incy);
815 unsigned int sz = 2*m*(1+ndot);
816 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
817 FadType z3 = sacado_blas2.DOT(m, &X[0], incx, &Y[0], incy);
823 template <
class FadType,
class ScalarType>
828 std::vector<ScalarType> x(m);
829 for (
unsigned int i=0; i<m; i++) {
830 x[i] = urand.number();
832 Y[i] =
FadType(ndot, real_urand.number());
833 for (
unsigned int k=0; k<ndot; k++) {
834 Y[i].fastAccessDx(k) = real_urand.number();
838 Teuchos::BLAS<int,FadType> teuchos_blas;
839 FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
841 Teuchos::BLAS<int,FadType> sacado_blas(
false);
842 FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
846 unsigned int sz = 2*m*(1+ndot);
847 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
848 FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
852 FadType z4 = sacado_blas.DOT(m, &x[0], 1, &Y[0], 1);
858 template <
class FadType,
class ScalarType>
863 std::vector<ScalarType> y(m);
864 for (
unsigned int i=0; i<m; i++) {
865 X[i] =
FadType(ndot, real_urand.number());
866 y[i] = urand.number();
868 for (
unsigned int k=0; k<ndot; k++) {
869 X[i].fastAccessDx(k) = real_urand.number();
873 Teuchos::BLAS<int,FadType> teuchos_blas;
874 FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
876 Teuchos::BLAS<int,FadType> sacado_blas(
false);
877 FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
881 unsigned int sz = 2*m*(1+ndot);
882 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
883 FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
887 FadType z4 = sacado_blas.DOT(m, &X[0], 1, &y[0], 1);
893 template <
class FadType,
class ScalarType>
898 for (
unsigned int i=0; i<m; i++) {
899 X[i] =
FadType(ndot, real_urand.number());
900 for (
unsigned int k=0; k<ndot; k++) {
901 X[i].fastAccessDx(k) = real_urand.number();
905 Teuchos::BLAS<int,FadType> teuchos_blas;
906 typename Teuchos::ScalarTraits<FadType>::magnitudeType z1 =
907 teuchos_blas.NRM2(m, &X[0], 1);
909 Teuchos::BLAS<int,FadType> sacado_blas(
false);
910 typename Teuchos::ScalarTraits<FadType>::magnitudeType z2 =
911 sacado_blas.NRM2(m, &X[0], 1);
915 unsigned int sz = m*(1+ndot);
916 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
917 typename Teuchos::ScalarTraits<FadType>::magnitudeType z3 =
918 sacado_blas2.NRM2(m, &X[0], 1);
924 template <
class FadType,
class ScalarType>
928 unsigned int incx = 2;
930 for (
unsigned int i=0; i<m*incx; i++) {
931 X[i] =
FadType(ndot, real_urand.number());
932 for (
unsigned int k=0; k<ndot; k++) {
933 X[i].fastAccessDx(k) = real_urand.number();
937 Teuchos::BLAS<int,FadType> teuchos_blas;
938 typename Teuchos::ScalarTraits<FadType>::magnitudeType z1 =
939 teuchos_blas.NRM2(m, &X[0], incx);
941 Teuchos::BLAS<int,FadType> sacado_blas(
false);
942 typename Teuchos::ScalarTraits<FadType>::magnitudeType z2 =
943 sacado_blas.NRM2(m, &X[0], incx);
947 unsigned int sz = m*(1+ndot);
948 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
949 typename Teuchos::ScalarTraits<FadType>::magnitudeType z3 =
950 sacado_blas2.NRM2(m, &X[0], incx);
956 template <
class FadType,
class ScalarType>
960 VectorType A(m*n,ndot),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
961 for (
unsigned int j=0; j<n; j++) {
962 for (
unsigned int i=0; i<m; i++) {
963 A[i+j*m] =
FadType(ndot, urand.number());
964 for (
unsigned int k=0; k<ndot; k++)
967 B[j] =
FadType(ndot, urand.number());
968 for (
unsigned int k=0; k<ndot; k++)
971 FadType alpha(ndot, urand.number());
972 FadType beta(ndot, urand.number());
973 for (
unsigned int k=0; k<ndot; k++) {
974 alpha.fastAccessDx(k) = urand.number();
975 beta.fastAccessDx(k) = urand.number();
978 for (
unsigned int i=0; i<m; i++) {
979 ScalarType
val = urand.number();
983 for (
unsigned int k=0; k<ndot; k++) {
984 val = urand.number();
985 C1[i].fastAccessDx(k) =
val;
986 C2[i].fastAccessDx(k) =
val;
987 C3[i].fastAccessDx(k) =
val;
991 Teuchos::BLAS<int,FadType> teuchos_blas;
992 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
995 Teuchos::BLAS<int,FadType> sacado_blas(
false);
996 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1001 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1002 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1003 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1010 template <
class FadType,
class ScalarType>
1014 unsigned int lda = m+3;
1015 unsigned int incb = 2;
1016 unsigned int incc = 3;
1017 VectorType A(lda*n,ndot),
B(n*incb,ndot), C1(m*incc,ndot), C2(m*incc,ndot),
1019 for (
unsigned int j=0; j<n; j++) {
1020 for (
unsigned int i=0; i<lda; i++) {
1021 A[i+j*lda] =
FadType(ndot, urand.number());
1022 for (
unsigned int k=0; k<ndot; k++)
1026 for (
unsigned int j=0; j<n*incb; j++) {
1027 B[j] =
FadType(ndot, urand.number());
1028 for (
unsigned int k=0; k<ndot; k++)
1031 FadType alpha(ndot, urand.number());
1032 FadType beta(ndot, urand.number());
1033 for (
unsigned int k=0; k<ndot; k++) {
1034 alpha.fastAccessDx(k) = urand.number();
1035 beta.fastAccessDx(k) = urand.number();
1038 for (
unsigned int i=0; i<m*incc; i++) {
1039 ScalarType
val = urand.number();
1043 for (
unsigned int k=0; k<ndot; k++) {
1044 val = urand.number();
1045 C1[i].fastAccessDx(k) =
val;
1046 C2[i].fastAccessDx(k) =
val;
1047 C3[i].fastAccessDx(k) =
val;
1051 Teuchos::BLAS<int,FadType> teuchos_blas;
1052 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], lda, &
B[0], incb,
1053 beta, &C1[0], incc);
1055 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1056 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], lda, &
B[0], incb,
1057 beta, &C2[0], incc);
1061 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1062 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1063 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], lda, &
B[0], incb,
1064 beta, &C3[0], incc);
1070 template <
class FadType,
class ScalarType>
1074 VectorType A(m*n,ndot),
B(m,ndot), C1(n,ndot), C2(n,ndot), C3(n,ndot);
1075 for (
unsigned int j=0; j<n; j++) {
1076 for (
unsigned int i=0; i<m; i++) {
1077 A[i+j*m] =
FadType(ndot, urand.number());
1078 for (
unsigned int k=0; k<ndot; k++)
1082 for (
unsigned int j=0; j<m; j++) {
1083 B[j] =
FadType(ndot, urand.number());
1084 for (
unsigned int k=0; k<ndot; k++)
1087 FadType alpha(ndot, urand.number());
1088 FadType beta(ndot, urand.number());
1089 for (
unsigned int k=0; k<ndot; k++) {
1090 alpha.fastAccessDx(k) = urand.number();
1091 beta.fastAccessDx(k) = urand.number();
1094 for (
unsigned int i=0; i<n; i++) {
1095 ScalarType
val = urand.number();
1099 for (
unsigned int k=0; k<ndot; k++) {
1100 val = urand.number();
1101 C1[i].fastAccessDx(k) =
val;
1102 C2[i].fastAccessDx(k) =
val;
1103 C3[i].fastAccessDx(k) =
val;
1107 Teuchos::BLAS<int,FadType> teuchos_blas;
1108 teuchos_blas.GEMV(Teuchos::TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1111 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1112 sacado_blas.GEMV(Teuchos::TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1117 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1118 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1119 sacado_blas2.GEMV(Teuchos::TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1126 template <
class FadType,
class ScalarType>
1130 unsigned int lda = m+3;
1131 unsigned int incb = 2;
1132 unsigned int incc = 3;
1133 VectorType A(lda*n,ndot),
B(m*incb,ndot), C1(n*incc,ndot), C2(n*incc,ndot),
1135 for (
unsigned int j=0; j<n; j++) {
1136 for (
unsigned int i=0; i<lda; i++) {
1137 A[i+j*lda] =
FadType(ndot, urand.number());
1138 for (
unsigned int k=0; k<ndot; k++)
1142 for (
unsigned int j=0; j<m*incb; j++) {
1143 B[j] =
FadType(ndot, urand.number());
1144 for (
unsigned int k=0; k<ndot; k++)
1147 FadType alpha(ndot, urand.number());
1148 FadType beta(ndot, urand.number());
1149 for (
unsigned int k=0; k<ndot; k++) {
1150 alpha.fastAccessDx(k) = urand.number();
1151 beta.fastAccessDx(k) = urand.number();
1154 for (
unsigned int i=0; i<n*incc; i++) {
1155 ScalarType
val = urand.number();
1159 for (
unsigned int k=0; k<ndot; k++) {
1160 val = urand.number();
1161 C1[i].fastAccessDx(k) =
val;
1162 C2[i].fastAccessDx(k) =
val;
1163 C3[i].fastAccessDx(k) =
val;
1167 Teuchos::BLAS<int,FadType> teuchos_blas;
1168 teuchos_blas.GEMV(Teuchos::TRANS, m, n, alpha, &
A[0], lda, &
B[0], incb,
1169 beta, &C1[0], incc);
1171 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1172 sacado_blas.GEMV(Teuchos::TRANS, m, n, alpha, &
A[0], lda, &
B[0], incb,
1173 beta, &C2[0], incc);
1177 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1178 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1179 sacado_blas2.GEMV(Teuchos::TRANS, m, n, alpha, &
A[0], lda, &
B[0], incb,
1180 beta, &C3[0], incc);
1186 template <
class FadType,
class ScalarType>
1190 VectorType A(m*n,ndot),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1191 for (
unsigned int j=0; j<n; j++) {
1192 for (
unsigned int i=0; i<m; i++) {
1193 A[i+j*m] =
FadType(ndot, urand.number());
1194 for (
unsigned int k=0; k<ndot; k++)
1197 B[j] =
FadType(ndot, urand.number());
1198 for (
unsigned int k=0; k<ndot; k++)
1201 FadType alpha(ndot, urand.number());
1202 FadType beta(ndot, urand.number());
1203 for (
unsigned int k=0; k<ndot; k++) {
1204 alpha.fastAccessDx(k) = urand.number();
1205 beta.fastAccessDx(k) = urand.number();
1208 for (
unsigned int i=0; i<m; i++) {
1209 ScalarType
val = urand.number();
1215 Teuchos::BLAS<int,FadType> teuchos_blas;
1216 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1219 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1220 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1225 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1226 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1227 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1234 template <
class FadType,
class ScalarType>
1238 VectorType A(m*n,ndot),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1239 for (
unsigned int j=0; j<n; j++) {
1240 for (
unsigned int i=0; i<m; i++) {
1241 A[i+j*m] =
FadType(ndot, urand.number());
1242 for (
unsigned int k=0; k<ndot; k++)
1245 B[j] =
FadType(ndot, urand.number());
1246 for (
unsigned int k=0; k<ndot; k++)
1249 ScalarType alpha = urand.number();
1250 ScalarType beta = urand.number();
1252 for (
unsigned int i=0; i<m; i++) {
1253 ScalarType
val = urand.number();
1257 for (
unsigned int k=0; k<ndot; k++) {
1258 val = urand.number();
1259 C1[i].fastAccessDx(k) =
val;
1260 C2[i].fastAccessDx(k) =
val;
1261 C3[i].fastAccessDx(k) =
val;
1265 Teuchos::BLAS<int,FadType> teuchos_blas;
1266 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1269 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1270 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1275 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1276 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1277 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1284 template <
class FadType,
class ScalarType>
1288 VectorType A(m*n,ndot),
B(n,0), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1290 std::vector<ScalarType> b(n);
1291 for (
unsigned int j=0; j<n; j++) {
1292 for (
unsigned int i=0; i<m; i++) {
1293 A[i+j*m] =
FadType(ndot, urand.number());
1294 for (
unsigned int k=0; k<ndot; k++)
1297 b[j] = urand.number();
1300 FadType alpha(ndot, urand.number());
1301 FadType beta(ndot, urand.number());
1302 for (
unsigned int k=0; k<ndot; k++) {
1303 alpha.fastAccessDx(k) = urand.number();
1304 beta.fastAccessDx(k) = urand.number();
1307 for (
unsigned int i=0; i<m; i++) {
1308 ScalarType
val = urand.number();
1313 for (
unsigned int k=0; k<ndot; k++) {
1314 val = urand.number();
1315 C1[i].fastAccessDx(k) =
val;
1316 C2[i].fastAccessDx(k) =
val;
1317 C3[i].fastAccessDx(k) =
val;
1318 C4[i].fastAccessDx(k) =
val;
1322 Teuchos::BLAS<int,FadType> teuchos_blas;
1323 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1326 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1327 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1332 unsigned int sz = m*n*(1+ndot) + n + m*(1+ndot);
1333 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1334 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1339 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &b[0], 1,
1346 template <
class FadType,
class ScalarType>
1350 VectorType A(m*n,0),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1352 std::vector<ScalarType>
a(m*n);
1353 for (
unsigned int j=0; j<n; j++) {
1354 for (
unsigned int i=0; i<m; i++) {
1355 a[i+j*m] = urand.number();
1356 A[i+j*m] =
a[i+j*m];
1358 B[j] =
FadType(ndot, urand.number());
1359 for (
unsigned int k=0; k<ndot; k++)
1362 FadType alpha(ndot, urand.number());
1363 FadType beta(ndot, urand.number());
1364 for (
unsigned int k=0; k<ndot; k++) {
1365 alpha.fastAccessDx(k) = urand.number();
1366 beta.fastAccessDx(k) = urand.number();
1369 for (
unsigned int i=0; i<m; i++) {
1370 ScalarType
val = urand.number();
1375 for (
unsigned int k=0; k<ndot; k++) {
1376 val = urand.number();
1377 C1[i].fastAccessDx(k) =
val;
1378 C2[i].fastAccessDx(k) =
val;
1379 C3[i].fastAccessDx(k) =
val;
1380 C4[i].fastAccessDx(k) =
val;
1384 Teuchos::BLAS<int,FadType> teuchos_blas;
1385 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1388 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1389 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1394 unsigned int sz = m*n* + n*(1+ndot) + m*(1+ndot);
1395 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1396 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1401 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
a[0], m, &
B[0], 1,
1408 template <
class FadType,
class ScalarType>
1412 VectorType A(m*n,0),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1414 std::vector<ScalarType>
a(m*n), b(n);
1415 for (
unsigned int j=0; j<n; j++) {
1416 for (
unsigned int i=0; i<m; i++) {
1417 a[i+j*m] = urand.number();
1418 A[i+j*m] =
a[i+j*m];
1420 b[j] = urand.number();
1423 FadType alpha(ndot, urand.number());
1424 FadType beta(ndot, urand.number());
1425 for (
unsigned int k=0; k<ndot; k++) {
1426 alpha.fastAccessDx(k) = urand.number();
1427 beta.fastAccessDx(k) = urand.number();
1430 for (
unsigned int i=0; i<m; i++) {
1431 ScalarType
val = urand.number();
1436 for (
unsigned int k=0; k<ndot; k++) {
1437 val = urand.number();
1438 C1[i].fastAccessDx(k) =
val;
1439 C2[i].fastAccessDx(k) =
val;
1440 C3[i].fastAccessDx(k) =
val;
1441 C4[i].fastAccessDx(k) =
val;
1445 Teuchos::BLAS<int,FadType> teuchos_blas;
1446 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1449 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1450 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1455 unsigned int sz = m*n* + n*(1+ndot) + m*(1+ndot);
1456 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1457 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1462 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
a[0], m, &b[0], 1,
1469 template <
class FadType,
class ScalarType>
1473 VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
1474 for (
unsigned int j=0; j<n; j++) {
1475 for (
unsigned int i=0; i<n; i++) {
1476 A[i+j*n] =
FadType(ndot, urand.number());
1477 for (
unsigned int k=0; k<ndot; k++)
1480 ScalarType
val = urand.number();
1484 for (
unsigned int k=0; k<ndot; k++) {
1485 val = urand.number();
1486 x1[j].fastAccessDx(k) =
val;
1487 x2[j].fastAccessDx(k) =
val;
1488 x3[j].fastAccessDx(k) =
val;
1492 Teuchos::BLAS<int,FadType> teuchos_blas;
1493 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1494 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1496 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1497 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1498 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1502 unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1503 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1504 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1505 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
1509 teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1510 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1511 sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1512 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1513 sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1514 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
1518 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1519 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1520 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1521 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1522 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1523 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
1527 for (
unsigned int i=0; i<n; i++) {
1528 A[i*n+i].val() = 1.0;
1529 for (
unsigned int k=0; k<ndot; k++)
1532 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1533 Teuchos::UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1534 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1535 Teuchos::UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1536 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1537 Teuchos::UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
// TRMV check with a padded leading dimension (lda = n+3) and strided vectors
// (incx = 2).  A is filled with random Fad entries and x1, x2, x3 receive the
// same random derivative components; the identical TRMV call is then issued
// through three Teuchos::BLAS<int,FadType> objects, writing x1, x2 and x3.
// Cases issued, in order: upper/no-trans, lower/no-trans, upper/trans, and
// upper/no-trans with a unit diagonal.
// NOTE(review): presumably BLAS(false) selects Sacado's Fad-aware code path
// and the third constructor argument is a workspace size -- confirm against
// the Teuchos::BLAS<int,FadType> specialization.
1543 template <
class FadType,
class ScalarType>
1547 unsigned int lda = n+3;
1548 unsigned int incx = 2;
1549 VectorType A(lda*n,ndot), x1(n*incx,ndot), x2(n*incx,ndot), x3(n*incx,ndot);
// Fill all lda rows of each of the n columns of A, padding rows included.
1550 for (
unsigned int j=0; j<n; j++) {
1551 for (
unsigned int i=0; i<lda; i++) {
1552 A[i+j*lda] =
FadType(ndot, urand.number());
1553 for (
unsigned int k=0; k<ndot; k++)
// Fill every one of the n*incx vector slots, not only the strided ones.
1557 for (
unsigned int j=0; j<n*incx; j++) {
1558 ScalarType
val = urand.number();
1562 for (
unsigned int k=0; k<ndot; k++) {
1563 val = urand.number();
1564 x1[j].fastAccessDx(k) =
val;
1565 x2[j].fastAccessDx(k) =
val;
1566 x3[j].fastAccessDx(k) =
val;
// Upper triangular, no transpose, non-unit diagonal.
1570 Teuchos::BLAS<int,FadType> teuchos_blas;
1571 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1572 Teuchos::NON_UNIT_DIAG, n, &
A[0], lda, &x1[0], incx);
1574 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1575 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1576 Teuchos::NON_UNIT_DIAG, n, &
A[0], lda, &x2[0], incx);
// sz = (1+ndot) scalars for each entry of an n-by-n matrix and an n-vector.
1580 unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1581 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1582 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1583 Teuchos::NON_UNIT_DIAG, n, &
A[0], lda, &x3[0], incx);
// Lower triangular, no transpose, non-unit diagonal.
1587 teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1588 Teuchos::NON_UNIT_DIAG, n, &
A[0], lda, &x1[0], incx);
1589 sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1590 Teuchos::NON_UNIT_DIAG, n, &
A[0], lda, &x2[0], incx);
1591 sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1592 Teuchos::NON_UNIT_DIAG, n, &
A[0], lda, &x3[0], incx);
// Upper triangular, transposed, non-unit diagonal.
1596 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1597 Teuchos::NON_UNIT_DIAG, n, &
A[0], lda, &x1[0], incx);
1598 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1599 Teuchos::NON_UNIT_DIAG, n, &
A[0], lda, &x2[0], incx);
1600 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1601 Teuchos::NON_UNIT_DIAG, n, &
A[0], lda, &x3[0], incx);
// Put a value of 1 on the diagonal of A ahead of the UNIT_DIAG calls.
1605 for (
unsigned int i=0; i<n; i++) {
1606 A[i*lda+i].val() = 1.0;
1607 for (
unsigned int k=0; k<ndot; k++)
// Upper triangular, no transpose, unit diagonal.
1610 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1611 Teuchos::UNIT_DIAG, n, &
A[0], lda, &x1[0], incx);
1612 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1613 Teuchos::UNIT_DIAG, n, &
A[0], lda, &x2[0], incx);
1614 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1615 Teuchos::UNIT_DIAG, n, &
A[0], lda, &x3[0], incx);
// TRMV check where the matrix carries no derivative information: the random
// entries are stored in the plain scalar array `a` and copied into the Fad
// array `A`.  For each case the product is computed five times:
//   x1 - teuchos_blas  with the Fad matrix A
//   x2 - sacado_blas   with the Fad matrix A
//   x3 - sacado_blas2  with the Fad matrix A
//   x4 - sacado_blas   with the scalar matrix a
//   x5 - sacado_blas2  with the scalar matrix a
// Cases issued, in order: upper/no-trans, lower/no-trans, upper/trans, and
// upper/no-trans with a unit diagonal.
// NOTE(review): presumably BLAS(false) selects Sacado's Fad-aware code path
// and the third constructor argument is a workspace size -- confirm against
// the Teuchos::BLAS<int,FadType> specialization.
1621 template <
class FadType,
class ScalarType>
1625 VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot), x4(n,ndot),
1627 std::vector<ScalarType>
a(n*n);
1628 for (
unsigned int j=0; j<n; j++) {
1629 for (
unsigned int i=0; i<n; i++) {
1630 a[i+j*n] = urand.number();
1631 A[i+j*n] =
a[i+j*n];
1633 ScalarType
val = urand.number();
// All five result vectors get the same derivative components.
1639 for (
unsigned int k=0; k<ndot; k++) {
1640 val = urand.number();
1641 x1[j].fastAccessDx(k) =
val;
1642 x2[j].fastAccessDx(k) =
val;
1643 x3[j].fastAccessDx(k) =
val;
1644 x4[j].fastAccessDx(k) =
val;
1645 x5[j].fastAccessDx(k) =
val;
// Upper triangular, no transpose, non-unit diagonal.
1649 Teuchos::BLAS<int,FadType> teuchos_blas;
1650 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1651 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1653 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1654 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1655 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
// sz = n*n plain scalars for the matrix plus (1+ndot) scalars per entry of x.
1659 unsigned int sz = n*n+n*(1+ndot);
1660 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1661 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1662 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
1666 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1667 Teuchos::NON_UNIT_DIAG, n, &
a[0], n, &x4[0], 1);
// x5 uses the scalar matrix `a`, matching every later x5 call; passing the
// Fad matrix here would only repeat the x3 computation.
1671 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1672 Teuchos::NON_UNIT_DIAG, n, &
a[0], n, &x5[0], 1);
// Lower triangular, no transpose, non-unit diagonal.
1676 teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1677 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1678 sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1679 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1680 sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1681 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
1682 sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1683 Teuchos::NON_UNIT_DIAG, n, &
a[0], n, &x4[0], 1);
1684 sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1685 Teuchos::NON_UNIT_DIAG, n, &
a[0], n, &x5[0], 1);
// Upper triangular, transposed, non-unit diagonal.
1691 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1692 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1693 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1694 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1695 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1696 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
1697 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1698 Teuchos::NON_UNIT_DIAG, n, &
a[0], n, &x4[0], 1);
1699 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1700 Teuchos::NON_UNIT_DIAG, n, &
a[0], n, &x5[0], 1);
// Put a value of 1 on the diagonal of A ahead of the UNIT_DIAG calls.
1706 for (
unsigned int i=0; i<n; i++) {
1707 A[i*n+i].val() = 1.0;
1708 for (
unsigned int k=0; k<ndot; k++)
// Upper triangular, no transpose, unit diagonal.
1711 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1712 Teuchos::UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1713 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1714 Teuchos::UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1715 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1716 Teuchos::UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
1717 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1718 Teuchos::UNIT_DIAG, n, &
a[0], n, &x4[0], 1);
1719 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1720 Teuchos::UNIT_DIAG, n, &
a[0], n, &x5[0], 1);
// TRMV check with a dense n-by-n Fad matrix (lda = n) and unit-stride
// vectors.  A is filled with random Fad entries; the same TRMV call is then
// issued through three Teuchos::BLAS<int,FadType> objects, writing x1, x2
// and x3.  Cases issued, in order: upper/no-trans, lower/no-trans,
// upper/trans, and upper/no-trans with a unit diagonal.
// NOTE(review): presumably the x vectors carry no derivative components in
// this variant (constant x) -- confirm.
// NOTE(review): presumably BLAS(false) selects Sacado's Fad-aware code path
// and the third constructor argument is a workspace size -- confirm against
// the Teuchos::BLAS<int,FadType> specialization.
1728 template <
class FadType,
class ScalarType>
1732 VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
1733 for (
unsigned int j=0; j<n; j++) {
1734 for (
unsigned int i=0; i<n; i++) {
1735 A[i+j*n] =
FadType(ndot, urand.number());
1736 for (
unsigned int k=0; k<ndot; k++)
1739 ScalarType
val = urand.number();
// Upper triangular, no transpose, non-unit diagonal.
1745 Teuchos::BLAS<int,FadType> teuchos_blas;
1746 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1747 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1749 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1750 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1751 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
// sz = (1+ndot) scalars for each entry of an n-by-n matrix and an n-vector.
1755 unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1756 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1757 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1758 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
// Lower triangular, no transpose, non-unit diagonal.
1762 teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1763 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1764 sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1765 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1766 sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1767 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
// Upper triangular, transposed, non-unit diagonal.
1771 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1772 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1773 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1774 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1775 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1776 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
// Put a value of 1 on the diagonal of A ahead of the UNIT_DIAG calls.
1780 for (
unsigned int i=0; i<n; i++) {
1781 A[i*n+i].val() = 1.0;
1782 for (
unsigned int k=0; k<ndot; k++)
// Upper triangular, no transpose, unit diagonal.
1785 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1786 Teuchos::UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1787 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1788 Teuchos::UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1789 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1790 Teuchos::UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
// GER (rank-one update) check with dense m-by-n matrices (lda = m) and
// unit-stride vectors.  A1, A2, A3 receive the same random derivative
// components, x, y and alpha are random Fad values, and the same GER call is
// issued through three Teuchos::BLAS<int,FadType> objects, updating A1, A2
// and A3 respectively.
// NOTE(review): the isComplex guard presumably skips this test for complex
// scalar types -- confirm.
// NOTE(review): presumably BLAS(false) selects Sacado's Fad-aware code path
// and the third constructor argument is a workspace size -- confirm against
// the Teuchos::BLAS<int,FadType> specialization.
1796 template <
class FadType,
class ScalarType>
1802 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
1805 VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
1806 for (
unsigned int j=0; j<n; j++) {
1807 for (
unsigned int i=0; i<m; i++) {
1808 ScalarType
val = urand.number();
1812 for (
unsigned int k=0; k<ndot; k++) {
1813 val = urand.number();
1814 A1[i+j*m].fastAccessDx(k) =
val;
1815 A2[i+j*m].fastAccessDx(k) =
val;
1816 A3[i+j*m].fastAccessDx(k) =
val;
1820 for (
unsigned int i=0; i<m; i++) {
1821 x[i] =
FadType(ndot, urand.number());
1822 for (
unsigned int k=0; k<ndot; k++)
1825 for (
unsigned int i=0; i<n; i++) {
1826 y[i] =
FadType(ndot, urand.number());
1827 for (
unsigned int k=0; k<ndot; k++)
1830 FadType alpha(ndot, urand.number());
1831 for (
unsigned int k=0; k<ndot; k++) {
1832 alpha.fastAccessDx(k) = urand.number();
// Same update through each of the three BLAS objects.
1835 Teuchos::BLAS<int,FadType> teuchos_blas;
1836 teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1838 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1839 sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
// sz = (1+ndot) scalars per entry of the m-by-n matrix, y (n) and x (m).
1843 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1844 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1845 sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
// GER (rank-one update) check with a padded leading dimension (lda = m+3)
// and strided vectors (incx = 2, incy = 3).  A1, A2, A3 receive the same
// random derivative components over all lda rows, x and y are filled over
// their full strided lengths, and the same GER call is issued through three
// Teuchos::BLAS<int,FadType> objects, updating A1, A2 and A3 respectively.
// NOTE(review): the isComplex guard presumably skips this test for complex
// scalar types -- confirm.
// NOTE(review): presumably BLAS(false) selects Sacado's Fad-aware code path
// and the third constructor argument is a workspace size -- confirm against
// the Teuchos::BLAS<int,FadType> specialization.
1851 template <
class FadType,
class ScalarType>
1857 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
1860 unsigned int lda = m+3;
1861 unsigned int incx = 2;
1862 unsigned int incy = 3;
1863 VectorType A1(lda*n,ndot), A2(lda*n,ndot), A3(lda*n,ndot), x(m*incx,ndot),
1865 for (
unsigned int j=0; j<n; j++) {
1866 for (
unsigned int i=0; i<lda; i++) {
1867 ScalarType
val = urand.number();
1871 for (
unsigned int k=0; k<ndot; k++) {
1872 val = urand.number();
1873 A1[i+j*lda].fastAccessDx(k) =
val;
1874 A2[i+j*lda].fastAccessDx(k) =
val;
1875 A3[i+j*lda].fastAccessDx(k) =
val;
// Every slot of the strided vectors is filled, not only the strided ones.
1879 for (
unsigned int i=0; i<m*incx; i++) {
1880 x[i] =
FadType(ndot, urand.number());
1881 for (
unsigned int k=0; k<ndot; k++)
1884 for (
unsigned int i=0; i<n*incy; i++) {
1885 y[i] =
FadType(ndot, urand.number());
1886 for (
unsigned int k=0; k<ndot; k++)
1889 FadType alpha(ndot, urand.number());
1890 for (
unsigned int k=0; k<ndot; k++) {
1891 alpha.fastAccessDx(k) = urand.number();
// Same update through each of the three BLAS objects.
1894 Teuchos::BLAS<int,FadType> teuchos_blas;
1895 teuchos_blas.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A1[0], lda);
1897 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1898 sacado_blas.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A2[0], lda);
// sz is computed from the logical sizes m and n, not from lda/incx/incy.
1902 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1903 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1904 sacado_blas2.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A3[0], lda);
// GER (rank-one update) check in which alpha is a plain ScalarType rather
// than a Fad value.  A1, A2, A3 receive the same random derivative
// components, x and y are random Fad vectors, and the same GER call is
// issued through three Teuchos::BLAS<int,FadType> objects, updating A1, A2
// and A3 respectively.
// NOTE(review): the isComplex guard presumably skips this test for complex
// scalar types -- confirm.
// NOTE(review): presumably BLAS(false) selects Sacado's Fad-aware code path
// and the third constructor argument is a workspace size -- confirm against
// the Teuchos::BLAS<int,FadType> specialization.
1910 template <
class FadType,
class ScalarType>
1916 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
1919 VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
1920 for (
unsigned int j=0; j<n; j++) {
1921 for (
unsigned int i=0; i<m; i++) {
1922 ScalarType
val = urand.number();
1926 for (
unsigned int k=0; k<ndot; k++) {
1927 val = urand.number();
1928 A1[i+j*m].fastAccessDx(k) =
val;
1929 A2[i+j*m].fastAccessDx(k) =
val;
1930 A3[i+j*m].fastAccessDx(k) =
val;
1934 for (
unsigned int i=0; i<m; i++) {
1935 x[i] =
FadType(ndot, urand.number());
1936 for (
unsigned int k=0; k<ndot; k++)
1939 for (
unsigned int i=0; i<n; i++) {
1940 y[i] =
FadType(ndot, urand.number());
1941 for (
unsigned int k=0; k<ndot; k++)
// alpha has no derivative components in this variant.
1944 ScalarType alpha = urand.number();
1946 Teuchos::BLAS<int,FadType> teuchos_blas;
1947 teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1949 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1950 sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
// sz = (1+ndot) scalars per entry of the m-by-n matrix, y (n) and x (m).
1954 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1955 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1956 sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
// GER (rank-one update) check with a plain scalar x vector.  The random x
// values are stored in the ScalarType array `xx` (length m).  Five updates
// are issued:
//   A1 - teuchos_blas  with the Fad vector x
//   A2 - sacado_blas   with the Fad vector x
//   A3 - sacado_blas2  with the Fad vector x
//   A4 - sacado_blas   with the scalar vector xx
//   A5 - sacado_blas2  with the scalar vector xx
// NOTE(review): the isComplex guard presumably skips this test for complex
// scalar types -- confirm.
// NOTE(review): presumably BLAS(false) selects Sacado's Fad-aware code path
// and the third constructor argument is a workspace size -- confirm against
// the Teuchos::BLAS<int,FadType> specialization.
1962 template <
class FadType,
class ScalarType>
1968 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
1971 VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
1972 A5(m*n,ndot), x(m,ndot), y(n,ndot);
1973 std::vector<ScalarType> xx(m);
1974 for (
unsigned int j=0; j<n; j++) {
1975 for (
unsigned int i=0; i<m; i++) {
1976 ScalarType
val = urand.number();
// All five matrices get the same derivative components.
1982 for (
unsigned int k=0; k<ndot; k++) {
1983 val = urand.number();
1984 A1[i+j*m].fastAccessDx(k) =
val;
1985 A2[i+j*m].fastAccessDx(k) =
val;
1986 A3[i+j*m].fastAccessDx(k) =
val;
1987 A4[i+j*m].fastAccessDx(k) =
val;
1988 A5[i+j*m].fastAccessDx(k) =
val;
1992 for (
unsigned int i=0; i<m; i++) {
1993 xx[i] = urand.number();
1996 for (
unsigned int i=0; i<n; i++) {
1997 y[i] =
FadType(ndot, urand.number());
1998 for (
unsigned int k=0; k<ndot; k++)
2001 FadType alpha(ndot, urand.number());
2002 for (
unsigned int k=0; k<ndot; k++) {
2003 alpha.fastAccessDx(k) = urand.number();
2006 Teuchos::BLAS<int,FadType> teuchos_blas;
2007 teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2009 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2010 sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
// x is counted as m plain scalars; A and y as (1+ndot) scalars per entry.
2014 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m;
2015 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2016 sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
// Repeat with the scalar array passed directly as x.
2020 sacado_blas.GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A4[0], m);
2024 sacado_blas2.GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A5[0], m);
// GER (rank-one update) check with a plain scalar y vector.  The random y
// values are stored in the ScalarType array `yy` (length n).  Five updates
// are issued:
//   A1 - teuchos_blas  with the Fad vector y
//   A2 - sacado_blas   with the Fad vector y
//   A3 - sacado_blas2  with the Fad vector y
//   A4 - sacado_blas   with the scalar vector yy
//   A5 - sacado_blas2  with the scalar vector yy
// NOTE(review): the isComplex guard presumably skips this test for complex
// scalar types -- confirm.
// NOTE(review): presumably BLAS(false) selects Sacado's Fad-aware code path
// and the third constructor argument is a workspace size -- confirm against
// the Teuchos::BLAS<int,FadType> specialization.
2030 template <
class FadType,
class ScalarType>
2036 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
2039 VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
2040 A5(m*n,ndot), x(m,ndot), y(n,ndot);
2041 std::vector<ScalarType> yy(n);
2042 for (
unsigned int j=0; j<n; j++) {
2043 for (
unsigned int i=0; i<m; i++) {
2044 ScalarType
val = urand.number();
// All five matrices get the same derivative components.
2050 for (
unsigned int k=0; k<ndot; k++) {
2051 val = urand.number();
2052 A1[i+j*m].fastAccessDx(k) =
val;
2053 A2[i+j*m].fastAccessDx(k) =
val;
2054 A3[i+j*m].fastAccessDx(k) =
val;
2055 A4[i+j*m].fastAccessDx(k) =
val;
2056 A5[i+j*m].fastAccessDx(k) =
val;
2060 for (
unsigned int i=0; i<m; i++) {
2061 x[i] =
FadType(ndot, urand.number());
2062 for (
unsigned int k=0; k<ndot; k++)
2065 for (
unsigned int i=0; i<n; i++) {
2066 yy[i] = urand.number();
2069 FadType alpha(ndot, urand.number());
2070 for (
unsigned int k=0; k<ndot; k++) {
2071 alpha.fastAccessDx(k) = urand.number();
2074 Teuchos::BLAS<int,FadType> teuchos_blas;
2075 teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2077 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2078 sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
// y is counted as n plain scalars; A and x as (1+ndot) scalars per entry.
2082 unsigned int sz = m*n*(1+ndot) + m*(1+ndot) + n;
2083 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2084 sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
// Repeat with the scalar array passed directly as y.
2088 sacado_blas.GER(m, n, alpha, &x[0], 1, &yy[0], 1, &A4[0], m);
2092 sacado_blas2.GER(m, n, alpha, &x[0], 1, &yy[0], 1, &A5[0], m);
// GER (rank-one update) check with plain scalar x and y vectors.  The random
// values are stored in the ScalarType arrays `xx` (length m) and `yy`
// (length n).  Five updates are issued:
//   A1 - teuchos_blas  with the Fad vectors x, y
//   A2 - sacado_blas   with the Fad vectors x, y
//   A3 - sacado_blas2  with the Fad vectors x, y
//   A4 - sacado_blas   with the scalar vectors xx, yy
//   A5 - sacado_blas2  with the scalar vectors xx, yy
// NOTE(review): the isComplex guard presumably skips this test for complex
// scalar types -- confirm.
// NOTE(review): presumably BLAS(false) selects Sacado's Fad-aware code path
// and the third constructor argument is a workspace size -- confirm against
// the Teuchos::BLAS<int,FadType> specialization.
2098 template <
class FadType,
class ScalarType>
2104 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
2107 VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
2108 A5(m*n,ndot), x(m,ndot), y(n,ndot);
// xx is filled for i < m and handed to GER as the length-m x operand, so it
// must hold m entries; sizing it with n would overrun whenever m > n.
2109 std::vector<ScalarType> xx(m), yy(n);
2110 for (
unsigned int j=0; j<n; j++) {
2111 for (
unsigned int i=0; i<m; i++) {
2112 ScalarType
val = urand.number();
// All five matrices get the same derivative components.
2118 for (
unsigned int k=0; k<ndot; k++) {
2119 val = urand.number();
2120 A1[i+j*m].fastAccessDx(k) =
val;
2121 A2[i+j*m].fastAccessDx(k) =
val;
2122 A3[i+j*m].fastAccessDx(k) =
val;
2123 A4[i+j*m].fastAccessDx(k) =
val;
2124 A5[i+j*m].fastAccessDx(k) =
val;
2128 for (
unsigned int i=0; i<m; i++) {
2129 xx[i] = urand.number();
2132 for (
unsigned int i=0; i<n; i++) {
2133 yy[i] = urand.number();
2136 FadType alpha(ndot, urand.number());
2137 for (
unsigned int k=0; k<ndot; k++) {
2138 alpha.fastAccessDx(k) = urand.number();
2141 Teuchos::BLAS<int,FadType> teuchos_blas;
2142 teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2144 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2145 sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
// x and y are counted as m and n plain scalars; A as (1+ndot) per entry.
2149 unsigned int sz = m*n*(1+ndot) + m + n;
2150 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2151 sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
// Repeat with the scalar arrays passed directly as x and y.
2155 sacado_blas.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A4[0], m);
2159 sacado_blas2.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A5[0], m);
// GER (rank-one update) check with dense m-by-n matrices (lda = m) and
// unit-stride Fad vectors x, y and a Fad alpha.  The same GER call is issued
// through three Teuchos::BLAS<int,FadType> objects, updating A1, A2 and A3
// respectively.
// NOTE(review): presumably A1..A3 start without derivative components in
// this variant (constant A) -- confirm.
// NOTE(review): the isComplex guard presumably skips this test for complex
// scalar types -- confirm.
// NOTE(review): presumably BLAS(false) selects Sacado's Fad-aware code path
// and the third constructor argument is a workspace size -- confirm against
// the Teuchos::BLAS<int,FadType> specialization.
2165 template <
class FadType,
class ScalarType>
2171 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
2174 VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
2175 for (
unsigned int j=0; j<n; j++) {
2176 for (
unsigned int i=0; i<m; i++) {
2177 ScalarType
val = urand.number();
2183 for (
unsigned int i=0; i<m; i++) {
2184 x[i] =
FadType(ndot, urand.number());
2185 for (
unsigned int k=0; k<ndot; k++)
2188 for (
unsigned int i=0; i<n; i++) {
2189 y[i] =
FadType(ndot, urand.number());
2190 for (
unsigned int k=0; k<ndot; k++)
2193 FadType alpha(ndot, urand.number());
2194 for (
unsigned int k=0; k<ndot; k++) {
2195 alpha.fastAccessDx(k) = urand.number();
// Same update through each of the three BLAS objects.
2198 Teuchos::BLAS<int,FadType> teuchos_blas;
2199 teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2201 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2202 sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
// sz = (1+ndot) scalars per entry of the m-by-n matrix, y (n) and x (m).
2206 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
2207 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2208 sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
// GEMM check with densely packed operands.  A (m*l entries), B (l*n entries),
// alpha and beta are random Fad values; C1, C2, C3 receive the same random
// derivative components.  The same GEMM call is issued through three
// Teuchos::BLAS<int,FadType> objects, updating C1, C2 and C3 respectively,
// for all four transpose combinations: N/N, T/N, N/T and T/T.
// The storage of A and B is reused for the transposed cases: a transposed A
// is passed with leading dimension l, a transposed B with leading dimension n.
// NOTE(review): presumably BLAS(false) selects Sacado's Fad-aware code path
// and the third constructor argument is a workspace size -- confirm against
// the Teuchos::BLAS<int,FadType> specialization.
2214 template <
class FadType,
class ScalarType>
2218 VectorType A(m*l,ndot),
B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2219 for (
unsigned int j=0; j<l; j++) {
2220 for (
unsigned int i=0; i<m; i++) {
2221 A[i+j*m] =
FadType(ndot, urand.number());
2222 for (
unsigned int k=0; k<ndot; k++)
2226 for (
unsigned int j=0; j<n; j++) {
2227 for (
unsigned int i=0; i<l; i++) {
2228 B[i+j*l] =
FadType(ndot, urand.number());
2229 for (
unsigned int k=0; k<ndot; k++)
2233 FadType alpha(ndot, urand.number());
2234 FadType beta(ndot, urand.number());
2235 for (
unsigned int k=0; k<ndot; k++) {
2236 alpha.fastAccessDx(k) = urand.number();
2237 beta.fastAccessDx(k) = urand.number();
2240 for (
unsigned int j=0; j<n; j++) {
2241 for (
unsigned int i=0; i<m; i++) {
2242 ScalarType
val = urand.number();
2246 for (
unsigned int k=0; k<ndot; k++) {
2247 val = urand.number();
2248 C1[i+j*m].fastAccessDx(k) =
val;
2249 C2[i+j*m].fastAccessDx(k) =
val;
2250 C3[i+j*m].fastAccessDx(k) =
val;
// A not transposed, B not transposed.
2255 Teuchos::BLAS<int,FadType> teuchos_blas;
2256 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2257 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2259 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2260 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2261 &
A[0], m, &
B[0], l, beta, &C2[0], m);
// sz = (1+ndot) scalars per entry of A (m*l), B (l*n) and C (m*n).
2265 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2266 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2267 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2268 &
A[0], m, &
B[0], l, beta, &C3[0], m);
// A transposed, B not transposed.
2273 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2274 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2275 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2276 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2277 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2278 &
A[0], l, &
B[0], l, beta, &C3[0], m);
// A not transposed, B transposed.
2284 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2285 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2286 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2287 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2288 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2289 &
A[0], m, &
B[0], n, beta, &C3[0], m);
// A transposed, B transposed.
2295 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2296 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2297 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2298 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2299 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2300 &
A[0], l, &
B[0], n, beta, &C3[0], m);
// GEMM check (A and B not transposed) with padded leading dimensions:
// lda = m+4, ldb = l+4, ldc = m+5.  A, B, alpha and beta are random Fad
// values, C1, C2, C3 receive the same random derivative components, and all
// padding rows are filled as well.  The same GEMM call is issued through
// three Teuchos::BLAS<int,FadType> objects, updating C1, C2 and C3.
// NOTE(review): presumably BLAS(false) selects Sacado's Fad-aware code path
// and the third constructor argument is a workspace size -- confirm against
// the Teuchos::BLAS<int,FadType> specialization.
2307 template <
class FadType,
class ScalarType>
2311 unsigned int lda = m+4;
2312 unsigned int ldb = l+4;
2313 unsigned int ldc = m+5;
2314 VectorType A(lda*l,ndot),
B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
// A is stored as l columns of lda rows.
2316 for (
unsigned int j=0; j<l; j++) {
2317 for (
unsigned int i=0; i<lda; i++) {
2318 A[i+j*lda] =
FadType(ndot, urand.number());
2319 for (
unsigned int k=0; k<ndot; k++)
// B is stored as n columns of ldb rows.
2323 for (
unsigned int j=0; j<n; j++) {
2324 for (
unsigned int i=0; i<ldb; i++) {
2325 B[i+j*ldb] =
FadType(ndot, urand.number());
2326 for (
unsigned int k=0; k<ndot; k++)
2330 FadType alpha(ndot, urand.number());
2331 FadType beta(ndot, urand.number());
2332 for (
unsigned int k=0; k<ndot; k++) {
2333 alpha.fastAccessDx(k) = urand.number();
2334 beta.fastAccessDx(k) = urand.number();
2337 for (
unsigned int j=0; j<n; j++) {
2338 for (
unsigned int i=0; i<ldc; i++) {
2339 ScalarType
val = urand.number();
2343 for (
unsigned int k=0; k<ndot; k++) {
2344 val = urand.number();
2345 C1[i+j*ldc].fastAccessDx(k) =
val;
2346 C2[i+j*ldc].fastAccessDx(k) =
val;
2347 C3[i+j*ldc].fastAccessDx(k) =
val;
2352 Teuchos::BLAS<int,FadType> teuchos_blas;
2353 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2354 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2356 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2357 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2358 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
// sz is computed from the logical sizes m, n, l, not from lda/ldb/ldc.
2362 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2363 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2364 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2365 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// GEMM check (A transposed, B not transposed) with padded leading
// dimensions: lda = l+3, ldb = l+4, ldc = m+5.  A, B, alpha and beta are
// random Fad values, C1, C2, C3 receive the same random derivative
// components, and all padding rows are filled as well.  The same GEMM call
// is issued through three Teuchos::BLAS<int,FadType> objects, updating C1,
// C2 and C3.
// NOTE(review): presumably BLAS(false) selects Sacado's Fad-aware code path
// and the third constructor argument is a workspace size -- confirm against
// the Teuchos::BLAS<int,FadType> specialization.
2371 template <
class FadType,
class ScalarType>
2375 unsigned int lda = l+3;
2376 unsigned int ldb = l+4;
2377 unsigned int ldc = m+5;
2378 VectorType A(lda*m,ndot),
B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
// A is passed transposed, so it is stored as m columns of lda rows.
2380 for (
unsigned int j=0; j<m; j++) {
2381 for (
unsigned int i=0; i<lda; i++) {
2382 A[i+j*lda] =
FadType(ndot, urand.number());
2383 for (
unsigned int k=0; k<ndot; k++)
// B is stored as n columns of ldb rows.
2387 for (
unsigned int j=0; j<n; j++) {
2388 for (
unsigned int i=0; i<ldb; i++) {
2389 B[i+j*ldb] =
FadType(ndot, urand.number());
2390 for (
unsigned int k=0; k<ndot; k++)
2394 FadType alpha(ndot, urand.number());
2395 FadType beta(ndot, urand.number());
2396 for (
unsigned int k=0; k<ndot; k++) {
2397 alpha.fastAccessDx(k) = urand.number();
2398 beta.fastAccessDx(k) = urand.number();
2401 for (
unsigned int j=0; j<n; j++) {
2402 for (
unsigned int i=0; i<ldc; i++) {
2403 ScalarType
val = urand.number();
2407 for (
unsigned int k=0; k<ndot; k++) {
2408 val = urand.number();
2409 C1[i+j*ldc].fastAccessDx(k) =
val;
2410 C2[i+j*ldc].fastAccessDx(k) =
val;
2411 C3[i+j*ldc].fastAccessDx(k) =
val;
2416 Teuchos::BLAS<int,FadType> teuchos_blas;
2417 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2418 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2420 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2421 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2422 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
// sz is computed from the logical sizes m, n, l, not from lda/ldb/ldc.
2426 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2427 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2428 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2429 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// GEMM check (A not transposed, B transposed) with padded leading
// dimensions: lda = m+4, ldb = n+4, ldc = m+5.  A, B, alpha and beta are
// random Fad values, C1, C2, C3 receive the same random derivative
// components, and all padding rows are filled as well.  The same GEMM call
// is issued through three Teuchos::BLAS<int,FadType> objects, updating C1,
// C2 and C3.
// NOTE(review): presumably BLAS(false) selects Sacado's Fad-aware code path
// and the third constructor argument is a workspace size -- confirm against
// the Teuchos::BLAS<int,FadType> specialization.
2435 template <
class FadType,
class ScalarType>
2439 unsigned int lda = m+4;
2440 unsigned int ldb = n+4;
2441 unsigned int ldc = m+5;
2442 VectorType A(lda*l,ndot),
B(ldb*l,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
// A is stored as l columns of lda rows.
2444 for (
unsigned int j=0; j<l; j++) {
2445 for (
unsigned int i=0; i<lda; i++) {
2446 A[i+j*lda] =
FadType(ndot, urand.number());
2447 for (
unsigned int k=0; k<ndot; k++)
// B is passed transposed, so it is stored as l columns of ldb rows.
2451 for (
unsigned int j=0; j<l; j++) {
2452 for (
unsigned int i=0; i<ldb; i++) {
2453 B[i+j*ldb] =
FadType(ndot, urand.number());
2454 for (
unsigned int k=0; k<ndot; k++)
2458 FadType alpha(ndot, urand.number());
2459 FadType beta(ndot, urand.number());
2460 for (
unsigned int k=0; k<ndot; k++) {
2461 alpha.fastAccessDx(k) = urand.number();
2462 beta.fastAccessDx(k) = urand.number();
2465 for (
unsigned int j=0; j<n; j++) {
2466 for (
unsigned int i=0; i<ldc; i++) {
2467 ScalarType
val = urand.number();
2471 for (
unsigned int k=0; k<ndot; k++) {
2472 val = urand.number();
2473 C1[i+j*ldc].fastAccessDx(k) =
val;
2474 C2[i+j*ldc].fastAccessDx(k) =
val;
2475 C3[i+j*ldc].fastAccessDx(k) =
val;
2480 Teuchos::BLAS<int,FadType> teuchos_blas;
2481 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2482 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2484 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2485 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2486 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
// sz is computed from the logical sizes m, n, l, not from lda/ldb/ldc.
2490 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2491 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2492 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2493 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// GEMM check (A transposed, B transposed) with padded leading dimensions:
// lda = l+3, ldb = n+4, ldc = m+5.  A, B, alpha and beta are random Fad
// values, C1, C2, C3 receive the same random derivative components, and all
// padding rows are filled as well.  The same GEMM call is issued through
// three Teuchos::BLAS<int,FadType> objects, updating C1, C2 and C3.
// NOTE(review): presumably BLAS(false) selects Sacado's Fad-aware code path
// and the third constructor argument is a workspace size -- confirm against
// the Teuchos::BLAS<int,FadType> specialization.
2499 template <
class FadType,
class ScalarType>
2503 unsigned int lda = l+3;
2504 unsigned int ldb = n+4;
2505 unsigned int ldc = m+5;
2506 VectorType A(lda*m,ndot),
B(ldb*l,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
// A is passed transposed, so it is stored as m columns of lda rows.
2508 for (
unsigned int j=0; j<m; j++) {
2509 for (
unsigned int i=0; i<lda; i++) {
2510 A[i+j*lda] =
FadType(ndot, urand.number());
2511 for (
unsigned int k=0; k<ndot; k++)
// B is passed transposed, so it is stored as l columns of ldb rows.
2515 for (
unsigned int j=0; j<l; j++) {
2516 for (
unsigned int i=0; i<ldb; i++) {
2517 B[i+j*ldb] =
FadType(ndot, urand.number());
2518 for (
unsigned int k=0; k<ndot; k++)
2522 FadType alpha(ndot, urand.number());
2523 FadType beta(ndot, urand.number());
2524 for (
unsigned int k=0; k<ndot; k++) {
2525 alpha.fastAccessDx(k) = urand.number();
2526 beta.fastAccessDx(k) = urand.number();
2529 for (
unsigned int j=0; j<n; j++) {
2530 for (
unsigned int i=0; i<ldc; i++) {
2531 ScalarType
val = urand.number();
2535 for (
unsigned int k=0; k<ndot; k++) {
2536 val = urand.number();
2537 C1[i+j*ldc].fastAccessDx(k) =
val;
2538 C2[i+j*ldc].fastAccessDx(k) =
val;
2539 C3[i+j*ldc].fastAccessDx(k) =
val;
2544 Teuchos::BLAS<int,FadType> teuchos_blas;
2545 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2546 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2548 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2549 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2550 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
// sz is computed from the logical sizes m, n, l, not from lda/ldb/ldc.
2554 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2555 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2556 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2557 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// GEMM check with densely packed operands.  A (m*l entries), B (l*n entries),
// alpha and beta are random Fad values.  The same GEMM call is issued
// through three Teuchos::BLAS<int,FadType> objects, updating C1, C2 and C3
// respectively, for all four transpose combinations: N/N, T/N, N/T and T/T.
// A transposed A is passed with leading dimension l, a transposed B with
// leading dimension n.
// NOTE(review): presumably C1..C3 start without derivative components in
// this variant (constant C) -- confirm.
// NOTE(review): presumably BLAS(false) selects Sacado's Fad-aware code path
// and the third constructor argument is a workspace size -- confirm against
// the Teuchos::BLAS<int,FadType> specialization.
2563 template <
class FadType,
class ScalarType>
2567 VectorType A(m*l,ndot),
B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2568 for (
unsigned int j=0; j<l; j++) {
2569 for (
unsigned int i=0; i<m; i++) {
2570 A[i+j*m] =
FadType(ndot, urand.number());
2571 for (
unsigned int k=0; k<ndot; k++)
2575 for (
unsigned int j=0; j<n; j++) {
2576 for (
unsigned int i=0; i<l; i++) {
2577 B[i+j*l] =
FadType(ndot, urand.number());
2578 for (
unsigned int k=0; k<ndot; k++)
2582 FadType alpha(ndot, urand.number());
2583 FadType beta(ndot, urand.number());
2584 for (
unsigned int k=0; k<ndot; k++) {
2585 alpha.fastAccessDx(k) = urand.number();
2586 beta.fastAccessDx(k) = urand.number();
2589 for (
unsigned int j=0; j<n; j++) {
2590 for (
unsigned int i=0; i<m; i++) {
2591 ScalarType
val = urand.number();
// A not transposed, B not transposed.
2598 Teuchos::BLAS<int,FadType> teuchos_blas;
2599 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2600 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2602 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2603 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2604 &
A[0], m, &
B[0], l, beta, &C2[0], m);
// sz = (1+ndot) scalars per entry of A (m*l), B (l*n) and C (m*n).
2608 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2609 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2610 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2611 &
A[0], m, &
B[0], l, beta, &C3[0], m);
// A transposed, B not transposed.
2616 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2617 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2618 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2619 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2620 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2621 &
A[0], l, &
B[0], l, beta, &C3[0], m);
// A not transposed, B transposed.
2627 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2628 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2629 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2630 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2631 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2632 &
A[0], m, &
B[0], n, beta, &C3[0], m);
// A transposed, B transposed.
2638 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2639 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2640 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2641 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2642 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2643 &
A[0], l, &
B[0], n, beta, &C3[0], m);
// GEMM check in which alpha and beta are plain ScalarType values rather than
// Fad values.  A (m*l entries) and B (l*n entries) are random Fad values and
// C1, C2, C3 receive the same random derivative components.  The same GEMM
// call is issued through three Teuchos::BLAS<int,FadType> objects, updating
// C1, C2 and C3 respectively, for all four transpose combinations: N/N, T/N,
// N/T and T/T.  A transposed A is passed with leading dimension l, a
// transposed B with leading dimension n.
// NOTE(review): presumably BLAS(false) selects Sacado's Fad-aware code path
// and the third constructor argument is a workspace size -- confirm against
// the Teuchos::BLAS<int,FadType> specialization.
2650 template <
class FadType,
class ScalarType>
2654 VectorType A(m*l,ndot),
B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2655 for (
unsigned int j=0; j<l; j++) {
2656 for (
unsigned int i=0; i<m; i++) {
2657 A[i+j*m] =
FadType(ndot, urand.number());
2658 for (
unsigned int k=0; k<ndot; k++)
2662 for (
unsigned int j=0; j<n; j++) {
2663 for (
unsigned int i=0; i<l; i++) {
2664 B[i+j*l] =
FadType(ndot, urand.number());
2665 for (
unsigned int k=0; k<ndot; k++)
// alpha and beta have no derivative components in this variant.
2669 ScalarType alpha = urand.number();
2670 ScalarType beta = urand.number();
2672 for (
unsigned int j=0; j<n; j++) {
2673 for (
unsigned int i=0; i<m; i++) {
2674 ScalarType
val = urand.number();
2678 for (
unsigned int k=0; k<ndot; k++) {
2679 val = urand.number();
2680 C1[i+j*m].fastAccessDx(k) =
val;
2681 C2[i+j*m].fastAccessDx(k) =
val;
2682 C3[i+j*m].fastAccessDx(k) =
val;
// A not transposed, B not transposed.
2687 Teuchos::BLAS<int,FadType> teuchos_blas;
2688 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2689 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2691 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2692 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2693 &
A[0], m, &
B[0], l, beta, &C2[0], m);
// sz = (1+ndot) scalars per entry of A (m*l), B (l*n) and C (m*n).
2697 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2698 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2699 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2700 &
A[0], m, &
B[0], l, beta, &C3[0], m);
// A transposed, B not transposed.
2705 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2706 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2707 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2708 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2709 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2710 &
A[0], l, &
B[0], l, beta, &C3[0], m);
// A not transposed, B transposed.
2716 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2717 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2718 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2719 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2720 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2721 &
A[0], m, &
B[0], n, beta, &C3[0], m);
// A transposed, B transposed.
2727 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2728 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2729 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2730 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2731 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2732 &
A[0], l, &
B[0], n, beta, &C3[0], m);
// GEMM check where the A operand carries no derivative information: the
// random entries are stored in the plain scalar array `a` and copied into
// the Fad array `A`.  B, alpha and beta are random Fad values and C1..C5
// receive the same random derivative components.  For each of the four
// transpose combinations (N/N, T/N, N/T, T/T) five updates are issued:
//   C1 - teuchos_blas  with the Fad matrix A
//   C2 - sacado_blas   with the Fad matrix A
//   C3 - sacado_blas2  with the Fad matrix A
//   C4 - sacado_blas   with the scalar matrix a
//   C5 - sacado_blas2  with the scalar matrix a
// A transposed A is passed with leading dimension l, a transposed B with
// leading dimension n.
// NOTE(review): presumably BLAS(false) selects Sacado's Fad-aware code path
// and the third constructor argument is a workspace size -- confirm against
// the Teuchos::BLAS<int,FadType> specialization.
2739 template <
class FadType,
class ScalarType>
2743 VectorType A(m*l,ndot),
B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2744 C4(m*n,ndot), C5(m*n,ndot);
2745 std::vector<ScalarType>
a(m*l);
2746 for (
unsigned int j=0; j<l; j++) {
2747 for (
unsigned int i=0; i<m; i++) {
2748 a[i+j*m] = urand.number();
2749 A[i+j*m] =
a[i+j*m];
2752 for (
unsigned int j=0; j<n; j++) {
2753 for (
unsigned int i=0; i<l; i++) {
2754 B[i+j*l] =
FadType(ndot, urand.number());
2755 for (
unsigned int k=0; k<ndot; k++)
2759 FadType alpha(ndot, urand.number());
2760 FadType beta(ndot, urand.number());
2761 for (
unsigned int k=0; k<ndot; k++) {
2762 alpha.fastAccessDx(k) = urand.number();
2763 beta.fastAccessDx(k) = urand.number();
2766 for (
unsigned int j=0; j<n; j++) {
2767 for (
unsigned int i=0; i<m; i++) {
2768 ScalarType
val = urand.number();
// All five result matrices get the same derivative components.
2774 for (
unsigned int k=0; k<ndot; k++) {
2775 val = urand.number();
2776 C1[i+j*m].fastAccessDx(k) =
val;
2777 C2[i+j*m].fastAccessDx(k) =
val;
2778 C3[i+j*m].fastAccessDx(k) =
val;
2779 C4[i+j*m].fastAccessDx(k) =
val;
2780 C5[i+j*m].fastAccessDx(k) =
val;
// A not transposed, B not transposed.
2785 Teuchos::BLAS<int,FadType> teuchos_blas;
2786 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2787 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2789 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2790 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2791 &
A[0], m, &
B[0], l, beta, &C2[0], m);
// A is counted as m*l plain scalars; B and C as (1+ndot) scalars per entry.
2795 unsigned int sz = m*l + l*n*(1+ndot) + m*n*(1+ndot);
2796 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2797 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2798 &
A[0], m, &
B[0], l, beta, &C3[0], m);
2802 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2803 &
a[0], m, &
B[0], l, beta, &C4[0], m);
2807 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2808 &
a[0], m, &
B[0], l, beta, &C5[0], m);
// A transposed, B not transposed.
2813 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2814 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2815 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2816 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2817 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2818 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2819 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2820 &
a[0], l, &
B[0], l, beta, &C4[0], m);
2821 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2822 &
a[0], l, &
B[0], l, beta, &C5[0], m);
// A not transposed, B transposed.
2830 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2831 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2832 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2833 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2834 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2835 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2836 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2837 &
a[0], m, &
B[0], n, beta, &C4[0], m);
2838 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2839 &
a[0], m, &
B[0], n, beta, &C5[0], m);
// A transposed, B transposed.
2847 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2848 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2849 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2850 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2851 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2852 &
A[0], l, &
B[0], n, beta, &C3[0], m);
2853 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2854 &
a[0], l, &
B[0], n, beta, &C4[0], m);
2855 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2856 &
a[0], l, &
B[0], n, beta, &C5[0], m);
// GEMM test fragment in which B also exists as a plain ScalarType array b.
// The function signature, several loop bodies, the closing braces and any
// result checks fall on lines that are not visible in this excerpt.
//
// Setup visible here:
//   - A (m*l entries) is filled with FadType(ndot, urand.number()).
//   - b (l*n scalars) is filled with random numbers and every B entry is
//     assigned from the matching b entry.
//   - alpha and beta are Fad values with random derivative components.
//   - The derivative components of C1..C5 are filled with identical values.
//
// For each of the four transpose combinations GEMM is then run five times:
// teuchos_blas -> C1, sacado_blas -> C2, sacado_blas2 -> C3 (all with the Fad
// array B), and sacado_blas -> C4, sacado_blas2 -> C5 with the scalar array b.
// NOTE(review): the meaning of the sacado_blas / sacado_blas2 constructor
// arguments is defined outside this excerpt -- confirm against the
// Teuchos::BLAS<int,FadType> specialization.
2865 template <
class FadType,
class ScalarType>
2869 VectorType A(m*l,ndot),
B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2870 C4(m*n,ndot), C5(m*n,ndot);
// Scalar counterpart of B, passed directly to GEMM for the C4/C5 runs.
2871 std::vector<ScalarType> b(l*n);
// The next two loop nests fill A (column-major index i+j*m) and then b/B
// (column-major index i+j*l).
2872 for (
unsigned int j=0; j<l; j++) {
2873 for (
unsigned int i=0; i<m; i++) {
2874 A[i+j*m] =
FadType(ndot, urand.number());
2875 for (
unsigned int k=0; k<ndot; k++)
2879 for (
unsigned int j=0; j<n; j++) {
2880 for (
unsigned int i=0; i<l; i++) {
2881 b[i+j*l] = urand.number();
2882 B[i+j*l] = b[i+j*l];
// Fad-valued scaling factors with random derivative components.
2885 FadType alpha(ndot, urand.number());
2886 FadType beta(ndot, urand.number());
2887 for (
unsigned int k=0; k<ndot; k++) {
2888 alpha.fastAccessDx(k) = urand.number();
2889 beta.fastAccessDx(k) = urand.number();
// Give all five C arrays the same derivative components so the five GEMM
// variants start from identical data (the value assignments that presumably
// follow `val` are not visible here).
2892 for (
unsigned int j=0; j<n; j++) {
2893 for (
unsigned int i=0; i<m; i++) {
2894 ScalarType
val = urand.number();
2900 for (
unsigned int k=0; k<ndot; k++) {
2901 val = urand.number();
2902 C1[i+j*m].fastAccessDx(k) =
val;
2903 C2[i+j*m].fastAccessDx(k) =
val;
2904 C3[i+j*m].fastAccessDx(k) =
val;
2905 C4[i+j*m].fastAccessDx(k) =
val;
2906 C5[i+j*m].fastAccessDx(k) =
val;
// Group 1: NO_TRANS / NO_TRANS with lda = m, ldb = l.
2911 Teuchos::BLAS<int,FadType> teuchos_blas;
2912 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2913 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2915 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2916 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2917 &
A[0], m, &
B[0], l, beta, &C2[0], m);
// sz counts (1+ndot) per entry of A and C but only 1 per entry of B;
// presumably a workspace size for sacado_blas2 -- TODO confirm.
2921 unsigned int sz = m*l*(1+ndot) + l*n + m*n*(1+ndot);
2922 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2923 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2924 &
A[0], m, &
B[0], l, beta, &C3[0], m);
// Same products again, but with the scalar array b in place of B.
2928 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2929 &
A[0], m, &b[0], l, beta, &C4[0], m);
2933 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2934 &
A[0], m, &b[0], l, beta, &C5[0], m);
// Group 2: TRANS / NO_TRANS with lda = l, ldb = l.
2939 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2940 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2941 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2942 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2943 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2944 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2945 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2946 &
A[0], l, &b[0], l, beta, &C4[0], m);
2947 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2948 &
A[0], l, &b[0], l, beta, &C5[0], m);
// Group 3: NO_TRANS / TRANS with lda = m, ldb = n.
2956 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2957 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2958 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2959 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2960 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2961 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2962 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2963 &
A[0], m, &b[0], n, beta, &C4[0], m);
2964 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2965 &
A[0], m, &b[0], n, beta, &C5[0], m);
// Group 4: TRANS / TRANS with lda = l, ldb = n.
2973 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2974 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2975 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2976 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2977 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2978 &
A[0], l, &
B[0], n, beta, &C3[0], m);
2979 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2980 &
A[0], l, &b[0], n, beta, &C4[0], m);
2981 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2982 &
A[0], l, &b[0], n, beta, &C5[0], m);
// GEMM test fragment in which both A and B also exist as plain ScalarType
// arrays (a and b). The function signature, closing braces and any result
// checks fall on lines that are not visible in this excerpt.
//
// Setup visible here:
//   - a (m*l) and b (l*n) are filled with random scalars; every A / B entry
//     is assigned from the matching a / b entry.
//   - alpha and beta are Fad values with random derivative components.
//   - The derivative components of C1..C5 are filled with identical values.
//
// For each of the four transpose combinations GEMM is run five times:
// teuchos_blas -> C1, sacado_blas -> C2, sacado_blas2 -> C3 (with the Fad
// arrays A and B), and sacado_blas -> C4, sacado_blas2 -> C5 with the scalar
// arrays a and b.
// NOTE(review): the meaning of the sacado_blas / sacado_blas2 constructor
// arguments is defined outside this excerpt -- confirm against the
// Teuchos::BLAS<int,FadType> specialization.
2991 template <
class FadType,
class ScalarType>
2995 VectorType A(m*l,ndot),
B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2996 C4(m*n,ndot), C5(m*n,ndot);
// Scalar counterparts of A and B, passed directly to GEMM for the C4/C5 runs.
2997 std::vector<ScalarType>
a(m*l), b(l*n);
// Fill a/A with column-major index i+j*m.
2998 for (
unsigned int j=0; j<l; j++) {
2999 for (
unsigned int i=0; i<m; i++) {
3000 a[i+j*m] = urand.number();
3001 A[i+j*m] =
a[i+j*m];
// Fill b/B with column-major index i+j*l.
3004 for (
unsigned int j=0; j<n; j++) {
3005 for (
unsigned int i=0; i<l; i++) {
3006 b[i+j*l] = urand.number();
3007 B[i+j*l] = b[i+j*l];
// Fad-valued scaling factors with random derivative components.
3010 FadType alpha(ndot, urand.number());
3011 FadType beta(ndot, urand.number());
3012 for (
unsigned int k=0; k<ndot; k++) {
3013 alpha.fastAccessDx(k) = urand.number();
3014 beta.fastAccessDx(k) = urand.number();
// Give all five C arrays the same derivative components (the value
// assignments that presumably follow `val` are not visible here).
3017 for (
unsigned int j=0; j<n; j++) {
3018 for (
unsigned int i=0; i<m; i++) {
3019 ScalarType
val = urand.number();
3025 for (
unsigned int k=0; k<ndot; k++) {
3026 val = urand.number();
3027 C1[i+j*m].fastAccessDx(k) =
val;
3028 C2[i+j*m].fastAccessDx(k) =
val;
3029 C3[i+j*m].fastAccessDx(k) =
val;
3030 C4[i+j*m].fastAccessDx(k) =
val;
3031 C5[i+j*m].fastAccessDx(k) =
val;
// Group 1: NO_TRANS / NO_TRANS with lda = m, ldb = l.
3036 Teuchos::BLAS<int,FadType> teuchos_blas;
3037 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3038 &
A[0], m, &
B[0], l, beta, &C1[0], m);
3040 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3041 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3042 &
A[0], m, &
B[0], l, beta, &C2[0], m);
// sz counts 1 per entry of A and B and (1+ndot) per entry of C;
// presumably a workspace size for sacado_blas2 -- TODO confirm.
3046 unsigned int sz = m*l + l*n + m*n*(1+ndot);
3047 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3048 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3049 &
A[0], m, &
B[0], l, beta, &C3[0], m);
// Same products again, but with the scalar arrays a and b.
3053 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3054 &
a[0], m, &b[0], l, beta, &C4[0], m);
3058 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3059 &
a[0], m, &b[0], l, beta, &C5[0], m);
// Group 2: TRANS / NO_TRANS with lda = l, ldb = l.
3064 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3065 &
A[0], l, &
B[0], l, beta, &C1[0], m);
3066 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3067 &
A[0], l, &
B[0], l, beta, &C2[0], m);
3068 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3069 &
A[0], l, &
B[0], l, beta, &C3[0], m);
3070 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3071 &
a[0], l, &b[0], l, beta, &C4[0], m);
3072 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3073 &
a[0], l, &b[0], l, beta, &C5[0], m);
// Group 3: NO_TRANS / TRANS with lda = m, ldb = n.
3081 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3082 &
A[0], m, &
B[0], n, beta, &C1[0], m);
3083 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3084 &
A[0], m, &
B[0], n, beta, &C2[0], m);
3085 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3086 &
A[0], m, &
B[0], n, beta, &C3[0], m);
3087 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3088 &
a[0], m, &b[0], n, beta, &C4[0], m);
3089 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3090 &
a[0], m, &b[0], n, beta, &C5[0], m);
// Group 4: TRANS / TRANS with lda = l, ldb = n.
3098 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3099 &
A[0], l, &
B[0], n, beta, &C1[0], m);
3100 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3101 &
A[0], l, &
B[0], n, beta, &C2[0], m);
3102 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3103 &
A[0], l, &
B[0], n, beta, &C3[0], m);
3104 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3105 &
a[0], l, &b[0], n, beta, &C4[0], m);
3106 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3107 &
a[0], l, &b[0], n, beta, &C5[0], m);
// SYMM test fragment with A on the left (Teuchos::LEFT_SIDE): A has m*m
// entries, B and C1..C3 have m*n entries. The function signature, several
// loop bodies, the closing braces and any result checks fall on lines that
// are not visible in this excerpt.
//
// SYMM is run with three BLAS objects (teuchos_blas -> C1, sacado_blas -> C2,
// sacado_blas2 -> C3), first with UPPER_TRI and then with LOWER_TRI.
// NOTE(review): the meaning of the sacado_blas / sacado_blas2 constructor
// arguments is defined outside this excerpt -- confirm.
3116 template <
class FadType,
class ScalarType>
// The statement guarded by this check is not visible here; presumably the
// test is skipped for complex scalar types -- TODO confirm.
3122 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3125 VectorType A(m*m,ndot),
B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
// Fill A (m x m) and then B (m x n) with Fad entries built from random values;
// the bodies of the k loops are not visible here.
3126 for (
unsigned int j=0; j<m; j++) {
3127 for (
unsigned int i=0; i<m; i++) {
3128 A[i+j*m] =
FadType(ndot, urand.number());
3129 for (
unsigned int k=0; k<ndot; k++)
3133 for (
unsigned int j=0; j<n; j++) {
3134 for (
unsigned int i=0; i<m; i++) {
3135 B[i+j*m] =
FadType(ndot, urand.number());
3136 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued scaling factors with random derivative components.
3140 FadType alpha(ndot, urand.number());
3141 FadType beta(ndot, urand.number());
3142 for (
unsigned int k=0; k<ndot; k++) {
3143 alpha.fastAccessDx(k) = urand.number();
3144 beta.fastAccessDx(k) = urand.number();
// Give C1..C3 identical derivative components so the three SYMM variants
// start from the same data.
3147 for (
unsigned int j=0; j<n; j++) {
3148 for (
unsigned int i=0; i<m; i++) {
3149 ScalarType
val = urand.number();
3153 for (
unsigned int k=0; k<ndot; k++) {
3154 val = urand.number();
3155 C1[i+j*m].fastAccessDx(k) =
val;
3156 C2[i+j*m].fastAccessDx(k) =
val;
3157 C3[i+j*m].fastAccessDx(k) =
val;
// UPPER_TRI runs.
3162 Teuchos::BLAS<int,FadType> teuchos_blas;
3163 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3164 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3166 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3167 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3168 &
A[0], m, &
B[0], m, beta, &C2[0], m);
// sz counts (1+ndot) per entry of A and twice per m*n entry;
// presumably a workspace size for sacado_blas2 -- TODO confirm.
3172 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3173 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3174 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3175 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// LOWER_TRI runs on the same C arrays.
3180 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3181 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3182 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3183 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3184 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3185 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// SYMM test fragment with A on the right (Teuchos::RIGHT_SIDE): A has n*n
// entries and leading dimension n, B and C1..C3 have m*n entries. The
// function signature, several loop bodies, the closing braces and any result
// checks fall on lines that are not visible in this excerpt.
//
// SYMM is run with three BLAS objects (teuchos_blas -> C1, sacado_blas -> C2,
// sacado_blas2 -> C3), first with UPPER_TRI and then with LOWER_TRI.
// NOTE(review): the meaning of the sacado_blas / sacado_blas2 constructor
// arguments is defined outside this excerpt -- confirm.
3192 template <
class FadType,
class ScalarType>
// The statement guarded by this check is not visible here; presumably the
// test is skipped for complex scalar types -- TODO confirm.
3198 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3201 VectorType A(n*n,ndot),
B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
// Fill A (n x n) and then B (m x n) with Fad entries built from random values;
// the bodies of the k loops are not visible here.
3202 for (
unsigned int j=0; j<n; j++) {
3203 for (
unsigned int i=0; i<n; i++) {
3204 A[i+j*n] =
FadType(ndot, urand.number());
3205 for (
unsigned int k=0; k<ndot; k++)
3209 for (
unsigned int j=0; j<n; j++) {
3210 for (
unsigned int i=0; i<m; i++) {
3211 B[i+j*m] =
FadType(ndot, urand.number());
3212 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued scaling factors with random derivative components.
3216 FadType alpha(ndot, urand.number());
3217 FadType beta(ndot, urand.number());
3218 for (
unsigned int k=0; k<ndot; k++) {
3219 alpha.fastAccessDx(k) = urand.number();
3220 beta.fastAccessDx(k) = urand.number();
// Give C1..C3 identical derivative components so the three SYMM variants
// start from the same data.
3223 for (
unsigned int j=0; j<n; j++) {
3224 for (
unsigned int i=0; i<m; i++) {
3225 ScalarType
val = urand.number();
3229 for (
unsigned int k=0; k<ndot; k++) {
3230 val = urand.number();
3231 C1[i+j*m].fastAccessDx(k) =
val;
3232 C2[i+j*m].fastAccessDx(k) =
val;
3233 C3[i+j*m].fastAccessDx(k) =
val;
// UPPER_TRI runs.
3238 Teuchos::BLAS<int,FadType> teuchos_blas;
3239 teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3240 &
A[0], n, &
B[0], m, beta, &C1[0], m);
3242 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3243 sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3244 &
A[0], n, &
B[0], m, beta, &C2[0], m);
// sz counts (1+ndot) per entry of the n x n matrix A and twice per m*n entry;
// presumably a workspace size for sacado_blas2 -- TODO confirm.
3248 unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3249 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3250 sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3251 &
A[0], n, &
B[0], m, beta, &C3[0], m);
// LOWER_TRI runs on the same C arrays.
3256 teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3257 &
A[0], n, &
B[0], m, beta, &C1[0], m);
3258 sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3259 &
A[0], n, &
B[0], m, beta, &C2[0], m);
3260 sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3261 &
A[0], n, &
B[0], m, beta, &C3[0], m);
// SYMM test fragment with A on the left (Teuchos::LEFT_SIDE) and leading
// dimensions larger than the matrix heights: lda = m+4, ldb = m+5,
// ldc = m+6. The arrays are sized lda*m, ldb*n and ldc*n, and the fill loops
// run over the full leading dimension, so the padding rows are populated too.
// The function signature, part of the VectorType declaration, several loop
// bodies, the closing braces and any result checks fall on lines that are not
// visible in this excerpt.
//
// SYMM is run with three BLAS objects (teuchos_blas -> C1, sacado_blas -> C2,
// sacado_blas2 -> C3), first with UPPER_TRI and then with LOWER_TRI.
// NOTE(review): the meaning of the sacado_blas / sacado_blas2 constructor
// arguments is defined outside this excerpt -- confirm.
3268 template <
class FadType,
class ScalarType>
// The statement guarded by this check is not visible here; presumably the
// test is skipped for complex scalar types -- TODO confirm.
3274 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3277 unsigned int lda = m+4;
3278 unsigned int ldb = m+5;
3279 unsigned int ldc = m+6;
3280 VectorType A(lda*m,ndot),
B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
3282 for (
unsigned int j=0; j<m; j++) {
3283 for (
unsigned int i=0; i<lda; i++) {
3284 A[i+j*lda] =
FadType(ndot, urand.number());
3285 for (
unsigned int k=0; k<ndot; k++)
3289 for (
unsigned int j=0; j<n; j++) {
3290 for (
unsigned int i=0; i<ldb; i++) {
3291 B[i+j*ldb] =
FadType(ndot, urand.number());
3292 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued scaling factors with random derivative components.
3296 FadType alpha(ndot, urand.number());
3297 FadType beta(ndot, urand.number());
3298 for (
unsigned int k=0; k<ndot; k++) {
3299 alpha.fastAccessDx(k) = urand.number();
3300 beta.fastAccessDx(k) = urand.number();
// Give C1..C3 identical derivative components, including the padding rows
// (i runs up to ldc).
3303 for (
unsigned int j=0; j<n; j++) {
3304 for (
unsigned int i=0; i<ldc; i++) {
3305 ScalarType
val = urand.number();
3309 for (
unsigned int k=0; k<ndot; k++) {
3310 val = urand.number();
3311 C1[i+j*ldc].fastAccessDx(k) =
val;
3312 C2[i+j*ldc].fastAccessDx(k) =
val;
3313 C3[i+j*ldc].fastAccessDx(k) =
val;
// UPPER_TRI runs.
3318 Teuchos::BLAS<int,FadType> teuchos_blas;
3319 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3320 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
3322 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3323 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3324 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
// sz is computed from m and n only (not from lda/ldb/ldc);
// presumably a workspace size for sacado_blas2 -- TODO confirm.
3328 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3329 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3330 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3331 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// LOWER_TRI runs on the same C arrays.
3336 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3337 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
3338 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3339 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
3340 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3341 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// SYMM test fragment with A on the right (Teuchos::RIGHT_SIDE) and leading
// dimensions larger than the matrix heights: lda = n+4, ldb = m+5,
// ldc = m+6. The arrays are sized lda*n, ldb*n and ldc*n, and the fill loops
// run over the full leading dimension, so the padding rows are populated too.
// The function signature, part of the VectorType declaration, several loop
// bodies, the closing braces and any result checks fall on lines that are not
// visible in this excerpt.
//
// SYMM is run with three BLAS objects (teuchos_blas -> C1, sacado_blas -> C2,
// sacado_blas2 -> C3), first with UPPER_TRI and then with LOWER_TRI.
// NOTE(review): the meaning of the sacado_blas / sacado_blas2 constructor
// arguments is defined outside this excerpt -- confirm.
3348 template <
class FadType,
class ScalarType>
// The statement guarded by this check is not visible here; presumably the
// test is skipped for complex scalar types -- TODO confirm.
3354 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3357 unsigned int lda = n+4;
3358 unsigned int ldb = m+5;
3359 unsigned int ldc = m+6;
3360 VectorType A(lda*n,ndot),
B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
3362 for (
unsigned int j=0; j<n; j++) {
3363 for (
unsigned int i=0; i<lda; i++) {
3364 A[i+j*lda] =
FadType(ndot, urand.number());
3365 for (
unsigned int k=0; k<ndot; k++)
3369 for (
unsigned int j=0; j<n; j++) {
3370 for (
unsigned int i=0; i<ldb; i++) {
3371 B[i+j*ldb] =
FadType(ndot, urand.number());
3372 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued scaling factors with random derivative components.
3376 FadType alpha(ndot, urand.number());
3377 FadType beta(ndot, urand.number());
3378 for (
unsigned int k=0; k<ndot; k++) {
3379 alpha.fastAccessDx(k) = urand.number();
3380 beta.fastAccessDx(k) = urand.number();
// Give C1..C3 identical derivative components, including the padding rows
// (i runs up to ldc).
3383 for (
unsigned int j=0; j<n; j++) {
3384 for (
unsigned int i=0; i<ldc; i++) {
3385 ScalarType
val = urand.number();
3389 for (
unsigned int k=0; k<ndot; k++) {
3390 val = urand.number();
3391 C1[i+j*ldc].fastAccessDx(k) =
val;
3392 C2[i+j*ldc].fastAccessDx(k) =
val;
3393 C3[i+j*ldc].fastAccessDx(k) =
val;
// UPPER_TRI runs.
3398 Teuchos::BLAS<int,FadType> teuchos_blas;
3399 teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3400 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
3402 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3403 sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3404 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
// sz is computed from m and n only (not from lda/ldb/ldc);
// presumably a workspace size for sacado_blas2 -- TODO confirm.
3408 unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3409 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3410 sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3411 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// LOWER_TRI runs on the same C arrays.
3416 teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3417 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
3418 sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3419 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
3420 sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3421 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// SYMM test fragment with A on the left (Teuchos::LEFT_SIDE): A has m*m
// entries, B and C1..C3 have m*n entries. In the C initialisation loop only
// the draw of `val` is visible; no loop over derivative components of C
// appears in this excerpt.
// NOTE(review): presumably C is given values only, without derivative
// components -- confirm against the lines missing after `val`.
// The function signature, several loop bodies, the closing braces and any
// result checks fall on lines that are not visible in this excerpt.
//
// SYMM is run with three BLAS objects (teuchos_blas -> C1, sacado_blas -> C2,
// sacado_blas2 -> C3), first with UPPER_TRI and then with LOWER_TRI.
// NOTE(review): the meaning of the sacado_blas / sacado_blas2 constructor
// arguments is defined outside this excerpt -- confirm.
3428 template <
class FadType,
class ScalarType>
// The statement guarded by this check is not visible here; presumably the
// test is skipped for complex scalar types -- TODO confirm.
3434 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3437 VectorType A(m*m,ndot),
B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
// Fill A (m x m) and then B (m x n) with Fad entries built from random values;
// the bodies of the k loops are not visible here.
3438 for (
unsigned int j=0; j<m; j++) {
3439 for (
unsigned int i=0; i<m; i++) {
3440 A[i+j*m] =
FadType(ndot, urand.number());
3441 for (
unsigned int k=0; k<ndot; k++)
3445 for (
unsigned int j=0; j<n; j++) {
3446 for (
unsigned int i=0; i<m; i++) {
3447 B[i+j*m] =
FadType(ndot, urand.number());
3448 for (
unsigned int k=0; k<ndot; k++)
// Fad-valued scaling factors with random derivative components.
3452 FadType alpha(ndot, urand.number());
3453 FadType beta(ndot, urand.number());
3454 for (
unsigned int k=0; k<ndot; k++) {
3455 alpha.fastAccessDx(k) = urand.number();
3456 beta.fastAccessDx(k) = urand.number();
// One random value is drawn per C entry; how it is stored is not visible.
3459 for (
unsigned int j=0; j<n; j++) {
3460 for (
unsigned int i=0; i<m; i++) {
3461 ScalarType
val = urand.number();
// UPPER_TRI runs.
3468 Teuchos::BLAS<int,FadType> teuchos_blas;
3469 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3470 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3472 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3473 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3474 &
A[0], m, &
B[0], m, beta, &C2[0], m);
// Presumably a workspace size for sacado_blas2 -- TODO confirm.
3478 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3479 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3480 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3481 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// LOWER_TRI runs on the same C arrays.
3486 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3487 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3488 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3489 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3490 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3491 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// SYMM test fragment with A on the left (Teuchos::LEFT_SIDE) in which alpha
// and beta are plain ScalarType values rather than Fad values. A has m*m
// entries, B and C1..C3 have m*n entries. The function signature, several
// loop bodies, the closing braces and any result checks fall on lines that
// are not visible in this excerpt.
//
// SYMM is run with three BLAS objects (teuchos_blas -> C1, sacado_blas -> C2,
// sacado_blas2 -> C3), first with UPPER_TRI and then with LOWER_TRI.
// NOTE(review): the meaning of the sacado_blas / sacado_blas2 constructor
// arguments is defined outside this excerpt -- confirm.
3498 template <
class FadType,
class ScalarType>
// The statement guarded by this check is not visible here; presumably the
// test is skipped for complex scalar types -- TODO confirm.
3504 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3507 VectorType A(m*m,ndot),
B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
// Fill A (m x m) and then B (m x n) with Fad entries built from random values;
// the bodies of the k loops are not visible here.
3508 for (
unsigned int j=0; j<m; j++) {
3509 for (
unsigned int i=0; i<m; i++) {
3510 A[i+j*m] =
FadType(ndot, urand.number());
3511 for (
unsigned int k=0; k<ndot; k++)
3515 for (
unsigned int j=0; j<n; j++) {
3516 for (
unsigned int i=0; i<m; i++) {
3517 B[i+j*m] =
FadType(ndot, urand.number());
3518 for (
unsigned int k=0; k<ndot; k++)
// Scalar (non-Fad) scaling factors.
3522 ScalarType alpha = urand.number();
3523 ScalarType beta = urand.number();
// Give C1..C3 identical derivative components so the three SYMM variants
// start from the same data.
3525 for (
unsigned int j=0; j<n; j++) {
3526 for (
unsigned int i=0; i<m; i++) {
3527 ScalarType
val = urand.number();
3531 for (
unsigned int k=0; k<ndot; k++) {
3532 val = urand.number();
3533 C1[i+j*m].fastAccessDx(k) =
val;
3534 C2[i+j*m].fastAccessDx(k) =
val;
3535 C3[i+j*m].fastAccessDx(k) =
val;
// UPPER_TRI runs.
3540 Teuchos::BLAS<int,FadType> teuchos_blas;
3541 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3542 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3544 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3545 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3546 &
A[0], m, &
B[0], m, beta, &C2[0], m);
// Presumably a workspace size for sacado_blas2 -- TODO confirm.
3550 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3551 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3552 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3553 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// LOWER_TRI runs on the same C arrays.
3558 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3559 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3560 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3561 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3562 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3563 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// SYMM test fragment (Teuchos::LEFT_SIDE) in which A also exists as a plain
// ScalarType array a. A has m*m entries, B and C1..C5 have m*n entries. The
// function signature, several loop bodies, the closing braces and any result
// checks fall on lines that are not visible in this excerpt.
//
// For UPPER_TRI and then LOWER_TRI, SYMM is run five times:
// teuchos_blas -> C1, sacado_blas -> C2, sacado_blas2 -> C3 (with the Fad
// array A), and sacado_blas -> C4, sacado_blas2 -> C5 with the scalar array a.
// NOTE(review): the meaning of the sacado_blas / sacado_blas2 constructor
// arguments is defined outside this excerpt -- confirm.
3570 template <
class FadType,
class ScalarType>
// The statement guarded by this check is not visible here; presumably the
// test is skipped for complex scalar types -- TODO confirm.
3576 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3579 VectorType A(m*m,ndot),
B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3580 C4(m*n,ndot), C5(m*n,ndot);
// Scalar counterpart of A, passed directly to SYMM for the C4/C5 runs.
3581 std::vector<ScalarType>
a(m*m);
// Fill a with random scalars and assign each A entry from the matching a entry.
3582 for (
unsigned int j=0; j<m; j++) {
3583 for (
unsigned int i=0; i<m; i++) {
3584 a[i+j*m] = urand.number();
3585 A[i+j*m] =
a[i+j*m];
// Fill B with Fad entries built from random values; the body of the k loop
// is not visible here.
3588 for (
unsigned int j=0; j<n; j++) {
3589 for (
unsigned int i=0; i<m; i++) {
3590 B[i+j*m] =
FadType(ndot, urand.number());
3591 for (
unsigned int k=0; k<ndot; k++)
3595 FadType alpha(ndot, urand.number());
3596 FadType beta(ndot, urand.number());
3597 for (
unsigned int k=0; k<ndot; k++) {
3598 alpha.fastAccessDx(k) = urand.number();
3599 beta.fastAccessDx(k) = urand.number();
// Give C1..C5 identical derivative components so the five SYMM variants
// start from the same data.
3602 for (
unsigned int j=0; j<n; j++) {
3603 for (
unsigned int i=0; i<m; i++) {
3604 ScalarType
val = urand.number();
3610 for (
unsigned int k=0; k<ndot; k++) {
3611 val = urand.number();
3612 C1[i+j*m].fastAccessDx(k) =
val;
3613 C2[i+j*m].fastAccessDx(k) =
val;
3614 C3[i+j*m].fastAccessDx(k) =
val;
3615 C4[i+j*m].fastAccessDx(k) =
val;
3616 C5[i+j*m].fastAccessDx(k) =
val;
// UPPER_TRI runs.
3621 Teuchos::BLAS<int,FadType> teuchos_blas;
3622 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3623 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3625 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3626 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3627 &
A[0], m, &
B[0], m, beta, &C2[0], m);
// sz counts 1 per entry of A and (1+ndot) twice per m*n entry;
// presumably a workspace size for sacado_blas2 -- TODO confirm.
3631 unsigned int sz = m*m + 2*m*n*(1+ndot);
3632 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3633 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3634 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// Same products again, but with the scalar array a in place of A.
3638 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3639 &
a[0], m, &
B[0], m, beta, &C4[0], m);
3643 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3644 &
a[0], m, &
B[0], m, beta, &C5[0], m);
// LOWER_TRI runs on the same C arrays.
3649 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3650 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3651 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3652 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3653 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3654 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3655 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3656 &
a[0], m, &
B[0], m, beta, &C4[0], m);
3657 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3658 &
a[0], m, &
B[0], m, beta, &C5[0], m);
// SYMM test fragment (Teuchos::LEFT_SIDE) in which B also exists as a plain
// ScalarType array b. A has m*m entries, B and C1..C5 have m*n entries. The
// function signature, several loop bodies, the closing braces and any result
// checks fall on lines that are not visible in this excerpt.
//
// For UPPER_TRI and then LOWER_TRI, SYMM is run five times:
// teuchos_blas -> C1, sacado_blas -> C2, sacado_blas2 -> C3 (with the Fad
// array B), and sacado_blas -> C4, sacado_blas2 -> C5 with the scalar array b.
// NOTE(review): the meaning of the sacado_blas / sacado_blas2 constructor
// arguments is defined outside this excerpt -- confirm.
3667 template <
class FadType,
class ScalarType>
// The statement guarded by this check is not visible here; presumably the
// test is skipped for complex scalar types -- TODO confirm.
3673 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3676 VectorType A(m*m,ndot),
B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3677 C4(m*n,ndot), C5(m*n,ndot);
// Scalar counterpart of B, passed directly to SYMM for the C4/C5 runs.
3678 std::vector<ScalarType> b(m*n);
// The next two loop nests fill A with Fad entries built from random values
// (k-loop body not visible) and then b/B, with each B entry assigned from
// the matching b entry.
3679 for (
unsigned int j=0; j<m; j++) {
3680 for (
unsigned int i=0; i<m; i++) {
3681 A[i+j*m] =
FadType(ndot, urand.number());
3682 for (
unsigned int k=0; k<ndot; k++)
3686 for (
unsigned int j=0; j<n; j++) {
3687 for (
unsigned int i=0; i<m; i++) {
3688 b[i+j*m] = urand.number();
3689 B[i+j*m] = b[i+j*m];
// Fad-valued scaling factors with random derivative components.
3692 FadType alpha(ndot, urand.number());
3693 FadType beta(ndot, urand.number());
3694 for (
unsigned int k=0; k<ndot; k++) {
3695 alpha.fastAccessDx(k) = urand.number();
3696 beta.fastAccessDx(k) = urand.number();
// Give C1..C5 identical derivative components so the five SYMM variants
// start from the same data.
3699 for (
unsigned int j=0; j<n; j++) {
3700 for (
unsigned int i=0; i<m; i++) {
3701 ScalarType
val = urand.number();
3707 for (
unsigned int k=0; k<ndot; k++) {
3708 val = urand.number();
3709 C1[i+j*m].fastAccessDx(k) =
val;
3710 C2[i+j*m].fastAccessDx(k) =
val;
3711 C3[i+j*m].fastAccessDx(k) =
val;
3712 C4[i+j*m].fastAccessDx(k) =
val;
3713 C5[i+j*m].fastAccessDx(k) =
val;
// UPPER_TRI runs.
3718 Teuchos::BLAS<int,FadType> teuchos_blas;
3719 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3720 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3722 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3723 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3724 &
A[0], m, &
B[0], m, beta, &C2[0], m);
// sz counts (1+ndot) per entry of A and (2+ndot) per m*n entry;
// presumably a workspace size for sacado_blas2 -- TODO confirm.
3728 unsigned int sz = m*m*(1+ndot) + m*n*(2+ndot);
3729 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3730 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3731 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// Same products again, but with the scalar array b in place of B.
3735 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3736 &
A[0], m, &b[0], m, beta, &C4[0], m);
3740 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3741 &
A[0], m, &b[0], m, beta, &C5[0], m);
// LOWER_TRI runs on the same C arrays.
3746 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3747 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3748 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3749 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3750 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3751 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3752 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3753 &
A[0], m, &b[0], m, beta, &C4[0], m);
3754 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3755 &
A[0], m, &b[0], m, beta, &C5[0], m);
// SYMM test fragment (Teuchos::LEFT_SIDE) in which both A and B also exist as
// plain ScalarType arrays (a and b). A has m*m entries, B and C1..C5 have m*n
// entries. The function signature, closing braces and any result checks fall
// on lines that are not visible in this excerpt.
//
// For UPPER_TRI and then LOWER_TRI, SYMM is run five times:
// teuchos_blas -> C1, sacado_blas -> C2, sacado_blas2 -> C3 (with the Fad
// arrays A and B), and sacado_blas -> C4, sacado_blas2 -> C5 with the scalar
// arrays a and b.
// NOTE(review): the meaning of the sacado_blas / sacado_blas2 constructor
// arguments is defined outside this excerpt -- confirm.
3764 template <
class FadType,
class ScalarType>
// The statement guarded by this check is not visible here; presumably the
// test is skipped for complex scalar types -- TODO confirm.
3770 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3773 VectorType A(m*m,ndot),
B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3774 C4(m*n,ndot), C5(m*n,ndot);
// Scalar counterparts of A and B, passed directly to SYMM for the C4/C5 runs.
3775 std::vector<ScalarType>
a(m*m), b(m*n);
// Fill a/A, then b/B; each Fad entry is assigned from the matching scalar.
3776 for (
unsigned int j=0; j<m; j++) {
3777 for (
unsigned int i=0; i<m; i++) {
3778 a[i+j*m] = urand.number();
3779 A[i+j*m] =
a[i+j*m];
3782 for (
unsigned int j=0; j<n; j++) {
3783 for (
unsigned int i=0; i<m; i++) {
3784 b[i+j*m] = urand.number();
3785 B[i+j*m] = b[i+j*m];
// Fad-valued scaling factors with random derivative components.
3788 FadType alpha(ndot, urand.number());
3789 FadType beta(ndot, urand.number());
3790 for (
unsigned int k=0; k<ndot; k++) {
3791 alpha.fastAccessDx(k) = urand.number();
3792 beta.fastAccessDx(k) = urand.number();
// Give C1..C5 identical derivative components so the five SYMM variants
// start from the same data.
3795 for (
unsigned int j=0; j<n; j++) {
3796 for (
unsigned int i=0; i<m; i++) {
3797 ScalarType
val = urand.number();
3803 for (
unsigned int k=0; k<ndot; k++) {
3804 val = urand.number();
3805 C1[i+j*m].fastAccessDx(k) =
val;
3806 C2[i+j*m].fastAccessDx(k) =
val;
3807 C3[i+j*m].fastAccessDx(k) =
val;
3808 C4[i+j*m].fastAccessDx(k) =
val;
3809 C5[i+j*m].fastAccessDx(k) =
val;
// UPPER_TRI runs.
3814 Teuchos::BLAS<int,FadType> teuchos_blas;
3815 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3816 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3818 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3819 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3820 &
A[0], m, &
B[0], m, beta, &C2[0], m);
// sz counts 1 per entry of A and (2+ndot) per m*n entry;
// presumably a workspace size for sacado_blas2 -- TODO confirm.
3824 unsigned int sz = m*m + m*n*(2+ndot);
3825 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3826 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3827 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// Same products again, but with the scalar arrays a and b.
3831 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3832 &
a[0], m, &b[0], m, beta, &C4[0], m);
3836 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3837 &
a[0], m, &b[0], m, beta, &C5[0], m);
// LOWER_TRI runs on the same C arrays.
3842 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3843 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3844 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3845 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3846 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3847 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3848 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3849 &
a[0], m, &b[0], m, beta, &C4[0], m);
3850 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3851 &
a[0], m, &b[0], m, beta, &C5[0], m);
// TRMM test fragment with A on the left (Teuchos::LEFT_SIDE): A has m*m
// entries, B1..B3 have m*n entries. The function signature, several loop
// bodies, the closing braces and any result checks fall on lines that are not
// visible in this excerpt.
//
// TRMM is run with three BLAS objects (teuchos_blas -> B1, sacado_blas -> B2,
// sacado_blas2 -> B3) for four configurations in turn:
//   1. UPPER_TRI, NO_TRANS, NON_UNIT_DIAG
//   2. LOWER_TRI, NO_TRANS, NON_UNIT_DIAG
//   3. UPPER_TRI, TRANS,    NON_UNIT_DIAG
//   4. UPPER_TRI, NO_TRANS, UNIT_DIAG (after the diagonal values of A are
//      set to 1.0)
// NOTE(review): the meaning of the sacado_blas / sacado_blas2 constructor
// arguments is defined outside this excerpt -- confirm.
3860 template <
class FadType,
class ScalarType>
3864 VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
// Fill A with Fad entries built from random values; the body of the k loop
// is not visible here.
3865 for (
unsigned int j=0; j<m; j++) {
3866 for (
unsigned int i=0; i<m; i++) {
3867 A[i+j*m] =
FadType(ndot, urand.number());
3868 for (
unsigned int k=0; k<ndot; k++)
3872 FadType alpha(ndot, urand.number());
3873 for (
unsigned int k=0; k<ndot; k++) {
3874 alpha.fastAccessDx(k) = urand.number();
// Give B1..B3 identical derivative components so the three TRMM variants
// start from the same data.
3877 for (
unsigned int j=0; j<n; j++) {
3878 for (
unsigned int i=0; i<m; i++) {
3879 ScalarType
val = urand.number();
3883 for (
unsigned int k=0; k<ndot; k++) {
3884 val = urand.number();
3885 B1[i+j*m].fastAccessDx(k) =
val;
3886 B2[i+j*m].fastAccessDx(k) =
val;
3887 B3[i+j*m].fastAccessDx(k) =
val;
// Configuration 1: UPPER_TRI, NO_TRANS, NON_UNIT_DIAG.
3892 Teuchos::BLAS<int,FadType> teuchos_blas;
3893 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3894 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
3896 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3897 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3898 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
// sz counts (1+ndot) per entry of A and of B;
// presumably a workspace size for sacado_blas2 -- TODO confirm.
3902 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
3903 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3904 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3905 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Configuration 2: LOWER_TRI, NO_TRANS, NON_UNIT_DIAG.
3909 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
3910 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
3911 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
3912 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
3913 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
3914 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Configuration 3: UPPER_TRI, TRANS, NON_UNIT_DIAG.
3918 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
3919 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
3920 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
3921 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
3922 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
3923 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Set the value of every diagonal entry of A to 1.0 before the UNIT_DIAG
// runs (the body of the k loop is not visible here).
3927 for (
unsigned int i=0; i<m; i++) {
3928 A[i*m+i].val() = 1.0;
3929 for (
unsigned int k=0; k<ndot; k++)
3932 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3933 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
3934 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3935 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
3936 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3937 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// TRMM test fragment with A on the right (Teuchos::RIGHT_SIDE): A has n*n
// entries and leading dimension n, B1..B3 have m*n entries. The function
// signature, several loop bodies, the closing braces and any result checks
// fall on lines that are not visible in this excerpt.
//
// TRMM is run with three BLAS objects (teuchos_blas -> B1, sacado_blas -> B2,
// sacado_blas2 -> B3) for four configurations in turn:
//   1. UPPER_TRI, NO_TRANS, NON_UNIT_DIAG
//   2. LOWER_TRI, NO_TRANS, NON_UNIT_DIAG
//   3. UPPER_TRI, TRANS,    NON_UNIT_DIAG
//   4. UPPER_TRI, NO_TRANS, UNIT_DIAG (after the diagonal values of A are
//      set to 1.0)
// NOTE(review): the meaning of the sacado_blas / sacado_blas2 constructor
// arguments is defined outside this excerpt -- confirm.
3943 template <
class FadType,
class ScalarType>
3947 VectorType A(n*n,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
// Fill A with Fad entries built from random values; the body of the k loop
// is not visible here.
3948 for (
unsigned int j=0; j<n; j++) {
3949 for (
unsigned int i=0; i<n; i++) {
3950 A[i+j*n] =
FadType(ndot, urand.number());
3951 for (
unsigned int k=0; k<ndot; k++)
3955 FadType alpha(ndot, urand.number());
3956 for (
unsigned int k=0; k<ndot; k++) {
3957 alpha.fastAccessDx(k) = urand.number();
// Give B1..B3 identical derivative components so the three TRMM variants
// start from the same data.
3960 for (
unsigned int j=0; j<n; j++) {
3961 for (
unsigned int i=0; i<m; i++) {
3962 ScalarType
val = urand.number();
3966 for (
unsigned int k=0; k<ndot; k++) {
3967 val = urand.number();
3968 B1[i+j*m].fastAccessDx(k) =
val;
3969 B2[i+j*m].fastAccessDx(k) =
val;
3970 B3[i+j*m].fastAccessDx(k) =
val;
// Configuration 1: UPPER_TRI, NO_TRANS, NON_UNIT_DIAG.
3975 Teuchos::BLAS<int,FadType> teuchos_blas;
3976 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3977 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B1[0], m);
3979 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3980 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3981 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B2[0], m);
// sz counts (1+ndot) per entry of the n x n matrix A and of B;
// presumably a workspace size for sacado_blas2 -- TODO confirm.
3985 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
3986 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3987 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3988 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B3[0], m);
// Configuration 2: LOWER_TRI, NO_TRANS, NON_UNIT_DIAG.
3992 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
3993 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B1[0], m);
3994 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
3995 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B2[0], m);
3996 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
3997 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B3[0], m);
// Configuration 3: UPPER_TRI, TRANS, NON_UNIT_DIAG.
4001 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4002 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B1[0], m);
4003 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4004 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B2[0], m);
4005 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4006 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B3[0], m);
// Set the value of every diagonal entry of A to 1.0 before the UNIT_DIAG
// runs (the body of the k loop is not visible here).
4010 for (
unsigned int i=0; i<n; i++) {
4011 A[i*n+i].val() = 1.0;
4012 for (
unsigned int k=0; k<ndot; k++)
4015 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4016 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], n, &B1[0], m);
4017 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4018 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], n, &B2[0], m);
4019 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4020 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], n, &B3[0], m);
// TRMM check with the triangular matrix A on the left (Teuchos::LEFT_SIDE) and
// padded leading dimensions: lda = m+4 and ldb = m+5, while TRMM is called with m rows.
// Three Teuchos::BLAS<int,FadType> objects (teuchos_blas, sacado_blas, sacado_blas2)
// share A and alpha and write into B1, B2 and B3 respectively.
// NOTE(review): the listing numbers embedded in this excerpt skip in places, so the
// function signature and whatever follows each group of calls are not visible here --
// presumably the B1/B2/B3 comparisons; confirm against the full file.
4026 template <
class FadType,
class ScalarType>
// Leading dimensions deliberately larger than the m rows handed to TRMM.
4030 unsigned int lda = m+4;
4031 unsigned int ldb = m+5;
4032 VectorType A(lda*m,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
// Fill every entry of A, padding rows included (i runs to lda), with a random value.
4033 for (
unsigned int j=0; j<m; j++) {
4034 for (
unsigned int i=0; i<lda; i++) {
4035 A[i+j*lda] =
FadType(ndot, urand.number());
// NOTE(review): the body of this k-loop is not visible in this excerpt.
4036 for (
unsigned int k=0; k<ndot; k++)
// alpha is a FadType with ndot random derivative components.
4040 FadType alpha(ndot, urand.number());
4041 for (
unsigned int k=0; k<ndot; k++) {
4042 alpha.fastAccessDx(k) = urand.number();
// Fill B1, B2 and B3 (padding rows included) so that all three hold the same
// derivative components.
4045 for (
unsigned int j=0; j<n; j++) {
4046 for (
unsigned int i=0; i<ldb; i++) {
4047 ScalarType
val = urand.number();
4051 for (
unsigned int k=0; k<ndot; k++) {
4052 val = urand.number();
4053 B1[i+j*ldb].fastAccessDx(k) =
val;
4054 B2[i+j*ldb].fastAccessDx(k) =
val;
4055 B3[i+j*ldb].fastAccessDx(k) =
val;
// Round 1: upper triangle, no transpose, non-unit diagonal.
// teuchos_blas is default-constructed.
4060 Teuchos::BLAS<int,FadType> teuchos_blas;
4061 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4062 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
// sacado_blas is constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Sacado BLAS constructor.
4064 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4065 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4066 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
// sacado_blas2 additionally receives sz as its third constructor argument.
// NOTE(review): sz looks like a workspace size counting one value plus ndot
// derivatives for each of the m*m and m*n entries -- confirm.
4070 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4071 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4072 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4073 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Round 2: lower triangle, no transpose, non-unit diagonal.
4077 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4078 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4079 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4080 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4081 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4082 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Round 3: upper triangle, transposed, non-unit diagonal.
4086 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4087 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4088 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4089 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4090 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4091 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Set the value of each diagonal entry of A to 1.0 before the UNIT_DIAG round.
// NOTE(review): the body of the k-loop below is not visible in this excerpt.
4095 for (
unsigned int i=0; i<m; i++) {
4096 A[i*lda+i].val() = 1.0;
4097 for (
unsigned int k=0; k<ndot; k++)
// Round 4: upper triangle, no transpose, unit diagonal.
4100 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4101 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4102 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4103 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4104 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4105 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// TRMM check with the triangular matrix A on the right (Teuchos::RIGHT_SIDE) and
// padded leading dimensions: lda = n+4 and ldb = m+5.
// Three Teuchos::BLAS<int,FadType> objects (teuchos_blas, sacado_blas, sacado_blas2)
// share A and alpha and write into B1, B2 and B3 respectively.
// NOTE(review): the listing numbers embedded in this excerpt skip in places, so the
// function signature and whatever follows each group of calls are not visible here --
// presumably the B1/B2/B3 comparisons; confirm against the full file.
4111 template <
class FadType,
class ScalarType>
// A is indexed with n columns here, so its leading dimension is padded from n.
4115 unsigned int lda = n+4;
4116 unsigned int ldb = m+5;
4117 VectorType A(lda*n,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
// Fill every entry of A, padding rows included (i runs to lda), with a random value.
4118 for (
unsigned int j=0; j<n; j++) {
4119 for (
unsigned int i=0; i<lda; i++) {
4120 A[i+j*lda] =
FadType(ndot, urand.number());
// NOTE(review): the body of this k-loop is not visible in this excerpt.
4121 for (
unsigned int k=0; k<ndot; k++)
// alpha is a FadType with ndot random derivative components.
4125 FadType alpha(ndot, urand.number());
4126 for (
unsigned int k=0; k<ndot; k++) {
4127 alpha.fastAccessDx(k) = urand.number();
// Fill B1, B2 and B3 (padding rows included) so that all three hold the same
// derivative components.
4130 for (
unsigned int j=0; j<n; j++) {
4131 for (
unsigned int i=0; i<ldb; i++) {
4132 ScalarType
val = urand.number();
4136 for (
unsigned int k=0; k<ndot; k++) {
4137 val = urand.number();
4138 B1[i+j*ldb].fastAccessDx(k) =
val;
4139 B2[i+j*ldb].fastAccessDx(k) =
val;
4140 B3[i+j*ldb].fastAccessDx(k) =
val;
// Round 1: upper triangle, no transpose, non-unit diagonal.
// teuchos_blas is default-constructed.
4145 Teuchos::BLAS<int,FadType> teuchos_blas;
4146 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4147 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
// sacado_blas is constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Sacado BLAS constructor.
4149 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4150 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4151 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
// sacado_blas2 additionally receives sz as its third constructor argument.
// NOTE(review): sz looks like a workspace size counting one value plus ndot
// derivatives for each of the n*n and m*n entries -- confirm.
4155 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4156 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4157 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4158 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Round 2: lower triangle, no transpose, non-unit diagonal.
4162 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4163 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4164 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4165 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4166 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4167 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Round 3: upper triangle, transposed, non-unit diagonal.
4171 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4172 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4173 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4174 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4175 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4176 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Set the value of each diagonal entry of A to 1.0 before the UNIT_DIAG round.
// NOTE(review): the body of the k-loop below is not visible in this excerpt.
4180 for (
unsigned int i=0; i<n; i++) {
4181 A[i*lda+i].val() = 1.0;
4182 for (
unsigned int k=0; k<ndot; k++)
// Round 4: upper triangle, no transpose, unit diagonal.
4185 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4186 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4187 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4188 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4189 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4190 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// TRMM check with A on the left (Teuchos::LEFT_SIDE) where alpha is a plain
// ScalarType rather than a FadType. A is m-by-m and B1/B2/B3 are m-by-n, all
// stored with leading dimension m.
// Three Teuchos::BLAS<int,FadType> objects (teuchos_blas, sacado_blas, sacado_blas2)
// share A and alpha and write into B1, B2 and B3 respectively.
// NOTE(review): the listing numbers embedded in this excerpt skip in places, so the
// function signature and whatever follows each group of calls are not visible here --
// presumably the B1/B2/B3 comparisons; confirm against the full file.
4196 template <
class FadType,
class ScalarType>
4200 VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
// Fill A with random values.
4201 for (
unsigned int j=0; j<m; j++) {
4202 for (
unsigned int i=0; i<m; i++) {
4203 A[i+j*m] =
FadType(ndot, urand.number());
// NOTE(review): the body of this k-loop is not visible in this excerpt.
4204 for (
unsigned int k=0; k<ndot; k++)
// alpha is a single random scalar; no derivative components are set on it.
4208 ScalarType alpha = urand.number();
// Fill B1, B2 and B3 so that all three hold the same derivative components.
4210 for (
unsigned int j=0; j<n; j++) {
4211 for (
unsigned int i=0; i<m; i++) {
4212 ScalarType
val = urand.number();
4216 for (
unsigned int k=0; k<ndot; k++) {
4217 val = urand.number();
4218 B1[i+j*m].fastAccessDx(k) =
val;
4219 B2[i+j*m].fastAccessDx(k) =
val;
4220 B3[i+j*m].fastAccessDx(k) =
val;
// Round 1: upper triangle, no transpose, non-unit diagonal.
// teuchos_blas is default-constructed.
4225 Teuchos::BLAS<int,FadType> teuchos_blas;
4226 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4227 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
// sacado_blas is constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Sacado BLAS constructor.
4229 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4230 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4231 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
// sacado_blas2 additionally receives sz as its third constructor argument.
// NOTE(review): sz looks like a workspace size counting one value plus ndot
// derivatives for each of the m*m and m*n entries -- confirm.
4235 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4236 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4237 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4238 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Round 2: lower triangle, no transpose, non-unit diagonal.
4242 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4243 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4244 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4245 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4246 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4247 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Round 3: upper triangle, transposed, non-unit diagonal.
4251 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4252 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4253 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4254 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4255 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4256 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Set the value of each diagonal entry of A to 1.0 before the UNIT_DIAG round.
// NOTE(review): the body of the k-loop below is not visible in this excerpt.
4260 for (
unsigned int i=0; i<m; i++) {
4261 A[i*m+i].val() = 1.0;
4262 for (
unsigned int k=0; k<ndot; k++)
// Round 4: upper triangle, no transpose, unit diagonal.
4265 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4266 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4267 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4268 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4269 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4270 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// TRMM check with A on the left (Teuchos::LEFT_SIDE), a FadType alpha, and
// m-by-n matrices B1/B2/B3 stored with leading dimension m.
// Unlike the neighbouring TRMM checks, no per-derivative fill of B1/B2/B3 is
// visible in this block.
// NOTE(review): presumably B carries no derivative components here -- confirm
// against the full file.
// NOTE(review): the listing numbers embedded in this excerpt skip in places, so the
// function signature and whatever follows each group of calls are not visible here --
// presumably the B1/B2/B3 comparisons; confirm against the full file.
4276 template <
class FadType,
class ScalarType>
4280 VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
// Fill A with random values.
4281 for (
unsigned int j=0; j<m; j++) {
4282 for (
unsigned int i=0; i<m; i++) {
4283 A[i+j*m] =
FadType(ndot, urand.number());
// NOTE(review): the body of this k-loop is not visible in this excerpt.
4284 for (
unsigned int k=0; k<ndot; k++)
// alpha is a FadType with ndot random derivative components.
4288 FadType alpha(ndot, urand.number());
4289 for (
unsigned int k=0; k<ndot; k++) {
4290 alpha.fastAccessDx(k) = urand.number();
// One random value is drawn per (i,j) entry of B.
// NOTE(review): the statements that consume val are not visible in this excerpt.
4293 for (
unsigned int j=0; j<n; j++) {
4294 for (
unsigned int i=0; i<m; i++) {
4295 ScalarType
val = urand.number();
// Round 1: upper triangle, no transpose, non-unit diagonal.
// teuchos_blas is default-constructed.
4302 Teuchos::BLAS<int,FadType> teuchos_blas;
4303 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4304 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
// sacado_blas is constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Sacado BLAS constructor.
4306 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4307 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4308 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
// sacado_blas2 additionally receives sz as its third constructor argument.
// NOTE(review): sz looks like a workspace size counting one value plus ndot
// derivatives for each of the m*m and m*n entries -- confirm.
4312 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4313 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4314 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4315 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Round 2: lower triangle, no transpose, non-unit diagonal.
4319 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4320 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4321 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4322 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4323 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4324 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Round 3: upper triangle, transposed, non-unit diagonal.
4328 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4329 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4330 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4331 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4332 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4333 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Set the value of each diagonal entry of A to 1.0 before the UNIT_DIAG round.
// NOTE(review): the body of the k-loop below is not visible in this excerpt.
4337 for (
unsigned int i=0; i<m; i++) {
4338 A[i*m+i].val() = 1.0;
4339 for (
unsigned int k=0; k<ndot; k++)
// Round 4: upper triangle, no transpose, unit diagonal.
4342 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4343 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4344 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4345 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4346 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4347 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// TRMM check with A on the left (Teuchos::LEFT_SIDE) where the triangular matrix
// is held twice: as a raw std::vector<ScalarType> `a` and as the VectorType `A`
// whose entries are assigned from `a`.
// Five result matrices are used: B1 (teuchos_blas with A), B2 (sacado_blas with A),
// B3 (sacado_blas2 with A), B4 (sacado_blas with a) and B5 (sacado_blas2 with a).
// NOTE(review): the listing numbers embedded in this excerpt skip in places, so the
// function signature and whatever follows each group of calls are not visible here --
// presumably the B1..B5 comparisons; confirm against the full file.
4353 template <
class FadType,
class ScalarType>
4357 VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot),
4358 B4(m*n,ndot), B5(m*n,ndot);
4359 std::vector<ScalarType>
a(m*m);
// Draw each entry once into the raw array and copy it into A, so a and A agree.
4360 for (
unsigned int j=0; j<m; j++) {
4361 for (
unsigned int i=0; i<m; i++) {
4362 a[i+j*m] = urand.number();
4363 A[i+j*m] =
a[i+j*m];
// alpha is a FadType with ndot random derivative components.
4366 FadType alpha(ndot, urand.number());
4367 for (
unsigned int k=0; k<ndot; k++) {
4368 alpha.fastAccessDx(k) = urand.number();
// Fill B1..B5 so that all five hold the same derivative components.
4371 for (
unsigned int j=0; j<n; j++) {
4372 for (
unsigned int i=0; i<m; i++) {
4373 ScalarType
val = urand.number();
4379 for (
unsigned int k=0; k<ndot; k++) {
4380 val = urand.number();
4381 B1[i+j*m].fastAccessDx(k) =
val;
4382 B2[i+j*m].fastAccessDx(k) =
val;
4383 B3[i+j*m].fastAccessDx(k) =
val;
4384 B4[i+j*m].fastAccessDx(k) =
val;
4385 B5[i+j*m].fastAccessDx(k) =
val;
// Round 1: upper triangle, no transpose, non-unit diagonal.
// teuchos_blas is default-constructed.
4390 Teuchos::BLAS<int,FadType> teuchos_blas;
4391 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4392 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
// sacado_blas is constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Sacado BLAS constructor.
4394 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4395 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4396 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
// sacado_blas2 additionally receives sz as its third constructor argument.
// Here the A term of sz is m*m without the (1+ndot) factor.
// NOTE(review): presumably because A has no derivative components -- confirm.
4400 unsigned int sz = m*m + m*n*(1+ndot);
4401 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4402 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4403 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Same round again, this time passing the raw ScalarType array a instead of A.
4407 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4408 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B4[0], m);
4412 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4413 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B5[0], m);
// Round 2: lower triangle, no transpose, non-unit diagonal (A, then a).
4417 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4418 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4419 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4420 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4421 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4422 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
4423 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4424 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B4[0], m);
4425 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4426 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B5[0], m);
// Round 3: upper triangle, transposed, non-unit diagonal (A, then a).
4432 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4433 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4434 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4435 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4436 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4437 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
4438 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4439 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B4[0], m);
4440 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4441 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B5[0], m);
// Set the value of each diagonal entry of A to 1.0 before the UNIT_DIAG round.
// NOTE(review): the body of the k-loop below is not visible in this excerpt, and
// no matching update of the raw array a is visible either.
4447 for (
unsigned int i=0; i<m; i++) {
4448 A[i*m+i].val() = 1.0;
4449 for (
unsigned int k=0; k<ndot; k++)
// Round 4: upper triangle, no transpose, unit diagonal (A, then a).
4452 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4453 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4454 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4455 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4456 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4457 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
4458 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4459 Teuchos::UNIT_DIAG, m, n, alpha, &
a[0], m, &B4[0], m);
4460 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4461 Teuchos::UNIT_DIAG, m, n, alpha, &
a[0], m, &B5[0], m);
// TRSM check with the triangular matrix A on the left (Teuchos::LEFT_SIDE).
// A is m-by-m and B1/B2/B3 are m-by-n, all stored with leading dimension m.
// Three Teuchos::BLAS<int,FadType> objects (teuchos_blas, sacado_blas, sacado_blas2)
// share A and alpha and write into B1, B2 and B3 respectively.
// NOTE(review): the listing numbers embedded in this excerpt skip in places, so the
// function signature and whatever follows each group of calls are not visible here --
// presumably the B1/B2/B3 comparisons; confirm against the full file.
4469 template <
class FadType,
class ScalarType>
4473 VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
// Fill A with random values.
4474 for (
unsigned int j=0; j<m; j++) {
4475 for (
unsigned int i=0; i<m; i++) {
4477 A[i+j*m] =
FadType(ndot, urand.number());
// NOTE(review): the body of this k-loop is not visible in this excerpt.
4478 for (
unsigned int k=0; k<ndot; k++)
// alpha is a FadType with ndot random derivative components.
4482 FadType alpha(ndot, urand.number());
4483 for (
unsigned int k=0; k<ndot; k++) {
4484 alpha.fastAccessDx(k) = urand.number();
// Fill B1, B2 and B3 so that all three hold the same derivative components.
4488 for (
unsigned int j=0; j<n; j++) {
4489 for (
unsigned int i=0; i<m; i++) {
4490 ScalarType
val = urand.number();
4497 for (
unsigned int k=0; k<ndot; k++) {
4498 val = urand.number();
4499 B1[i+j*m].fastAccessDx(k) =
val;
4500 B2[i+j*m].fastAccessDx(k) =
val;
4501 B3[i+j*m].fastAccessDx(k) =
val;
// Round 1: upper triangle, no transpose, non-unit diagonal.
// teuchos_blas is default-constructed.
4506 Teuchos::BLAS<int,FadType> teuchos_blas;
4507 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4508 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
// sacado_blas is constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Sacado BLAS constructor.
4510 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4511 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4512 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
// sacado_blas2 additionally receives sz as its third constructor argument.
// NOTE(review): sz looks like a workspace size counting one value plus ndot
// derivatives for each of the m*m and m*n entries -- confirm.
4516 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4517 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4518 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4519 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Round 2: lower triangle, no transpose, non-unit diagonal.
4523 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4524 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4525 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4526 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4527 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4528 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Round 3: upper triangle, transposed, non-unit diagonal.
4532 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4533 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4534 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4535 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4536 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4537 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Set the value of each diagonal entry of A to 1.0 before the UNIT_DIAG round.
// NOTE(review): the body of the k-loop below is not visible in this excerpt.
4541 for (
unsigned int i=0; i<m; i++) {
4542 A[i*m+i].val() = 1.0;
4543 for (
unsigned int k=0; k<ndot; k++)
// Round 4: upper triangle, no transpose, unit diagonal.
4546 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4547 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4548 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4549 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4550 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4551 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// TRSM check with the triangular matrix A on the right (Teuchos::RIGHT_SIDE).
// A is n-by-n (leading dimension n) and B1/B2/B3 are m-by-n (leading dimension m).
// Three Teuchos::BLAS<int,FadType> objects (teuchos_blas, sacado_blas, sacado_blas2)
// share A and alpha and write into B1, B2 and B3 respectively.
// NOTE(review): the listing numbers embedded in this excerpt skip in places, so the
// function signature and whatever follows each group of calls are not visible here --
// presumably the B1/B2/B3 comparisons; confirm against the full file.
4557 template <
class FadType,
class ScalarType>
4561 VectorType A(n*n,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
// Fill A with random values.
4562 for (
unsigned int j=0; j<n; j++) {
4563 for (
unsigned int i=0; i<n; i++) {
4564 A[i+j*n] =
FadType(ndot, urand.number());
// NOTE(review): the body of this k-loop is not visible in this excerpt.
4565 for (
unsigned int k=0; k<ndot; k++)
// alpha is a FadType with ndot random derivative components.
4569 FadType alpha(ndot, urand.number());
4570 for (
unsigned int k=0; k<ndot; k++) {
4571 alpha.fastAccessDx(k) = urand.number();
// Fill B1, B2 and B3 so that all three hold the same derivative components.
4574 for (
unsigned int j=0; j<n; j++) {
4575 for (
unsigned int i=0; i<m; i++) {
4576 ScalarType
val = urand.number();
4580 for (
unsigned int k=0; k<ndot; k++) {
4581 val = urand.number();
4582 B1[i+j*m].fastAccessDx(k) =
val;
4583 B2[i+j*m].fastAccessDx(k) =
val;
4584 B3[i+j*m].fastAccessDx(k) =
val;
// Round 1: upper triangle, no transpose, non-unit diagonal.
// teuchos_blas is default-constructed.
4589 Teuchos::BLAS<int,FadType> teuchos_blas;
4590 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4591 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B1[0], m);
// sacado_blas is constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Sacado BLAS constructor.
4593 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4594 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4595 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B2[0], m);
// sacado_blas2 additionally receives sz as its third constructor argument.
// NOTE(review): sz looks like a workspace size counting one value plus ndot
// derivatives for each of the n*n and m*n entries -- confirm.
4599 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4600 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4601 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4602 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B3[0], m);
// Round 2: lower triangle, no transpose, non-unit diagonal.
4606 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4607 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B1[0], m);
4608 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4609 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B2[0], m);
4610 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4611 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B3[0], m);
// Round 3: upper triangle, transposed, non-unit diagonal.
4615 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4616 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B1[0], m);
4617 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4618 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B2[0], m);
4619 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4620 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B3[0], m);
// Set the value of each diagonal entry of A to 1.0 before the UNIT_DIAG round.
// NOTE(review): the body of the k-loop below is not visible in this excerpt.
4624 for (
unsigned int i=0; i<n; i++) {
4625 A[i*n+i].val() = 1.0;
4626 for (
unsigned int k=0; k<ndot; k++)
// Round 4: upper triangle, no transpose, unit diagonal.
4629 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4630 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], n, &B1[0], m);
4631 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4632 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], n, &B2[0], m);
4633 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4634 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], n, &B3[0], m);
// TRSM check with the triangular matrix A on the left (Teuchos::LEFT_SIDE) and
// padded leading dimensions: lda = m+4 and ldb = m+5, while TRSM is called with m rows.
// Three Teuchos::BLAS<int,FadType> objects (teuchos_blas, sacado_blas, sacado_blas2)
// share A and alpha and write into B1, B2 and B3 respectively.
// NOTE(review): the listing numbers embedded in this excerpt skip in places, so the
// function signature and whatever follows each group of calls are not visible here --
// presumably the B1/B2/B3 comparisons; confirm against the full file.
4640 template <
class FadType,
class ScalarType>
// Leading dimensions deliberately larger than the m rows handed to TRSM.
4644 unsigned int lda = m+4;
4645 unsigned int ldb = m+5;
4646 VectorType A(lda*m,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
// Fill every entry of A, padding rows included (i runs to lda), with a random value.
4647 for (
unsigned int j=0; j<m; j++) {
4648 for (
unsigned int i=0; i<lda; i++) {
4649 A[i+j*lda] =
FadType(ndot, urand.number());
// NOTE(review): the body of this k-loop is not visible in this excerpt.
4650 for (
unsigned int k=0; k<ndot; k++)
// alpha is a FadType with ndot random derivative components.
4654 FadType alpha(ndot, urand.number());
4655 for (
unsigned int k=0; k<ndot; k++) {
4656 alpha.fastAccessDx(k) = urand.number();
// Fill B1, B2 and B3 (padding rows included) so that all three hold the same
// derivative components.
4659 for (
unsigned int j=0; j<n; j++) {
4660 for (
unsigned int i=0; i<ldb; i++) {
4661 ScalarType
val = urand.number();
4665 for (
unsigned int k=0; k<ndot; k++) {
4666 val = urand.number();
4667 B1[i+j*ldb].fastAccessDx(k) =
val;
4668 B2[i+j*ldb].fastAccessDx(k) =
val;
4669 B3[i+j*ldb].fastAccessDx(k) =
val;
// Round 1: upper triangle, no transpose, non-unit diagonal.
// teuchos_blas is default-constructed.
4674 Teuchos::BLAS<int,FadType> teuchos_blas;
4675 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4676 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
// sacado_blas is constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Sacado BLAS constructor.
4678 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4679 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4680 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
// sacado_blas2 additionally receives sz as its third constructor argument.
// NOTE(review): sz looks like a workspace size counting one value plus ndot
// derivatives for each of the m*m and m*n entries -- confirm.
4684 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4685 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4686 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4687 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Round 2: lower triangle, no transpose, non-unit diagonal.
4691 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4692 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4693 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4694 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4695 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4696 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Round 3: upper triangle, transposed, non-unit diagonal.
4700 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4701 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4702 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4703 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4704 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4705 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Set the value of each diagonal entry of A to 1.0 before the UNIT_DIAG round.
// NOTE(review): the body of the k-loop below is not visible in this excerpt.
4709 for (
unsigned int i=0; i<m; i++) {
4710 A[i*lda+i].val() = 1.0;
4711 for (
unsigned int k=0; k<ndot; k++)
// Round 4: upper triangle, no transpose, unit diagonal.
4714 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4715 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4716 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4717 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4718 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4719 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// TRSM check with the triangular matrix A on the right (Teuchos::RIGHT_SIDE) and
// padded leading dimensions: lda = n+4 and ldb = m+5.
// Three Teuchos::BLAS<int,FadType> objects (teuchos_blas, sacado_blas, sacado_blas2)
// share A and alpha and write into B1, B2 and B3 respectively.
// NOTE(review): the listing numbers embedded in this excerpt skip in places, so the
// function signature and whatever follows each group of calls are not visible here --
// presumably the B1/B2/B3 comparisons; confirm against the full file.
4725 template <
class FadType,
class ScalarType>
// A is indexed with n columns here, so its leading dimension is padded from n.
4729 unsigned int lda = n+4;
4730 unsigned int ldb = m+5;
4731 VectorType A(lda*n,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
// Fill every entry of A, padding rows included (i runs to lda), with a random value.
4732 for (
unsigned int j=0; j<n; j++) {
4733 for (
unsigned int i=0; i<lda; i++) {
4734 A[i+j*lda] =
FadType(ndot, urand.number());
// NOTE(review): the body of this k-loop is not visible in this excerpt.
4735 for (
unsigned int k=0; k<ndot; k++)
// alpha is a FadType with ndot random derivative components.
4739 FadType alpha(ndot, urand.number());
4740 for (
unsigned int k=0; k<ndot; k++) {
4741 alpha.fastAccessDx(k) = urand.number();
// Fill B1, B2 and B3 (padding rows included) so that all three hold the same
// derivative components.
4744 for (
unsigned int j=0; j<n; j++) {
4745 for (
unsigned int i=0; i<ldb; i++) {
4746 ScalarType
val = urand.number();
4750 for (
unsigned int k=0; k<ndot; k++) {
4751 val = urand.number();
4752 B1[i+j*ldb].fastAccessDx(k) =
val;
4753 B2[i+j*ldb].fastAccessDx(k) =
val;
4754 B3[i+j*ldb].fastAccessDx(k) =
val;
// Round 1: upper triangle, no transpose, non-unit diagonal.
// teuchos_blas is default-constructed.
4759 Teuchos::BLAS<int,FadType> teuchos_blas;
4760 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4761 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
// sacado_blas is constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Sacado BLAS constructor.
4763 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4764 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4765 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
// sacado_blas2 additionally receives sz as its third constructor argument.
// NOTE(review): sz looks like a workspace size counting one value plus ndot
// derivatives for each of the n*n and m*n entries -- confirm.
4769 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4770 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4771 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4772 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Round 2: lower triangle, no transpose, non-unit diagonal.
4776 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4777 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4778 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4779 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4780 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4781 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Round 3: upper triangle, transposed, non-unit diagonal.
4785 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4786 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4787 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4788 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4789 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4790 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Set the value of each diagonal entry of A to 1.0 before the UNIT_DIAG round.
// NOTE(review): the body of the k-loop below is not visible in this excerpt.
4794 for (
unsigned int i=0; i<n; i++) {
4795 A[i*lda+i].val() = 1.0;
4796 for (
unsigned int k=0; k<ndot; k++)
// Round 4: upper triangle, no transpose, unit diagonal.
4799 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4800 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4801 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4802 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4803 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4804 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// TRSM check with A on the left (Teuchos::LEFT_SIDE) where alpha is a plain
// ScalarType rather than a FadType. A is m-by-m and B1/B2/B3 are m-by-n, all
// stored with leading dimension m.
// Three Teuchos::BLAS<int,FadType> objects (teuchos_blas, sacado_blas, sacado_blas2)
// share A and alpha and write into B1, B2 and B3 respectively.
// NOTE(review): the listing numbers embedded in this excerpt skip in places, so the
// function signature and whatever follows each group of calls are not visible here --
// presumably the B1/B2/B3 comparisons; confirm against the full file.
4810 template <
class FadType,
class ScalarType>
4814 VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
// Fill A with random values.
4815 for (
unsigned int j=0; j<m; j++) {
4816 for (
unsigned int i=0; i<m; i++) {
4817 A[i+j*m] =
FadType(ndot, urand.number());
// NOTE(review): the body of this k-loop is not visible in this excerpt.
4818 for (
unsigned int k=0; k<ndot; k++)
// alpha is a single random scalar; no derivative components are set on it.
4822 ScalarType alpha = urand.number();
// Fill B1, B2 and B3 so that all three hold the same derivative components.
4824 for (
unsigned int j=0; j<n; j++) {
4825 for (
unsigned int i=0; i<m; i++) {
4826 ScalarType
val = urand.number();
4830 for (
unsigned int k=0; k<ndot; k++) {
4831 val = urand.number();
4832 B1[i+j*m].fastAccessDx(k) =
val;
4833 B2[i+j*m].fastAccessDx(k) =
val;
4834 B3[i+j*m].fastAccessDx(k) =
val;
// Round 1: upper triangle, no transpose, non-unit diagonal.
// teuchos_blas is default-constructed.
4839 Teuchos::BLAS<int,FadType> teuchos_blas;
4840 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4841 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
// sacado_blas is constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Sacado BLAS constructor.
4843 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4844 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4845 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
// sacado_blas2 additionally receives sz as its third constructor argument.
// NOTE(review): sz looks like a workspace size counting one value plus ndot
// derivatives for each of the m*m and m*n entries -- confirm.
4849 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4850 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4851 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4852 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Round 2: lower triangle, no transpose, non-unit diagonal.
4856 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4857 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4858 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4859 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4860 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4861 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Round 3: upper triangle, transposed, non-unit diagonal.
4865 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4866 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4867 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4868 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4869 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4870 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Set the value of each diagonal entry of A to 1.0 before the UNIT_DIAG round.
// NOTE(review): the body of the k-loop below is not visible in this excerpt.
4874 for (
unsigned int i=0; i<m; i++) {
4875 A[i*m+i].val() = 1.0;
4876 for (
unsigned int k=0; k<ndot; k++)
// Round 4: upper triangle, no transpose, unit diagonal.
4879 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4880 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4881 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4882 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4883 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4884 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// TRSM check with A on the left (Teuchos::LEFT_SIDE), a FadType alpha, and
// m-by-n matrices B1/B2/B3 stored with leading dimension m.
// Unlike the neighbouring TRSM checks, no per-derivative fill of B1/B2/B3 is
// visible in this block.
// NOTE(review): presumably B carries no derivative components here -- confirm
// against the full file.
// NOTE(review): the listing numbers embedded in this excerpt skip in places, so the
// function signature and whatever follows each group of calls are not visible here --
// presumably the B1/B2/B3 comparisons; confirm against the full file.
4890 template <
class FadType,
class ScalarType>
4894 VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
// Fill A with random values.
4895 for (
unsigned int j=0; j<m; j++) {
4896 for (
unsigned int i=0; i<m; i++) {
4897 A[i+j*m] =
FadType(ndot, urand.number());
// NOTE(review): the body of this k-loop is not visible in this excerpt.
4898 for (
unsigned int k=0; k<ndot; k++)
// alpha is a FadType with ndot random derivative components.
4902 FadType alpha(ndot, urand.number());
4903 for (
unsigned int k=0; k<ndot; k++) {
4904 alpha.fastAccessDx(k) = urand.number();
// One random value is drawn per (i,j) entry of B.
// NOTE(review): the statements that consume val are not visible in this excerpt.
4907 for (
unsigned int j=0; j<n; j++) {
4908 for (
unsigned int i=0; i<m; i++) {
4909 ScalarType
val = urand.number();
// Round 1: upper triangle, no transpose, non-unit diagonal.
// teuchos_blas is default-constructed.
4916 Teuchos::BLAS<int,FadType> teuchos_blas;
4917 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4918 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
// sacado_blas is constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Sacado BLAS constructor.
4920 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4921 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4922 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
// sacado_blas2 additionally receives sz as its third constructor argument.
// NOTE(review): sz looks like a workspace size counting one value plus ndot
// derivatives for each of the m*m and m*n entries -- confirm.
4926 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4927 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4928 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4929 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Round 2: lower triangle, no transpose, non-unit diagonal.
4933 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4934 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4935 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4936 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4937 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4938 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Round 3: upper triangle, transposed, non-unit diagonal.
4942 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4943 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4944 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4945 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4946 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4947 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Set the value of each diagonal entry of A to 1.0 before the UNIT_DIAG round.
// NOTE(review): the body of the k-loop below is not visible in this excerpt.
4951 for (
unsigned int i=0; i<m; i++) {
4952 A[i*m+i].val() = 1.0;
4953 for (
unsigned int k=0; k<ndot; k++)
// Round 4: upper triangle, no transpose, unit diagonal.
4956 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4957 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4958 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4959 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4960 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4961 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// TRSM test in which the triangular matrix is filled from plain ScalarType
// values: the same random numbers are stored in the scalar array `a` and
// assigned into the Fad matrix `A`, while `alpha` and the right-hand sides
// B1..B5 carry ndot derivative components. Each TRSM variant below is run
// through three BLAS objects (teuchos_blas, sacado_blas, sacado_blas2), and
// additionally with `a` passed in place of `A` (results in B4 and B5).
// NOTE(review): the function signature and the result comparisons are not
// visible in this listing (the embedded line numbers skip), so the exact
// assertions performed after each group of calls cannot be confirmed here.
4967 template <
class FadType,
class ScalarType>
// A is m-by-m; B1..B5 are m-by-n, all indexed as [i + j*m] below.
4971 VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot),
4972 B4(m*n,ndot), B5(m*n,ndot);
4973 std::vector<ScalarType>
a(m*m);
// Fill a with random scalars and copy each one into the matching entry of A.
// NOTE(review): presumably this leaves A without derivative components (the
// workspace size computed below budgets only m*m entries for A) -- confirm
// against FadType's scalar assignment semantics.
4974 for (
unsigned int j=0; j<m; j++) {
4975 for (
unsigned int i=0; i<m; i++) {
4976 a[i+j*m] = urand.number();
4977 A[i+j*m] =
a[i+j*m];
// alpha: random value plus ndot random derivative components.
4980 FadType alpha(ndot, urand.number());
4981 for (
unsigned int k=0; k<ndot; k++) {
4982 alpha.fastAccessDx(k) = urand.number();
// Initialise the right-hand sides. The statements that consume this first
// `val` (original lines 4988-4992) are not visible in this listing.
4985 for (
unsigned int j=0; j<n; j++) {
4986 for (
unsigned int i=0; i<m; i++) {
4987 ScalarType
val = urand.number();
// Every Bx receives the same random derivative value for component k, so all
// five right-hand sides start with identical derivative data.
4993 for (
unsigned int k=0; k<ndot; k++) {
4994 val = urand.number();
4995 B1[i+j*m].fastAccessDx(k) =
val;
4996 B2[i+j*m].fastAccessDx(k) =
val;
4997 B3[i+j*m].fastAccessDx(k) =
val;
4998 B4[i+j*m].fastAccessDx(k) =
val;
4999 B5[i+j*m].fastAccessDx(k) =
val;
// Case 1: left side, upper triangular, no transpose, non-unit diagonal.
// Default-constructed BLAS object, result in B1.
// NOTE(review): presumably the reference result the other variants are
// compared against -- the comparisons are not visible here.
5004 Teuchos::BLAS<int,FadType> teuchos_blas;
5005 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5006 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
// Same solve through a BLAS object constructed with (false), result in B2.
// NOTE(review): the meaning of the constructor flag is not shown in this
// file; verify against the BLAS<int,FadType> specialization.
5008 Teuchos::BLAS<int,FadType> sacado_blas(
false);
5009 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5010 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
// Same solve through a BLAS object constructed with (false, false, sz),
// result in B3. sz counts m*m entries for A plus m*n*(1+ndot) for B.
// NOTE(review): presumably sz is a preallocated workspace size -- confirm.
5014 unsigned int sz = m*m + m*n*(1+ndot);
5015 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
5016 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5017 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Repeat case 1 with the scalar array a instead of the Fad matrix A
// (sacado_blas -> B4).
5021 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5022 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B4[0], m);
// Scalar-matrix variant through sacado_blas2 -> B5.
5026 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5027 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B5[0], m);
// Case 2: lower triangular, no transpose, non-unit diagonal. The solves
// operate in place on B1..B5 as left by the previous case.
5031 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
5032 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
5033 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
5034 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
5035 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
5036 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
5037 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
5038 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B4[0], m);
5039 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
5040 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B5[0], m);
// Case 3: upper triangular, transposed, non-unit diagonal.
5046 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
5047 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
5048 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
5049 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
5050 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
5051 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
5052 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
5053 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B4[0], m);
5054 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
5055 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B5[0], m);
// Set the diagonal values of A to 1.0 before the unit-diagonal case.
// NOTE(review): the body of the inner k-loop is not visible in this listing,
// and no matching update of the scalar array a is visible either.
5061 for (
unsigned int i=0; i<m; i++) {
5062 A[i*m+i].val() = 1.0;
5063 for (
unsigned int k=0; k<ndot; k++)
// Case 4: upper triangular, no transpose, unit diagonal.
5066 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5067 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
5068 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5069 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
5070 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5071 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
5072 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5073 Teuchos::UNIT_DIAG, m, n, alpha, &
a[0], m, &B4[0], m);
5074 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5075 Teuchos::UNIT_DIAG, m, n, alpha, &
a[0], m, &B5[0], m);
5082 #undef COMPARE_VALUES 5084 #undef COMPARE_FAD_VECTORS 5086 #endif // FADBLASUNITTESTS_HPP
#define COMPARE_FAD_VECTORS(X1, X2, n)
Sacado::Random< ScalarType > urand
Sacado::Fad::DFad< double > FadType
CPPUNIT_TEST_SUITE(FadBLASUnitTests)
A class for storing a contiguously allocated array of Fad objects. This is a general definition that ...
#define COMPARE_FADS(a, b)
Sacado::Random< double > real_urand
expr expr expr fastAccessDx(i)) FAD_UNARYOP_MACRO(exp
Sacado::Fad::Vector< unsigned int, FadType > VectorType