66 o2cblas_ConjTrans=113};
86 for(
size_t i=0;i<m;i++) {
87 for(
size_t j=0;j<n;j++) {
88 if (!std::isfinite(O2SCL_IX2(data,i,j)))
return false;
111 template<
class vec_t>
double dasum(
const size_t N,
const vec_t &X) {
113 for(
size_t i=0;i<N;i++) {
124 template<
class vec_t,
class vec2_t>
125 void daxpy(
const double alpha,
const size_t N,
const vec_t &X,
134 const size_t m=N % 4;
137 O2SCL_IX(Y,i)+=alpha*O2SCL_IX(X,i);
140 for (i=m;i+3<N;i+=4) {
141 O2SCL_IX(Y,i)+=alpha*O2SCL_IX(X,i);
142 O2SCL_IX(Y,i+1)+=alpha*O2SCL_IX(X,i+1);
143 O2SCL_IX(Y,i+2)+=alpha*O2SCL_IX(X,i+2);
144 O2SCL_IX(Y,i+3)+=alpha*O2SCL_IX(X,i+3);
149 template<
class vec_t,
class vec2_t>
150 double ddot(
const size_t N,
const vec_t &X,
const vec2_t &Y) {
155 const size_t m=N % 4;
158 r+=O2SCL_IX(X,i)*O2SCL_IX(Y,i);
161 for (i=m;i+3<N;i+=4) {
162 r+=O2SCL_IX(X,i)*O2SCL_IX(Y,i);
163 r+=O2SCL_IX(X,i+1)*O2SCL_IX(Y,i+1);
164 r+=O2SCL_IX(X,i+2)*O2SCL_IX(Y,i+2);
165 r+=O2SCL_IX(X,i+3)*O2SCL_IX(Y,i+3);
183 template<
class vec_t>
double dnrm2(
const size_t N,
const vec_t &X) {
192 return fabs(O2SCL_IX(X,0));
196 const double x=O2SCL_IX(X,i);
199 const double ax=fabs(x);
202 ssq=1.0+ssq*(scale/ax)*(scale/ax);
205 ssq+=(ax/scale)*(ax/scale);
211 return scale*sqrt(ssq);
216 template<
class vec_t>
217 void dscal(
const double alpha,
const size_t N, vec_t &X) {
220 const size_t m=N % 4;
223 O2SCL_IX(X,i)*=alpha;
226 for (i=m;i+3<N;i+=4) {
227 O2SCL_IX(X,i)*=alpha;
228 O2SCL_IX(X,i+1)*=alpha;
229 O2SCL_IX(X,i+2)*=alpha;
230 O2SCL_IX(X,i+3)*=alpha;
244 template<
class mat_t,
class vec_t,
class vec2_t>
247 const size_t N,
const double alpha,
const mat_t &A,
248 const vec_t &X,
const double beta, vec2_t &Y) {
254 const int Trans=(TransA != o2cblas_ConjTrans) ? TransA : o2cblas_Trans;
256 if (M == 0 || N == 0) {
260 if (alpha == 0.0 && beta == 1.0) {
264 if (Trans == o2cblas_NoTrans) {
275 for (i=0;i<lenY;i++) {
279 }
else if (beta != 1.0) {
281 for (i=0;i<lenY;i++) {
282 O2SCL_IX(Y,iy) *= beta;
291 if ((order == o2cblas_RowMajor && Trans == o2cblas_NoTrans) ||
292 (order == o2cblas_ColMajor && Trans == o2cblas_Trans)) {
296 for (i=0;i<lenY;i++) {
299 for (j=0;j<lenX;j++) {
300 temp+=O2SCL_IX(X,ix)*O2SCL_IX2(A,i,j);
303 O2SCL_IX(Y,iy)+=alpha*temp;
307 }
else if ((order == o2cblas_RowMajor && Trans == o2cblas_Trans) ||
308 (order == o2cblas_ColMajor && Trans == o2cblas_NoTrans)) {
312 for (j=0;j<lenX;j++) {
313 const double temp=alpha*O2SCL_IX(X,ix);
316 for (i=0;i<lenY;i++) {
317 O2SCL_IX(Y,iy)+=temp*O2SCL_IX2(A,j,i);
334 template<
class mat_t,
class vec_t>
339 const size_t M,
const size_t N,
const mat_t &A, vec_t &X) {
341 const int nonunit=(Diag == o2cblas_NonUnit);
344 const int Trans=(TransA != o2cblas_ConjTrans) ? TransA : o2cblas_Trans;
352 if ((order == o2cblas_RowMajor && Trans == o2cblas_NoTrans &&
353 Uplo == o2cblas_Upper) ||
354 (order == o2cblas_ColMajor && Trans == o2cblas_Trans &&
355 Uplo == o2cblas_Lower)) {
363 O2SCL_IX(X,ix)=O2SCL_IX(X,ix)/O2SCL_IX2(A,N-1,N-1);
367 for (i=(
int)(N-1);i>0 && i--;) {
368 double tmp=O2SCL_IX(X,ix);
370 for (j=i+1;j<((int)N);j++) {
371 const double Aij=O2SCL_IX2(A,i,j);
372 tmp-=Aij*O2SCL_IX(X,jx);
376 O2SCL_IX(X,ix)=tmp/O2SCL_IX2(A,i,i);
383 }
else if ((order == o2cblas_RowMajor && Trans == o2cblas_NoTrans &&
384 Uplo == o2cblas_Lower) ||
385 (order == o2cblas_ColMajor && Trans == o2cblas_Trans &&
386 Uplo == o2cblas_Upper)) {
391 O2SCL_IX(X,ix)=O2SCL_IX(X,ix)/O2SCL_IX2(A,0,0);
394 for (i=1;i<((int)N);i++) {
395 double tmp=O2SCL_IX(X,ix);
398 const double Aij=O2SCL_IX2(A,i,j);
399 tmp-=Aij*O2SCL_IX(X,jx);
403 O2SCL_IX(X,ix)=tmp/O2SCL_IX2(A,i,i);
410 }
else if ((order == o2cblas_RowMajor && Trans == o2cblas_Trans &&
411 Uplo == o2cblas_Upper) ||
412 (order == o2cblas_ColMajor && Trans == o2cblas_NoTrans &&
413 Uplo == o2cblas_Lower)) {
420 O2SCL_IX(X,ix)=O2SCL_IX(X,ix)/O2SCL_IX2(A,0,0);
423 for (i=1;i<((int)N);i++) {
424 double tmp=O2SCL_IX(X,ix);
427 const double Aji=O2SCL_IX2(A,j,i);
428 tmp-=Aji*O2SCL_IX(X,jx);
432 O2SCL_IX(X,ix)=tmp/O2SCL_IX2(A,i,i);
439 }
else if ((order == o2cblas_RowMajor && Trans == o2cblas_Trans &&
440 Uplo == o2cblas_Lower) ||
441 (order == o2cblas_ColMajor && Trans == o2cblas_NoTrans &&
442 Uplo == o2cblas_Upper)) {
449 O2SCL_IX(X,ix)=O2SCL_IX(X,ix)/O2SCL_IX2(A,N-1,N-1);
452 for (i=(
int)(N-1);i>0 && i--;) {
453 double tmp=O2SCL_IX(X,ix);
455 for (j=i+1;j<((int)N);j++) {
456 const double Aji=O2SCL_IX2(A,j,i);
457 tmp-=Aji*O2SCL_IX(X,jx);
461 O2SCL_IX(X,ix)=tmp/O2SCL_IX2(A,i,i);
476 template<
class mat_t,
class vec_t>
481 const mat_t &A, vec_t &x) {
485 const int nonunit=(Diag == o2cblas_NonUnit);
486 const int Trans=(TransA != o2cblas_ConjTrans) ? TransA : o2cblas_Trans;
488 if ((Order == o2cblas_RowMajor && Trans == o2cblas_NoTrans &&
489 Uplo == o2cblas_Upper) ||
490 (Order == o2cblas_ColMajor && Trans == o2cblas_Trans &&
491 Uplo == o2cblas_Lower)) {
497 const size_t j_min=i+1;
498 const size_t j_max=N;
500 for (j=j_min;j<j_max;j++) {
501 temp+=O2SCL_IX(x,jx)*O2SCL_IX2(A,i,j);
505 O2SCL_IX(x,i)=temp+O2SCL_IX(x,i)*O2SCL_IX2(A,i,i);
511 }
else if ((Order == o2cblas_RowMajor && Trans == o2cblas_NoTrans &&
512 Uplo == o2cblas_Lower) ||
513 (Order == o2cblas_ColMajor && Trans == o2cblas_Trans &&
514 Uplo == o2cblas_Upper)) {
519 for (i=N;i>0 && i--;) {
521 const size_t j_min=0;
522 const size_t j_max=i;
524 for (j=j_min;j<j_max;j++) {
525 temp+=O2SCL_IX(x,jx)*O2SCL_IX2(A,i,j);
529 O2SCL_IX(x,ix)=temp+O2SCL_IX(x,ix)*O2SCL_IX2(A,i,i);
531 O2SCL_IX(x,ix)+=temp;
536 }
else if ((Order == o2cblas_RowMajor && Trans == o2cblas_Trans &&
537 Uplo == o2cblas_Upper) ||
538 (Order == o2cblas_ColMajor && Trans == o2cblas_NoTrans &&
539 Uplo == o2cblas_Lower)) {
543 for (i=N;i>0 && i--;) {
545 const size_t j_min=0;
546 const size_t j_max=i;
548 for (j=j_min;j<j_max;j++) {
549 temp+=O2SCL_IX(x,jx)*O2SCL_IX2(A,j,i);
553 O2SCL_IX(x,ix)=temp+O2SCL_IX(x,ix)*O2SCL_IX2(A,i,i);
555 O2SCL_IX(x,ix)+=temp;
560 }
else if ((Order == o2cblas_RowMajor && Trans == o2cblas_Trans &&
561 Uplo == o2cblas_Lower) ||
562 (Order == o2cblas_ColMajor && Trans == o2cblas_NoTrans &&
563 Uplo == o2cblas_Upper)) {
567 const size_t j_min=i+1;
568 const size_t j_max=N;
570 for (j=j_min;j<j_max;j++) {
571 temp+=O2SCL_IX(x,jx)*O2SCL_IX2(A,j,i);
575 O2SCL_IX(x,i)=temp+O2SCL_IX(x,i)*O2SCL_IX2(A,i,i);
582 O2SCL_ERR(
"Unrecognized operation in dtrmv().",
603 template<
class mat_t>
607 const size_t N,
const size_t K,
const double alpha,
608 const mat_t &A,
const mat_t &B,
const double beta, mat_t &C) {
614 if (alpha == 0.0 && beta == 1.0) {
626 if (Order == o2cblas_RowMajor) {
635 O2SCL_IX2(C,i,j)=0.0;
638 }
else if (beta != 1.0) {
641 O2SCL_IX2(C,i,j)*=beta;
650 TransF=(TransA == o2cblas_ConjTrans) ? o2cblas_Trans : TransA;
651 TransG=(TransB == o2cblas_ConjTrans) ? o2cblas_Trans : TransB;
653 if (TransF == o2cblas_NoTrans && TransG == o2cblas_NoTrans) {
659 const double temp=alpha*O2SCL_IX2(A,i,k);
662 O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(B,k,j);
668 }
else if (TransF == o2cblas_NoTrans && TransG == o2cblas_Trans) {
676 temp+=O2SCL_IX2(A,i,k)*O2SCL_IX2(B,j,k);
678 O2SCL_IX2(C,i,j)+=alpha*temp;
682 }
else if (TransF == o2cblas_Trans && TransG == o2cblas_NoTrans) {
686 const double temp=alpha*O2SCL_IX2(A,k,i);
689 O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(B,k,j);
695 }
else if (TransF == o2cblas_Trans && TransG == o2cblas_Trans) {
701 temp+=O2SCL_IX2(A,k,i)*O2SCL_IX2(B,j,k);
703 O2SCL_IX2(C,i,j)+=alpha*temp;
722 O2SCL_IX2(C,i,j)=0.0;
725 }
else if (beta != 1.0) {
728 O2SCL_IX2(C,i,j)*=beta;
737 TransF=(TransB == o2cblas_ConjTrans) ? o2cblas_Trans : TransB;
738 TransG=(TransA == o2cblas_ConjTrans) ? o2cblas_Trans : TransA;
740 if (TransF == o2cblas_NoTrans && TransG == o2cblas_NoTrans) {
746 const double temp=alpha*O2SCL_IX2(B,i,k);
749 O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(A,k,j);
755 }
else if (TransF == o2cblas_NoTrans && TransG == o2cblas_Trans) {
763 temp+=O2SCL_IX2(B,i,k)*O2SCL_IX2(A,j,k);
765 O2SCL_IX2(C,i,j)+=alpha*temp;
769 }
else if (TransF == o2cblas_Trans && TransG == o2cblas_NoTrans) {
773 const double temp=alpha*O2SCL_IX2(B,k,i);
776 O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(A,k,j);
782 }
else if (TransF == o2cblas_Trans && TransG == o2cblas_Trans) {
788 temp+=O2SCL_IX2(B,k,i)*O2SCL_IX2(A,j,k);
790 O2SCL_IX2(C,i,j)+=alpha*temp;
814 template<
class mat_t>
820 const size_t M,
const size_t N,
821 const double alpha,
const mat_t &A, mat_t &B) {
826 const int nonunit = (Diag == o2cblas_NonUnit);
827 int side, uplo, trans;
829 if (Order == o2cblas_RowMajor) {
834 trans = (TransA == o2cblas_ConjTrans) ? o2cblas_Trans : TransA;
838 side = (Side == o2cblas_Left) ? o2cblas_Right : o2cblas_Left;
839 uplo = (Uplo == o2cblas_Upper) ? o2cblas_Lower : o2cblas_Upper;
840 trans = (TransA == o2cblas_ConjTrans) ? o2cblas_Trans : TransA;
843 if (side == o2cblas_Left && uplo == o2cblas_Upper &&
844 trans == o2cblas_NoTrans) {
849 for (i = 0; i < n1; i++) {
850 for (j = 0; j < n2; j++) {
851 O2SCL_IX2(B,i,j)*=alpha;
856 for (i = n1; i > 0 && i--;) {
858 double Aii = O2SCL_IX2(A,i,i);
859 for (j = 0; j < n2; j++) {
860 O2SCL_IX2(B,i,j)/=Aii;
864 for (k = 0; k < i; k++) {
865 const double Aki = O2SCL_IX2(A,k,i);
866 for (j = 0; j < n2; j++) {
867 O2SCL_IX2(B,k,j)-=Aki*O2SCL_IX2(B,i,j);
872 }
else if (side == o2cblas_Left && uplo == o2cblas_Upper &&
873 trans == o2cblas_Trans) {
878 for (i = 0; i < n1; i++) {
879 for (j = 0; j < n2; j++) {
880 O2SCL_IX2(B,i,j) *= alpha;
885 for (i = 0; i < n1; i++) {
887 double Aii = O2SCL_IX2(A,i,i);
888 for (j = 0; j < n2; j++) {
889 O2SCL_IX2(B,i,j) /= Aii;
893 for (k = i + 1; k < n1; k++) {
894 const double Aik = O2SCL_IX2(A,i,k);
895 for (j = 0; j < n2; j++) {
896 O2SCL_IX2(B,k,j) -= Aik * O2SCL_IX2(B,i,j);
901 }
else if (side == o2cblas_Left && uplo == o2cblas_Lower &&
902 trans == o2cblas_NoTrans) {
907 for (i = 0; i < n1; i++) {
908 for (j = 0; j < n2; j++) {
909 O2SCL_IX2(B,i,j) *= alpha;
914 for (i = 0; i < n1; i++) {
916 double Aii = O2SCL_IX2(A,i,i);
917 for (j = 0; j < n2; j++) {
918 O2SCL_IX2(B,i,j) /= Aii;
922 for (k = i + 1; k < n1; k++) {
923 const double Aki = O2SCL_IX2(A,k,i);
924 for (j = 0; j < n2; j++) {
925 O2SCL_IX2(B,k,j) -= Aki * O2SCL_IX2(B,i,j);
931 }
else if (side == o2cblas_Left && uplo == o2cblas_Lower &&
932 trans == o2cblas_Trans) {
937 for (i = 0; i < n1; i++) {
938 for (j = 0; j < n2; j++) {
939 O2SCL_IX2(B,i,j) *= alpha;
944 for (i = n1; i > 0 && i--;) {
946 double Aii = O2SCL_IX2(A,i,i);
947 for (j = 0; j < n2; j++) {
948 O2SCL_IX2(B,i,j) /= Aii;
952 for (k = 0; k < i; k++) {
953 const double Aik = O2SCL_IX2(A,i,k);
954 for (j = 0; j < n2; j++) {
955 O2SCL_IX2(B,k,j) -= Aik * O2SCL_IX2(B,i,j);
960 }
else if (side == o2cblas_Right && uplo == o2cblas_Upper &&
961 trans == o2cblas_NoTrans) {
966 for (i = 0; i < n1; i++) {
967 for (j = 0; j < n2; j++) {
968 O2SCL_IX2(B,i,j) *= alpha;
973 for (i = 0; i < n1; i++) {
974 for (j = 0; j < n2; j++) {
976 double Ajj = O2SCL_IX2(A,j,j);
977 O2SCL_IX2(B,i,j) /= Ajj;
981 double Bij = O2SCL_IX2(B,i,j);
982 for (k = j + 1; k < n2; k++) {
983 O2SCL_IX2(B,i,k) -= O2SCL_IX2(A,j,k) * Bij;
989 }
else if (side == o2cblas_Right && uplo == o2cblas_Upper &&
990 trans == o2cblas_Trans) {
995 for (i = 0; i < n1; i++) {
996 for (j = 0; j < n2; j++) {
997 O2SCL_IX2(B,i,j) *= alpha;
1002 for (i = 0; i < n1; i++) {
1003 for (j = n2; j > 0 && j--;) {
1006 double Ajj = O2SCL_IX2(A,j,j);
1007 O2SCL_IX2(B,i,j) /= Ajj;
1011 double Bij = O2SCL_IX2(B,i,j);
1012 for (k = 0; k < j; k++) {
1013 O2SCL_IX2(B,i,k) -= O2SCL_IX2(A,k,j) * Bij;
1019 }
else if (side == o2cblas_Right && uplo == o2cblas_Lower &&
1020 trans == o2cblas_NoTrans) {
1025 for (i = 0; i < n1; i++) {
1026 for (j = 0; j < n2; j++) {
1027 O2SCL_IX2(B,i,j) *= alpha;
1032 for (i = 0; i < n1; i++) {
1033 for (j = n2; j > 0 && j--;) {
1036 double Ajj = O2SCL_IX2(A,j,j);
1037 O2SCL_IX2(B,i,j) /= Ajj;
1041 double Bij = O2SCL_IX2(B,i,j);
1042 for (k = 0; k < j; k++) {
1043 O2SCL_IX2(B,i,k) -= O2SCL_IX2(A,j,k) * Bij;
1049 }
else if (side == o2cblas_Right && uplo == o2cblas_Lower &&
1050 trans == o2cblas_Trans) {
1056 for (i = 0; i < n1; i++) {
1057 for (j = 0; j < n2; j++) {
1058 O2SCL_IX2(B,i,j) *= alpha;
1063 for (i = 0; i < n1; i++) {
1064 for (j = 0; j < n2; j++) {
1066 double Ajj = O2SCL_IX2(A,j,j);
1067 O2SCL_IX2(B,i,j) /= Ajj;
1071 double Bij = O2SCL_IX2(B,i,j);
1072 for (k = j + 1; k < n2; k++) {
1073 O2SCL_IX2(B,i,k) -= O2SCL_IX2(A,k,j) * Bij;
1102 template<
class vec_t,
class vec2_t>
1104 vec2_t &Y,
const size_t ie) {
1108 if (alpha == 0.0)
return;
1109 #if O2SCL_NO_RANGE_CHECK
1116 const size_t m=(N-ie) % 4;
1118 for (i=ie;i<ie+m;i++) {
1119 O2SCL_IX(Y,i)+=alpha*O2SCL_IX(X,i);
1123 O2SCL_IX(Y,i)+=alpha*O2SCL_IX(X,i);
1124 O2SCL_IX(Y,i+1)+=alpha*O2SCL_IX(X,i+1);
1125 O2SCL_IX(Y,i+2)+=alpha*O2SCL_IX(X,i+2);
1126 O2SCL_IX(Y,i+3)+=alpha*O2SCL_IX(X,i+3);
1139 template<
class vec_t,
class vec2_t>
1145 #if O2SCL_NO_RANGE_CHECK
1152 const size_t m=(N-ie) % 4;
1154 for (i=ie;i<ie+m;i++) {
1155 r+=O2SCL_IX(X,i)*O2SCL_IX(Y,i);
1159 r+=O2SCL_IX(X,i)*O2SCL_IX(Y,i);
1160 r+=O2SCL_IX(X,i+1)*O2SCL_IX(Y,i+1);
1161 r+=O2SCL_IX(X,i+2)*O2SCL_IX(Y,i+2);
1162 r+=O2SCL_IX(X,i+3)*O2SCL_IX(Y,i+3);
1180 template<
class vec_t>
1186 #if O2SCL_NO_RANGE_CHECK
1194 return fabs(O2SCL_IX(X,ie));
1197 for (
size_t i=ie;i<N;i++) {
1198 const double x=O2SCL_IX(X,i);
1201 const double ax=fabs(x);
1204 ssq=1.0+ssq*(scale/ax)*(scale/ax);
1207 ssq+=(ax/scale)*(ax/scale);
1213 return scale*sqrt(ssq);
1225 template<
class vec_t>
1229 #if O2SCL_NO_RANGE_CHECK
1237 const size_t m=(N-ie) % 4;
1239 for (i=ie;i<ie+m;i++) {
1240 O2SCL_IX(X,i)*=alpha;
1244 O2SCL_IX(X,i)*=alpha;
1245 O2SCL_IX(X,i+1)*=alpha;
1246 O2SCL_IX(X,i+2)*=alpha;
1247 O2SCL_IX(X,i+3)*=alpha;
1264 template<
class mat_t,
class vec_t>
1266 const size_t ir,
const size_t ic, vec_t &y) {
1268 #if O2SCL_NO_RANGE_CHECK
1280 const size_t m=(M-ir) % 4;
1282 for (i=ir;i<m+ir;i++) {
1283 O2SCL_IX(y,i)+=alpha*O2SCL_IX2(X,i,ic);
1287 O2SCL_IX(y,i)+=alpha*O2SCL_IX2(X,i,ic);
1288 O2SCL_IX(y,i+1)+=alpha*O2SCL_IX2(X,i+1,ic);
1289 O2SCL_IX(y,i+2)+=alpha*O2SCL_IX2(X,i+2,ic);
1290 O2SCL_IX(y,i+3)+=alpha*O2SCL_IX2(X,i+3,ic);
1306 template<
class mat_t,
class vec_t>
1308 const size_t ic,
const vec_t &y) {
1309 #if O2SCL_NO_RANGE_CHECK
1318 const size_t m=(M-ir) % 4;
1320 for (i=ir;i<m+ir;i++) {
1321 r+=O2SCL_IX2(X,i,ic)*O2SCL_IX(y,i);
1325 r+=O2SCL_IX2(X,i,ic)*O2SCL_IX(y,i);
1326 r+=O2SCL_IX2(X,i+1,ic)*O2SCL_IX(y,i+1);
1327 r+=O2SCL_IX2(X,i+2,ic)*O2SCL_IX(y,i+2);
1328 r+=O2SCL_IX2(X,i+3,ic)*O2SCL_IX(y,i+3);
1349 template<
class mat_t>
1357 #if O2SCL_NO_RANGE_CHECK
1366 return fabs(O2SCL_IX2(A,ir,ic));
1369 for (i=ir;i<M;i++) {
1370 const double x=O2SCL_IX2(A,i,ic);
1373 const double ax=fabs(x);
1376 ssq=1.0+ssq*(scale/ax)*(scale/ax);
1379 ssq+=(ax/scale)*(ax/scale);
1385 return scale*sqrt(ssq);
1398 template<
class mat_t>
1400 const size_t M,
const double alpha) {
1402 #if O2SCL_NO_RANGE_CHECK
1410 const size_t m=(M-ir) % 4;
1412 for (i=ir;i<m+ir;i++) {
1413 O2SCL_IX2(A,i,ic)*=alpha;
1417 O2SCL_IX2(A,i,ic)*=alpha;
1418 O2SCL_IX2(A,i+1,ic)*=alpha;
1419 O2SCL_IX2(A,i+2,ic)*=alpha;
1420 O2SCL_IX2(A,i+3,ic)*=alpha;
1436 template<
class mat_t>
1440 #if O2SCL_NO_RANGE_CHECK
1449 const size_t m=(M-ir) % 4;
1451 for (i=ir;i<m+ir;i++) {
1452 r+=fabs(O2SCL_IX2(A,i,ic));
1456 r+=fabs(O2SCL_IX2(A,i,ic));
1457 r+=fabs(O2SCL_IX2(A,i+1,ic));
1458 r+=fabs(O2SCL_IX2(A,i+2,ic));
1459 r+=fabs(O2SCL_IX2(A,i+3,ic));
1482 template<
class mat_t,
class vec_t>
1484 const size_t ir,
const size_t ic, vec_t &Y) {
1486 #if O2SCL_NO_RANGE_CHECK
1498 const size_t m=(N-ic) % 4;
1500 for (i=ic;i<m+ic;i++) {
1501 O2SCL_IX(Y,i)+=alpha*O2SCL_IX2(X,ir,i);
1505 O2SCL_IX(Y,i)+=alpha*O2SCL_IX2(X,ir,i);
1506 O2SCL_IX(Y,i+1)+=alpha*O2SCL_IX2(X,ir,i+1);
1507 O2SCL_IX(Y,i+2)+=alpha*O2SCL_IX2(X,ir,i+2);
1508 O2SCL_IX(Y,i+3)+=alpha*O2SCL_IX2(X,ir,i+3);
1528 template<
class mat_t,
class vec_t>
1530 const size_t ic,
const vec_t &Y) {
1532 #if O2SCL_NO_RANGE_CHECK
1541 const size_t m=(N-ic) % 4;
1543 for (i=ic;i<m+ic;i++) {
1544 r+=O2SCL_IX2(X,ir,i)*O2SCL_IX(Y,i);
1548 r+=O2SCL_IX2(X,ir,i)*O2SCL_IX(Y,i);
1549 r+=O2SCL_IX2(X,ir,i+1)*O2SCL_IX(Y,i+1);
1550 r+=O2SCL_IX2(X,ir,i+2)*O2SCL_IX(Y,i+2);
1551 r+=O2SCL_IX2(X,ir,i+3)*O2SCL_IX(Y,i+3);
1567 template<
class mat_t>
1576 return fabs(O2SCL_IX2(M,ir,ic));
1579 for (i=ic;i<N;i++) {
1580 const double x=O2SCL_IX2(M,ir,i);
1583 const double ax=fabs(x);
1586 ssq=1.0+ssq*(scale/ax)*(scale/ax);
1589 ssq+=(ax/scale)*(ax/scale);
1595 return scale*sqrt(ssq);
1610 template<
class mat_t>
1612 const size_t N,
const double alpha) {
1614 #if O2SCL_NO_RANGE_CHECK
1622 const size_t m=(N-ic) % 4;
1624 for (i=ic;i<m+ic;i++) {
1625 O2SCL_IX2(A,ir,i)*=alpha;
1629 O2SCL_IX2(A,ir,i)*=alpha;
1630 O2SCL_IX2(A,ir,i+1)*=alpha;
1631 O2SCL_IX2(A,ir,i+2)*=alpha;
1632 O2SCL_IX2(A,ir,i+3)*=alpha;
1639 #ifdef O2SCL_NEVER_DEFINED
1656 template<
class mat_t>
1660 const size_t N,
const size_t K,
const double alpha,
1661 const mat_t &A,
const mat_t &B,
1662 const double beta, mat_t &C,
size_t rstarta,
1663 size_t cstarta,
size_t rstartb,
size_t cstartb,
1664 size_t rstartc,
size_t cstartc) {
1670 if (alpha == 0.0 && beta == 1.0) {
1682 if (Order == o2cblas_RowMajor) {
1689 for (i=rstartc;i<n1;i++) {
1690 for (j=cstartc;j<n2;j++) {
1691 O2SCL_IX2(C,i,j)=0.0;
1694 }
else if (beta != 1.0) {
1695 for (i=rstartc;i<n1;i++) {
1696 for (j=cstartc;j<n2;j++) {
1697 O2SCL_IX2(C,i,j)*=beta;
1706 TransF=(TransA == o2cblas_ConjTrans) ? o2cblas_Trans : TransA;
1707 TransG=(TransB == o2cblas_ConjTrans) ? o2cblas_Trans : TransB;
1709 if (TransF == o2cblas_NoTrans && TransG == o2cblas_NoTrans) {
1713 for (k=cstarta;k<K;k++) {
1714 for (i=rstarta;i<n1;i++) {
1715 const double temp=alpha*O2SCL_IX2(A,i,k);
1717 for (j=rstartc;j<n2;j++) {
1718 O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(B,k,j);
1724 }
else if (TransF == o2cblas_NoTrans && TransG == o2cblas_Trans) {
1728 for (i=mstart;i<n1;i++) {
1729 for (j=nstart;j<n2;j++) {
1731 for (k=kstart;k<K;k++) {
1732 temp+=O2SCL_IX2(A,i,k)*O2SCL_IX2(B,j,k);
1734 O2SCL_IX2(C,i,j)+=alpha*temp;
1738 }
else if (TransF == o2cblas_Trans && TransG == o2cblas_NoTrans) {
1740 for (k=kstart;k<K;k++) {
1741 for (i=mstart;i<n1;i++) {
1742 const double temp=alpha*O2SCL_IX2(A,k,i);
1744 for (j=nstart;j<n2;j++) {
1745 O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(B,k,j);
1751 }
else if (TransF == o2cblas_Trans && TransG == o2cblas_Trans) {
1753 for (i=mstart;i<n1;i++) {
1754 for (j=nstart;j<n2;j++) {
1756 for (k=kstart;k<K;k++) {
1757 temp+=O2SCL_IX2(A,k,i)*O2SCL_IX2(B,j,k);
1759 O2SCL_IX2(C,i,j)+=alpha*temp;
1764 O2SCL_ERR(
"Unrecognized operation in dgemm_submat().",
1777 for (i=nstart;i<n1;i++) {
1778 for (j=mstart;j<n2;j++) {
1779 O2SCL_IX2(C,i,j)=0.0;
1782 }
else if (beta != 1.0) {
1783 for (i=nstart;i<n1;i++) {
1784 for (j=mstart;j<n2;j++) {
1785 O2SCL_IX2(C,i,j)*=beta;
1794 TransF=(TransB == o2cblas_ConjTrans) ? o2cblas_Trans : TransB;
1795 TransG=(TransA == o2cblas_ConjTrans) ? o2cblas_Trans : TransA;
1797 if (TransF == o2cblas_NoTrans && TransG == o2cblas_NoTrans) {
1801 for (k=kstart;k<K;k++) {
1802 for (i=nstart;i<n1;i++) {
1803 const double temp=alpha*O2SCL_IX2(B,i,k);
1805 for (j=mstart;j<n2;j++) {
1806 O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(A,k,j);
1812 }
else if (TransF == o2cblas_NoTrans && TransG == o2cblas_Trans) {
1816 for (i=nstart;i<n1;i++) {
1817 for (j=mstart;j<n2;j++) {
1819 for (k=kstart;k<K;k++) {
1820 temp+=O2SCL_IX2(B,i,k)*O2SCL_IX2(A,j,k);
1822 O2SCL_IX2(C,i,j)+=alpha*temp;
1826 }
else if (TransF == o2cblas_Trans && TransG == o2cblas_NoTrans) {
1828 for (k=kstart;k<K;k++) {
1829 for (i=nstart;i<n1;i++) {
1830 const double temp=alpha*O2SCL_IX2(B,k,i);
1832 for (j=mstart;j<n2;j++) {
1833 O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(A,k,j);
1839 }
else if (TransF == o2cblas_Trans && TransG == o2cblas_Trans) {
1841 for (i=nstart;i<n1;i++) {
1842 for (j=mstart;j<n2;j++) {
1844 for (k=kstart;k<K;k++) {
1845 temp+=O2SCL_IX2(B,k,i)*O2SCL_IX2(A,j,k);
1847 O2SCL_IX2(C,i,j)+=alpha*temp;
1852 O2SCL_ERR(
"Unrecognized operation in dgemm_submat().",
1867 template<
class mat_t>
1873 const size_t M,
const size_t N,
const double alpha,
1874 const mat_t &A, mat_t &B,
size_t mstart,
size_t nstart) {
1878 size_t istart, jstart;
1880 const int nonunit = (Diag == o2cblas_NonUnit);
1881 int side, uplo, trans;
1883 if (Order == o2cblas_RowMajor) {
1888 trans = (TransA == o2cblas_ConjTrans) ? o2cblas_Trans : TransA;
1894 side = (Side == o2cblas_Left) ? o2cblas_Right : o2cblas_Left;
1895 uplo = (Uplo == o2cblas_Upper) ? o2cblas_Lower : o2cblas_Upper;
1896 trans = (TransA == o2cblas_ConjTrans) ? o2cblas_Trans : TransA;
1901 if (side == o2cblas_Left && uplo == o2cblas_Upper &&
1902 trans == o2cblas_NoTrans) {
1907 for (i = istart; i < n1; i++) {
1908 for (j = jstart; j < n2; j++) {
1909 O2SCL_IX2(B,i,j)*=alpha;
1914 for (i = n1; i > istart && i--;) {
1916 double Aii = O2SCL_IX2(A,i,i);
1917 for (j = jstart; j < n2; j++) {
1918 O2SCL_IX2(B,i,j)/=Aii;
1922 for (k = 0; k < i; k++) {
1923 const double Aki = O2SCL_IX2(A,k,i);
1924 for (j = jstart; j < n2; j++) {
1925 O2SCL_IX2(B,k,j)-=Aki*O2SCL_IX2(B,i,j);
1930 }
else if (side == o2cblas_Left && uplo == o2cblas_Upper &&
1931 trans == o2cblas_Trans) {
1936 for (i = istart; i < n1; i++) {
1937 for (j = jstart; j < n2; j++) {
1938 O2SCL_IX2(B,i,j) *= alpha;
1943 for (i = istart; i < n1; i++) {
1945 double Aii = O2SCL_IX2(A,i,i);
1946 for (j = jstart; j < n2; j++) {
1947 O2SCL_IX2(B,i,j) /= Aii;
1951 for (k = i + 1; k < n1; k++) {
1952 const double Aik = O2SCL_IX2(A,i,k);
1953 for (j = jstart; j < n2; j++) {
1954 O2SCL_IX2(B,k,j) -= Aik * O2SCL_IX2(B,i,j);
1959 }
else if (side == o2cblas_Left && uplo == o2cblas_Lower &&
1960 trans == o2cblas_NoTrans) {
1965 for (i = istart; i < n1; i++) {
1966 for (j = jstart; j < n2; j++) {
1967 O2SCL_IX2(B,i,j) *= alpha;
1972 for (i = istart; i < n1; i++) {
1974 double Aii = O2SCL_IX2(A,i,i);
1975 for (j = jstart; j < n2; j++) {
1976 O2SCL_IX2(B,i,j) /= Aii;
1980 for (k = i + 1; k < n1; k++) {
1981 const double Aki = O2SCL_IX2(A,k,i);
1982 for (j = jstart; j < n2; j++) {
1983 O2SCL_IX2(B,k,j) -= Aki * O2SCL_IX2(B,i,j);
1989 }
else if (side == o2cblas_Left && uplo == o2cblas_Lower &&
1990 trans == o2cblas_Trans) {
1995 for (i = istart; i < n1; i++) {
1996 for (j = jstart; j < n2; j++) {
1997 O2SCL_IX2(B,i,j) *= alpha;
2002 for (i = n1; i > istart && i--;) {
2004 double Aii = O2SCL_IX2(A,i,i);
2005 for (j = jstart; j < n2; j++) {
2006 O2SCL_IX2(B,i,j) /= Aii;
2010 for (k = 0; k < i; k++) {
2011 const double Aik = O2SCL_IX2(A,i,k);
2012 for (j = jstart; j < n2; j++) {
2013 O2SCL_IX2(B,k,j) -= Aik * O2SCL_IX2(B,i,j);
2018 }
else if (side == o2cblas_Right && uplo == o2cblas_Upper &&
2019 trans == o2cblas_NoTrans) {
2024 for (i = istart; i < n1; i++) {
2025 for (j = jstart; j < n2; j++) {
2026 O2SCL_IX2(B,i,j) *= alpha;
2031 for (i = istart; i < n1; i++) {
2032 for (j = jstart; j < n2; j++) {
2034 double Ajj = O2SCL_IX2(A,j,j);
2035 O2SCL_IX2(B,i,j) /= Ajj;
2039 double Bij = O2SCL_IX2(B,i,j);
2040 for (k = j + 1; k < n2; k++) {
2041 O2SCL_IX2(B,i,k) -= O2SCL_IX2(A,j,k) * Bij;
2047 }
else if (side == o2cblas_Right && uplo == o2cblas_Upper &&
2048 trans == o2cblas_Trans) {
2053 for (i = istart; i < n1; i++) {
2054 for (j = jstart; j < n2; j++) {
2055 O2SCL_IX2(B,i,j) *= alpha;
2060 for (i = istart; i < n1; i++) {
2061 for (j = n2; j > jstart && j--;) {
2064 double Ajj = O2SCL_IX2(A,j,j);
2065 O2SCL_IX2(B,i,j) /= Ajj;
2069 double Bij = O2SCL_IX2(B,i,j);
2070 for (k = 0; k < j; k++) {
2071 O2SCL_IX2(B,i,k) -= O2SCL_IX2(A,k,j) * Bij;
2077 }
else if (side == o2cblas_Right && uplo == o2cblas_Lower &&
2078 trans == o2cblas_NoTrans) {
2083 for (i = istart; i < n1; i++) {
2084 for (j = jstart; j < n2; j++) {
2085 O2SCL_IX2(B,i,j) *= alpha;
2090 for (i = istart; i < n1; i++) {
2091 for (j = n2; j > jstart && j--;) {
2094 double Ajj = O2SCL_IX2(A,j,j);
2095 O2SCL_IX2(B,i,j) /= Ajj;
2099 double Bij = O2SCL_IX2(B,i,j);
2100 for (k = 0; k < j; k++) {
2101 O2SCL_IX2(B,i,k) -= O2SCL_IX2(A,j,k) * Bij;
2107 }
else if (side == o2cblas_Right && uplo == o2cblas_Lower &&
2108 trans == o2cblas_Trans) {
2114 for (i = istart; i < n1; i++) {
2115 for (j = jstart; j < n2; j++) {
2116 O2SCL_IX2(B,i,j) *= alpha;
2121 for (i = istart; i < n1; i++) {
2122 for (j = jstart; j < n2; j++) {
2124 double Ajj = O2SCL_IX2(A,j,j);
2125 O2SCL_IX2(B,i,j) /= Ajj;
2129 double Bij = O2SCL_IX2(B,i,j);
2130 for (k = j + 1; k < n2; k++) {
2131 O2SCL_IX2(B,i,k) -= O2SCL_IX2(A,k,j) * Bij;