00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00045 #ifndef MAT_GBLAS_HEADER
00046 #define MAT_GBLAS_HEADER
00047 #include <ctime>
00048 #include "Failure.h"
00049
00050
00051 #include "config.h"
00052
00053 #include "template_lapack_common.h"
00054
00055 #ifdef USE_SSE_INTRINSICS
00056 #include "Memory_buffer_thread.h"
00057 #include "gemm_sse/gemm_sse.h"
00058 #endif
00059
00060
00061 extern "C" void dgemm_(const char *ta,const char *tb,
00062 const int *n, const int *k, const int *l,
00063 const double *alpha,const double *A,const int *lda,
00064 const double *B, const int *ldb,
00065 const double *beta, double *C, const int *ldc);
00066 extern "C" void dpptrf_(const char *uplo,const int *n, double* ap, int *info);
00067 extern "C" void dspgst_(const int *itype, const char *uplo,const int *n,
00068 double* ap,const double *bp,int *info);
00069 extern "C" void dtptri_(const char *uplo,const char *diag,const int *n,
00070 double* ap,int *info);
00071
00072
00073 extern "C" void dtrmm_(const char *side,const char *uplo,const char *transa,
00074 const char *diag,const int *m,const int *n,
00075 const double *alpha,const double *A,const int *lda,
00076 double *B,const int *ldb);
00077 extern "C" void dsygv_(const int *itype,const char *jobz,
00078 const char *uplo,const int *n,
00079 double *A,const int *lda,double *B,const int *ldb,
00080 double* w,double* work,const int *lwork,int *info);
00081 extern "C" void dggev_(const char *jobbl, const char *jobvr, const int *n,
00082 double *A, const int *lda, double *B, const int *ldb,
00083 double *alphar, double *alphai, double *beta,
00084 double *vl, const int *ldvl,
00085 double *vr, const int *ldvr,
00086 double *work, const int *lwork, int *info);
00087 extern "C" void dpotrf_(const char *uplo, const int *n, double *A,
00088 const int *lda, int *info);
00089 extern "C" void dtrtri_(const char *uplo,const char *diag,const int *n,
00090 double *A, const int *lda, int *info);
00091 extern "C" void dsyrk_(const char *uplo, const char *trans, const int *n,
00092 const int *k, const double *alpha, const double *A,
00093 const int *lda, const double *beta,
00094 double *C, const int *ldc);
00095 extern "C" void dsymm_(const char *side,const char *uplo,
00096 const int *m,const int *n,
00097 const double *alpha,const double *A,const int *lda,
00098 const double *B,const int *ldb, const double* beta,
00099 double *C,const int *ldc);
00100 extern "C" void dpocon_(const char *uplo, const int *n, const double *A,
00101 const int *lda, const double *anorm, double *rcond,
00102 double *work, int *iwork, int *info);
00103 extern "C" void dstevx_(const char *jobz, const char *range, const int *n,
00104 double *d, double *e, const double *vl,
00105 const double *vu, const int *il, const int *iu,
00106 const double *abstol, int *m, double *w, double *z,
00107 const int *ldz, double *work, int *iwork, int *ifail,
00108 int *info);
00109 extern "C" void dstevr_(const char *jobz, const char *range, const int *n,
00110 double *d, double *e, const double *vl,
00111 const double *vu, const int *il, const int *iu,
00112 const double *abstol, int *m, double *w, double *z,
00113 const int *ldz, int* isuppz, double *work, int* lwork,
00114 int *iwork, int* liwork, int *info);
00115 extern "C" void dsyev_(const char *jobz, const char *uplo, const int *n,
00116 double *a, const int *lda, double *w, double *work,
00117 const int *lwork, int *info);
00118
00119
00120 extern "C" void dgemv_(const char *ta, const int *m, const int *n,
00121 const double *alpha, const double *A, const int *lda,
00122 const double *x, const int *incx, const double *beta,
00123 double *y, const int *incy);
00124 extern "C" void dsymv_(const char *uplo, const int *n,
00125 const double *alpha, const double *A, const int *lda,
00126 const double *x, const int *incx, const double *beta,
00127 double *y, const int *incy);
00128 extern "C" void dtrmv_(const char *uplo, const char *trans, const char *diag,
00129 const int *n, const double *A, const int *lda,
00130 double *x, const int *incx);
00131
00132 extern "C" void dscal_(const int* n,const double* da, double* dx,
00133 const int* incx);
00134 extern "C" double ddot_(const int* n, const double* dx, const int* incx,
00135 const double* dy, const int* incy);
00136 extern "C" void daxpy_(const int* n, const double* da, const double* dx,
00137 const int* incx, double* dy,const int* incy);
00138
00139
00140
00141 extern "C" void sgemm_(const char *ta,const char *tb,
00142 const int *n, const int *k, const int *l,
00143 const float *alpha,const float *A,const int *lda,
00144 const float *B, const int *ldb,
00145 const float *beta, float *C, const int *ldc);
00146 extern "C" void spptrf_(const char *uplo,const int *n, float* ap, int *info);
00147 extern "C" void sspgst_(const int *itype, const char *uplo,const int *n,
00148 float* ap,const float *bp,int *info);
00149 extern "C" void stptri_(const char *uplo,const char *diag,const int *n,
00150 float* ap,int *info);
00151
00152
00153 extern "C" void strmm_(const char *side,const char *uplo,const char *transa,
00154 const char *diag,const int *m,const int *n,
00155 const float *alpha,const float *A,const int *lda,
00156 float *B,const int *ldb);
00157 extern "C" void ssygv_(const int *itype,const char *jobz,
00158 const char *uplo,const int *n,
00159 float *A,const int *lda,float *B,const int *ldb,
00160 float* w,float* work,const int *lwork,int *info);
00161 extern "C" void sggev_(const char *jobbl, const char *jobvr, const int *n,
00162 float *A, const int *lda, float *B, const int *ldb,
00163 float *alphar, float *alphai, float *beta,
00164 float *vl, const int *ldvl,
00165 float *vr, const int *ldvr,
00166 float *work, const int *lwork, int *info);
00167 extern "C" void spotrf_(const char *uplo, const int *n, float *A,
00168 const int *lda, int *info);
00169 extern "C" void strtri_(const char *uplo,const char *diag,const int *n,
00170 float *A, const int *lda, int *info);
00171 extern "C" void ssyrk_(const char *uplo, const char *trans, const int *n,
00172 const int *k, const float *alpha, const float *A,
00173 const int *lda, const float *beta,
00174 float *C, const int *ldc);
00175 extern "C" void ssymm_(const char *side,const char *uplo,
00176 const int *m,const int *n,
00177 const float *alpha,const float *A,const int *lda,
00178 const float *B,const int *ldb, const float* beta,
00179 float *C,const int *ldc);
00180 extern "C" void spocon_(const char *uplo, const int *n, const float *A,
00181 const int *lda, const float *anorm, float *rcond,
00182 float *work, int *iwork, int *info);
00183 extern "C" void sstevx_(const char *jobz, const char *range, const int *n,
00184 float *d, float *e, const float *vl,
00185 const float *vu, const int *il, const int *iu,
00186 const float *abstol, int *m, float *w, float *z,
00187 const int *ldz, float *work, int *iwork, int *ifail,
00188 int *info);
00189 extern "C" void sstevr_(const char *jobz, const char *range, const int *n,
00190 float *d, float *e, const float *vl,
00191 const float *vu, const int *il, const int *iu,
00192 const float *abstol, int *m, float *w, float *z,
00193 const int *ldz, int* isuppz, float *work, int* lwork,
00194 int *iwork, int* liwork, int *info);
00195 extern "C" void ssyev_(const char *jobz, const char *uplo, const int *n,
00196 float *a, const int *lda, float *w, float *work,
00197 const int *lwork, int *info);
00198
00199
00200 extern "C" void sgemv_(const char *ta, const int *m, const int *n,
00201 const float *alpha, const float *A, const int *lda,
00202 const float *x, const int *incx, const float *beta,
00203 float *y, const int *incy);
00204 extern "C" void ssymv_(const char *uplo, const int *n,
00205 const float *alpha, const float *A, const int *lda,
00206 const float *x, const int *incx, const float *beta,
00207 float *y, const int *incy);
00208 extern "C" void strmv_(const char *uplo, const char *trans, const char *diag,
00209 const int *n, const float *A, const int *lda,
00210 float *x, const int *incx);
00211
00212 extern "C" void sscal_(const int* n,const float* da, float* dx,
00213 const int* incx);
00214 #if 0
00215
00216
00217 extern "C" double sdot_(const int* n, const float* dx, const int* incx,
00218 const float* dy, const int* incy);
00219 #endif
00220 extern "C" void saxpy_(const int* n, const float* da, const float* dx,
00221 const int* incx, float* dy,const int* incy);
00222
00223 namespace mat
00224 {
00225 struct Gblas {
00226 static float time;
00227 static bool timekeeping;
00228 };
00229
00230
00231 template<class T>
00232 inline static void gemm(const char *ta,const char *tb,
00233 const int *n, const int *k, const int *l,
00234 const T *alpha,const T *A,const int *lda,
00235 const T *B, const int *ldb,
00236 const T *beta,T *C, const int *ldc) {
00237 #ifdef USE_SSE_INTRINSICS
00238 if (*ta == 'N' && *tb == 'N' && *n == 32 && *k == 32 && *l == 32 && *alpha == 1.0 && *beta == 1) {
00239 static T * A_packed;
00240 static T * B_packed;
00241 static T * C_packed;
00242 int pack_max_size = 10000;
00243 if ( (pack_max_size*sizeof(T))%16 )
00244 throw std::runtime_error("In gblas gemm: requested buffer size not multiple of 16 bytes");
00245 static T * buffer;
00246 Memory_buffer_thread::instance().get_buffer(pack_max_size*3, buffer);
00247 A_packed = buffer;
00248 B_packed = buffer + pack_max_size;
00249 C_packed = buffer + 2*pack_max_size;
00250 gemm_sse<T>(A,B,C,32,32,32,
00251 A_packed, B_packed, C_packed,
00252 pack_max_size, pack_max_size, pack_max_size);
00253 }
00254 else
00255 #endif
00256 template_blas_gemm(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
00257 }
00258
00259
00260
00261
00262 template<class T>
00263 inline static void pptrf(const char *uplo,const int *n, T* ap, int *info) {
00264 template_lapack_pptrf(uplo,n,ap,info);
00265 }
00266
00267 template<class T>
00268 inline static void spgst(const int *itype, const char *uplo,const int *n,
00269 T* ap,const T *bp,int *info) {
00270 template_lapack_spgst(itype,uplo,n,ap,bp,info);
00271 }
00272
00273
00274 template<class T>
00275 inline static void tptri(const char *uplo,const char *diag,const int *n,
00276 T* ap,int *info) {
00277 template_lapack_tptri(uplo,diag,n,ap,info);
00278 }
00279
00280 template<class T>
00281 inline static void trmm(const char *side,const char *uplo,
00282 const char *transa, const char *diag,
00283 const int *m,const int *n,
00284 const T *alpha,const T *A,const int *lda,
00285 T *B,const int *ldb) {
00286 template_blas_trmm(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
00287 }
00288
00289
00290
00291
00292 template<class T>
00293 inline static void sygv(const int *itype,const char *jobz,
00294 const char *uplo,const int *n,
00295 T *A,const int *lda,T *B,const int *ldb,
00296 T* w,T* work,const int *lwork,int *info) {
00297 template_lapack_sygv(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
00298 }
00299
00300 template<class T>
00301 inline static void ggev(const char *jobbl, const char *jobvr,
00302 const int *n, T *A, const int *lda,
00303 T *B, const int *ldb, T *alphar,
00304 T *alphai, T *beta, T *vl,
00305 const int *ldvl, T *vr, const int *ldvr,
00306 T *work, const int *lwork, int *info) {
00307 template_lapack_ggev(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
00308 ldvl, vr, ldvr, work, lwork, info);
00309 }
00310
00311
00312
00313 template<class T>
00314 inline static void potrf(const char *uplo, const int *n, T *A,
00315 const int *lda, int *info) {
00316 template_lapack_potrf(uplo, n, A, lda, info);
00317 }
00318
00319
00320 template<class T>
00321 inline static void trtri(const char *uplo,const char *diag,const int *n,
00322 T *A, const int *lda, int *info) {
00323
00324 char uploCopy[2];
00325 char diagCopy[2];
00326 uploCopy[0] = uplo[0];
00327 uploCopy[1] = '\0';
00328 diagCopy[0] = diag[0];
00329 diagCopy[1] = '\0';
00330 template_lapack_trtri(uploCopy, diagCopy, n, A, lda, info);
00331 }
00332
00333 template<class T>
00334 inline static void syrk(const char *uplo, const char *trans, const int *n,
00335 const int *k, const T *alpha, const T *A,
00336 const int *lda, const T *beta,
00337 T *C, const int *ldc) {
00338 template_blas_syrk(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
00339 }
00340
00341 template<class T>
00342 inline static void symm(const char *side,const char *uplo,
00343 const int *m,const int *n,
00344 const T *alpha,const T *A,const int *lda,
00345 const T *B,const int *ldb, const T* beta,
00346 T *C,const int *ldc) {
00347 template_blas_symm(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
00348 }
00349
00350 template<class T>
00351 inline static void pocon(const char *uplo, const int *n, const T *A,
00352 const int *lda, const T *anorm, T *rcond,
00353 T *work, int *iwork, int *info) {
00354 template_lapack_pocon(uplo, n, A, lda, anorm, rcond, work, iwork, info);
00355 }
00356
00357 template<class T>
00358 inline static void stevx(const char *jobz, const char *range,
00359 const int *n, T *d, T *e, const T *vl,
00360 const T *vu, const int *il, const int *iu,
00361 const T *abstol, int *m, T *w, T *z,
00362 const int *ldz, T *work, int *iwork, int *ifail,
00363 int *info) {
00364 template_lapack_stevx(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
00365 work, iwork, ifail, info);
00366 }
00367
00368 template<class T>
00369 inline static void stevr(const char *jobz, const char *range, const int *n,
00370 T *d, T *e, const T *vl,
00371 const T *vu, const int *il, const int *iu,
00372 const T *abstol, int *m, T *w, T *z,
00373 const int *ldz, int* isuppz, T *work, int* lwork,
00374 int *iwork, int* liwork, int *info) {
00375 template_lapack_stevr(jobz, range, n, d, e, vl, vu, il, iu, abstol,
00376 m, w, z, ldz, isuppz,
00377 work, lwork, iwork, liwork, info);
00378 }
00379
00380
00381 template<class T>
00382 inline static void syev(const char *jobz, const char *uplo, const int *n,
00383 T *a, const int *lda, T *w, T *work,
00384 const int *lwork, int *info) {
00385 template_lapack_syev(jobz, uplo, n, a, lda, w, work, lwork, info);
00386 }
00387
00388
00389
00390 template<class T>
00391 inline static void gemv(const char *ta, const int *m, const int *n,
00392 const T *alpha, const T *A,
00393 const int *lda,
00394 const T *x, const int *incx,
00395 const T *beta, T *y, const int *incy) {
00396 template_blas_gemv(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
00397 }
00398
00399 template<class T>
00400 inline static void symv(const char *uplo, const int *n,
00401 const T *alpha, const T *A,
00402 const int *lda, const T *x,
00403 const int *incx, const T *beta,
00404 T *y, const int *incy) {
00405 template_blas_symv(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
00406 }
00407
00408 template<class T>
00409 inline static void trmv(const char *uplo, const char *trans,
00410 const char *diag, const int *n,
00411 const T *A, const int *lda,
00412 T *x, const int *incx) {
00413 template_blas_trmv(uplo, trans, diag, n, A, lda, x, incx);
00414 }
00415
00416
00417
00418 template<class T>
00419 inline static void scal(const int* n,const T* da, T* dx,
00420 const int* incx) {
00421 template_blas_scal(n, da, dx, incx);
00422 }
00423
00424 template<class T>
00425 inline static T dot(const int* n, const T* dx, const int* incx,
00426 const T* dy, const int* incy) {
00427 return template_blas_dot(n, dx, incx, dy, incy);
00428 }
00429
00430 template<class T>
00431 inline static void axpy(const int* n, const T* da, const T* dx,
00432 const int* incx, T* dy,const int* incy) {
00433 template_blas_axpy(n, da, dx, incx, dy, incy);
00434 }
00435
00436
00437
00438
00439
00440
00441
00442 #ifndef USE_LINALG_TEMPLATES
00443
00444
00445
00446 template<>
00447 inline void gemm<double>(const char *ta,const char *tb,
00448 const int *n, const int *k, const int *l,
00449 const double *alpha,
00450 const double *A,const int *lda,
00451 const double *B, const int *ldb,
00452 const double *beta,
00453 double *C, const int *ldc) {
00454 if (Gblas::timekeeping) {
00455 clock_t start = clock();
00456 dgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
00457 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00458 }
00459 else {
00460 dgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
00461 }
00462 }
00463
00464 template<>
00465 inline void pptrf<double>(const char *uplo,const int *n,
00466 double* ap, int *info) {
00467 if (Gblas::timekeeping) {
00468 clock_t start = clock();
00469 dpptrf_(uplo,n,ap,info);
00470 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00471 }
00472 else {
00473 dpptrf_(uplo,n,ap,info);
00474 }
00475 }
00476
00477 template<>
00478 inline void spgst<double>(const int *itype, const char *uplo,
00479 const int *n,
00480 double* ap,const double *bp,int *info) {
00481 if (Gblas::timekeeping) {
00482 clock_t start = clock();
00483 dspgst_(itype,uplo,n,ap,bp,info);
00484 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00485 }
00486 else {
00487 dspgst_(itype,uplo,n,ap,bp,info);
00488 }
00489 }
00490
00491 template<>
00492 inline void tptri<double>(const char *uplo,const char *diag,const int *n,
00493 double* ap,int *info) {
00494 if (Gblas::timekeeping) {
00495 clock_t start = clock();
00496 dtptri_(uplo,diag,n,ap,info);
00497 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00498 }
00499 else {
00500 dtptri_(uplo,diag,n,ap,info);
00501 }
00502 }
00503
00504 template<>
00505 inline void trmm<double>(const char *side,const char *uplo,
00506 const char *transa,
00507 const char *diag,const int *m,const int *n,
00508 const double *alpha,
00509 const double *A,const int *lda,
00510 double *B,const int *ldb) {
00511 if (Gblas::timekeeping) {
00512 clock_t start = clock();
00513 dtrmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
00514 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00515 }
00516 else {
00517 dtrmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
00518 }
00519 }
00520
00521 template<>
00522 inline void sygv<double>(const int *itype,const char *jobz,
00523 const char *uplo,const int *n,
00524 double *A,const int *lda,
00525 double *B,const int *ldb,
00526 double* w,double* work,
00527 const int *lwork,int *info) {
00528 if (Gblas::timekeeping) {
00529 clock_t start = clock();
00530 dsygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
00531 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00532 }
00533 else {
00534 dsygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
00535 }
00536 }
00537
00538 template<>
00539 inline void ggev<double>(const char *jobbl, const char *jobvr,
00540 const int *n, double *A, const int *lda,
00541 double *B, const int *ldb, double *alphar,
00542 double *alphai, double *beta, double *vl,
00543 const int *ldvl, double *vr, const int *ldvr,
00544 double *work, const int *lwork, int *info) {
00545 if (Gblas::timekeeping) {
00546 clock_t start = clock();
00547 dggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
00548 ldvl, vr, ldvr, work, lwork, info);
00549 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00550 }
00551 else {
00552 dggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
00553 ldvl, vr, ldvr, work, lwork, info);
00554 }
00555 }
00556
00557
00558 template<>
00559 inline void potrf<double>(const char *uplo, const int *n, double *A,
00560 const int *lda, int *info) {
00561 if (Gblas::timekeeping) {
00562 clock_t start = clock();
00563 dpotrf_(uplo, n, A, lda, info);
00564 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00565 }
00566 else {
00567 dpotrf_(uplo, n, A, lda, info);
00568 }
00569 }
00570
00571 template<>
00572 inline void trtri<double>(const char *uplo,const char *diag,const int *n,
00573 double *A, const int *lda, int *info) {
00574 if (Gblas::timekeeping) {
00575 clock_t start = clock();
00576 dtrtri_(uplo, diag, n, A, lda, info);
00577 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00578 }
00579 else {
00580 dtrtri_(uplo, diag, n, A, lda, info);
00581 }
00582 }
00583
00584 template<>
00585 inline void syrk<double>(const char *uplo, const char *trans,
00586 const int *n, const int *k, const double *alpha,
00587 const double *A, const int *lda,
00588 const double *beta, double *C, const int *ldc) {
00589 if (Gblas::timekeeping) {
00590 clock_t start = clock();
00591 dsyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
00592 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00593 }
00594 else {
00595 dsyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
00596 }
00597 }
00598
00599 template<>
00600 inline void symm<double>(const char *side,const char *uplo,
00601 const int *m,const int *n, const double *alpha,
00602 const double *A,const int *lda,
00603 const double *B,const int *ldb,
00604 const double* beta,
00605 double *C,const int *ldc) {
00606 if (Gblas::timekeeping) {
00607 clock_t start = clock();
00608 dsymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
00609 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00610 }
00611 else {
00612 dsymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
00613 }
00614 }
00615
00616 template<>
00617 inline void pocon<double>(const char *uplo, const int *n,
00618 const double *A, const int *lda,
00619 const double *anorm, double *rcond,
00620 double *work, int *iwork, int *info) {
00621 if (Gblas::timekeeping) {
00622 clock_t start = clock();
00623 dpocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
00624 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00625 }
00626 else {
00627 dpocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
00628 }
00629 }
00630
00631 template<>
00632 inline void stevx<double>(const char *jobz, const char *range,
00633 const int *n, double *d, double *e,
00634 const double *vl,
00635 const double *vu, const int *il, const int *iu,
00636 const double *abstol, int *m, double *w,
00637 double *z,
00638 const int *ldz, double *work, int *iwork,
00639 int *ifail, int *info) {
00640 if (Gblas::timekeeping) {
00641 clock_t start = clock();
00642 dstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
00643 work, iwork, ifail, info);
00644 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00645 }
00646 else {
00647 dstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
00648 work, iwork, ifail, info);
00649 }
00650 }
00651
00652 template<>
00653 inline void stevr<double>(const char *jobz, const char *range,
00654 const int *n, double *d, double *e,
00655 const double *vl, const double *vu,
00656 const int *il, const int *iu,
00657 const double *abstol,
00658 int *m, double *w,
00659 double *z, const int *ldz, int* isuppz,
00660 double *work, int* lwork,
00661 int *iwork, int* liwork, int *info) {
00662 if (Gblas::timekeeping) {
00663 clock_t start = clock();
00664 dstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol,
00665 m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
00666 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00667 }
00668 else {
00669 dstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol,
00670 m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
00671 }
00672 }
00673
00674
00675
00676 template<>
00677 inline void syev<double>(const char *jobz, const char *uplo, const int *n,
00678 double *a, const int *lda, double *w,
00679 double *work, const int *lwork, int *info) {
00680 if (Gblas::timekeeping) {
00681 clock_t start = clock();
00682 dsyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
00683 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00684 }
00685 else {
00686 dsyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
00687 }
00688 }
00689
00690
00691
00692 template<>
00693 inline void gemv<double>(const char *ta, const int *m, const int *n,
00694 const double *alpha, const double *A,
00695 const int *lda,
00696 const double *x, const int *incx,
00697 const double *beta, double *y, const int *incy) {
00698 if (Gblas::timekeeping) {
00699 clock_t start = clock();
00700 dgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
00701 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00702 }
00703 else {
00704 dgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
00705 }
00706 }
00707
00708 template<>
00709 inline void symv<double>(const char *uplo, const int *n,
00710 const double *alpha, const double *A,
00711 const int *lda, const double *x,
00712 const int *incx, const double *beta,
00713 double *y, const int *incy) {
00714 if (Gblas::timekeeping) {
00715 clock_t start = clock();
00716 dsymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
00717 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00718 }
00719 else {
00720 dsymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
00721 }
00722 }
00723
00724 template<>
00725 inline void trmv<double>(const char *uplo, const char *trans,
00726 const char *diag, const int *n,
00727 const double *A, const int *lda,
00728 double *x, const int *incx) {
00729 if (Gblas::timekeeping) {
00730 clock_t start = clock();
00731 dtrmv_(uplo, trans, diag, n, A, lda, x, incx);
00732 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00733 }
00734 else {
00735 dtrmv_(uplo, trans, diag, n, A, lda, x, incx);
00736 }
00737 }
00738
00739
00740
00741 template<>
00742 inline void scal<double>(const int* n,const double* da, double* dx,
00743 const int* incx) {
00744 if (Gblas::timekeeping) {
00745 clock_t start = clock();
00746 dscal_(n, da, dx, incx);
00747 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00748 }
00749 else {
00750 dscal_(n, da, dx, incx);
00751 }
00752 }
00753
00754 template<>
00755 inline double dot<double>(const int* n, const double* dx, const int* incx,
00756 const double* dy, const int* incy) {
00757 double tmp = 0;
00758 if (Gblas::timekeeping) {
00759 clock_t start = clock();
00760 tmp = ddot_(n, dx, incx, dy, incy);
00761 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00762 }
00763 else {
00764 tmp = ddot_(n, dx, incx, dy, incy);
00765 }
00766 return tmp;
00767 }
00768
00769 template<>
00770 inline void axpy<double>(const int* n, const double* da, const double* dx,
00771 const int* incx, double* dy,const int* incy) {
00772 if (Gblas::timekeeping) {
00773 clock_t start = clock();
00774 daxpy_(n, da, dx, incx, dy, incy);
00775 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00776 }
00777 else {
00778 daxpy_(n, da, dx, incx, dy, incy);
00779 }
00780 }
00781
00782
00783
00784 template<>
00785 inline void gemm<float>(const char *ta,const char *tb,
00786 const int *n, const int *k, const int *l,
00787 const float *alpha,
00788 const float *A,const int *lda,
00789 const float *B, const int *ldb,
00790 const float *beta,
00791 float *C, const int *ldc) {
00792 if (Gblas::timekeeping) {
00793 clock_t start = clock();
00794 sgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
00795 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00796 }
00797 else {
00798 sgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
00799 }
00800 }
00801
00802 template<>
00803 inline void pptrf<float>(const char *uplo,const int *n,
00804 float* ap, int *info) {
00805 if (Gblas::timekeeping) {
00806 clock_t start = clock();
00807 spptrf_(uplo,n,ap,info);
00808 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00809 }
00810 else {
00811 spptrf_(uplo,n,ap,info);
00812 }
00813 }
00814
00815 template<>
00816 inline void spgst<float>(const int *itype, const char *uplo,
00817 const int *n,
00818 float* ap,const float *bp,int *info) {
00819 if (Gblas::timekeeping) {
00820 clock_t start = clock();
00821 sspgst_(itype,uplo,n,ap,bp,info);
00822 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00823 }
00824 else {
00825 sspgst_(itype,uplo,n,ap,bp,info);
00826 }
00827 }
00828
00829 template<>
00830 inline void tptri<float>(const char *uplo,const char *diag,
00831 const int *n,
00832 float* ap,int *info) {
00833 if (Gblas::timekeeping) {
00834 clock_t start = clock();
00835 stptri_(uplo,diag,n,ap,info);
00836 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00837 }
00838 else {
00839 stptri_(uplo,diag,n,ap,info);
00840 }
00841 }
00842
00843 template<>
00844 inline void trmm<float>(const char *side,const char *uplo,
00845 const char *transa,
00846 const char *diag,const int *m,const int *n,
00847 const float *alpha,
00848 const float *A,const int *lda,
00849 float *B,const int *ldb) {
00850 if (Gblas::timekeeping) {
00851 clock_t start = clock();
00852 strmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
00853 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00854 }
00855 else {
00856 strmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
00857 }
00858 }
00859
00860 template<>
00861 inline void sygv<float>(const int *itype,const char *jobz,
00862 const char *uplo,const int *n,
00863 float *A,const int *lda,
00864 float *B,const int *ldb,
00865 float* w,float* work,
00866 const int *lwork,int *info) {
00867 if (Gblas::timekeeping) {
00868 clock_t start = clock();
00869 ssygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
00870 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00871 }
00872 else {
00873 ssygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
00874 }
00875 }
00876
00877 template<>
00878 inline void ggev<float>(const char *jobbl, const char *jobvr,
00879 const int *n, float *A, const int *lda,
00880 float *B, const int *ldb, float *alphar,
00881 float *alphai, float *beta, float *vl,
00882 const int *ldvl, float *vr, const int *ldvr,
00883 float *work, const int *lwork, int *info) {
00884 if (Gblas::timekeeping) {
00885 clock_t start = clock();
00886 sggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
00887 ldvl, vr, ldvr, work, lwork, info);
00888 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00889 }
00890 else {
00891 sggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
00892 ldvl, vr, ldvr, work, lwork, info);
00893 }
00894 }
00895
00896
00897 template<>
00898 inline void potrf<float>(const char *uplo, const int *n, float *A,
00899 const int *lda, int *info) {
00900 if (Gblas::timekeeping) {
00901 clock_t start = clock();
00902 spotrf_(uplo, n, A, lda, info);
00903 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00904 }
00905 else {
00906 spotrf_(uplo, n, A, lda, info);
00907 }
00908 }
00909
00910 template<>
00911 inline void trtri<float>(const char *uplo,const char *diag,const int *n,
00912 float *A, const int *lda, int *info) {
00913 if (Gblas::timekeeping) {
00914 clock_t start = clock();
00915 strtri_(uplo, diag, n, A, lda, info);
00916 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00917 }
00918 else {
00919 strtri_(uplo, diag, n, A, lda, info);
00920 }
00921 }
00922
00923 template<>
00924 inline void syrk<float>(const char *uplo, const char *trans,
00925 const int *n, const int *k, const float *alpha,
00926 const float *A, const int *lda,
00927 const float *beta, float *C, const int *ldc) {
00928 if (Gblas::timekeeping) {
00929 clock_t start = clock();
00930 ssyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
00931 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00932 }
00933 else {
00934 ssyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
00935 }
00936 }
00937
00938 template<>
00939 inline void symm<float>(const char *side,const char *uplo,
00940 const int *m,const int *n, const float *alpha,
00941 const float *A,const int *lda,
00942 const float *B,const int *ldb,
00943 const float* beta,
00944 float *C,const int *ldc) {
00945 if (Gblas::timekeeping) {
00946 clock_t start = clock();
00947 ssymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
00948 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00949 }
00950 else {
00951 ssymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
00952 }
00953 }
00954
00955 template<>
00956 inline void pocon<float>(const char *uplo, const int *n,
00957 const float *A, const int *lda,
00958 const float *anorm, float *rcond,
00959 float *work, int *iwork, int *info) {
00960 if (Gblas::timekeeping) {
00961 clock_t start = clock();
00962 spocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
00963 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00964 }
00965 else {
00966 spocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
00967 }
00968 }
00969
00970 template<>
00971 inline void stevx<float>(const char *jobz, const char *range,
00972 const int *n, float *d, float *e,
00973 const float *vl,
00974 const float *vu, const int *il, const int *iu,
00975 const float *abstol, int *m, float *w,
00976 float *z,
00977 const int *ldz, float *work, int *iwork,
00978 int *ifail, int *info) {
00979 if (Gblas::timekeeping) {
00980 clock_t start = clock();
00981 sstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
00982 work, iwork, ifail, info);
00983 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00984 }
00985 else {
00986 sstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
00987 work, iwork, ifail, info);
00988 }
00989 }
00990
00991 template<>
00992 inline void stevr<float>(const char *jobz, const char *range,
00993 const int *n, float *d, float *e,
00994 const float *vl, const float *vu,
00995 const int *il, const int *iu,
00996 const float *abstol,
00997 int *m, float *w,
00998 float *z, const int *ldz, int* isuppz,
00999 float *work, int* lwork,
01000 int *iwork, int* liwork, int *info) {
01001 if (Gblas::timekeeping) {
01002 clock_t start = clock();
01003 sstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol,
01004 m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
01005 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
01006 }
01007 else {
01008 sstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol,
01009 m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
01010 }
01011 }
01012
01013 template<>
01014 inline void syev<float>(const char *jobz, const char *uplo, const int *n,
01015 float *a, const int *lda, float *w,
01016 float *work, const int *lwork, int *info) {
01017 if (Gblas::timekeeping) {
01018 clock_t start = clock();
01019 ssyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
01020 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
01021 }
01022 else {
01023 ssyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
01024 }
01025 }
01026
01027
01028
01029 template<>
01030 inline void gemv<float>(const char *ta, const int *m, const int *n,
01031 const float *alpha, const float *A,
01032 const int *lda,
01033 const float *x, const int *incx,
01034 const float *beta, float *y, const int *incy) {
01035 if (Gblas::timekeeping) {
01036 clock_t start = clock();
01037 sgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
01038 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
01039 }
01040 else {
01041 sgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
01042 }
01043 }
01044
01045 template<>
01046 inline void symv<float>(const char *uplo, const int *n,
01047 const float *alpha, const float *A,
01048 const int *lda, const float *x,
01049 const int *incx, const float *beta,
01050 float *y, const int *incy) {
01051 if (Gblas::timekeeping) {
01052 clock_t start = clock();
01053 ssymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
01054 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
01055 }
01056 else {
01057 ssymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
01058 }
01059 }
01060
01061 template<>
01062 inline void trmv<float>(const char *uplo, const char *trans,
01063 const char *diag, const int *n,
01064 const float *A, const int *lda,
01065 float *x, const int *incx) {
01066 if (Gblas::timekeeping) {
01067 clock_t start = clock();
01068 strmv_(uplo, trans, diag, n, A, lda, x, incx);
01069 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
01070 }
01071 else {
01072 strmv_(uplo, trans, diag, n, A, lda, x, incx);
01073 }
01074 }
01075
01076
01077 template<>
01078 inline void scal<float>(const int* n,const float* da, float* dx,
01079 const int* incx) {
01080 if (Gblas::timekeeping) {
01081 clock_t start = clock();
01082 sscal_(n, da, dx, incx);
01083 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
01084 }
01085 else {
01086 sscal_(n, da, dx, incx);
01087 }
01088 }
01089 #if 0
01090
01091
01092 template<>
01093 inline float dot<float>(const int* n, const float* dx, const int* incx,
01094 const float* dy, const int* incy) {
01095 float tmp;
01096 if (Gblas::timekeeping) {
01097 clock_t start = clock();
01098 sdot_(n, dx, incx, dy, incy);
01099 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
01100 }
01101 else {
01102 sdot_(n, dx, incx, dy, incy);
01103 }
01104 return tmp;
01105 }
01106 #endif
01107
01108 template<>
01109 inline void axpy<float>(const int* n, const float* da, const float* dx,
01110 const int* incx, float* dy,const int* incy) {
01111 if (Gblas::timekeeping) {
01112 clock_t start = clock();
01113 saxpy_(n, da, dx, incx, dy, incy);
01114 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
01115 }
01116 else {
01117 saxpy_(n, da, dx, incx, dy, incy);
01118 }
01119 }
01120
01121
01122 #endif
01123
01124
01125
01126
01127
01128
01129
01130
01131
01132
01133
01134 template<class Treal>
01135 static void fulltopacked(const Treal* full, Treal* packed, const int size){
01136 int pind=0;
01137 for (int col=0;col<size;col++)
01138 {
01139 for(int row=0;row<=col;row++)
01140 {
01141 packed[pind]=full[col*size+row];
01142 pind++;
01143 }
01144 }
01145 }
01146
01147 template<class Treal>
01148 static void packedtofull(const Treal* packed, Treal* full, const int size){
01149 int psize=(size+1)*size/2;
01150 int col=0;
01151 int row=0;
01152 for(int pind=0;pind<psize;pind++)
01153 {
01154 if (col==row)
01155 {
01156 full[col*size+row]=packed[pind];
01157 col++;
01158 row=0;
01159 }
01160 else
01161 {
01162 full[col*size+row]=packed[pind];
01163 full[row*size+col]=packed[pind];
01164 row++;
01165 }
01166 }
01167 }
01168
01169 template<class Treal>
01170 static void tripackedtofull(const Treal* packed,Treal* full,
01171 const int size) {
01172 int psize=(size+1)*size/2;
01173 int col=0;
01174 int row=0;
01175 for(int pind=0;pind<psize;pind++)
01176 {
01177 if (col==row)
01178 {
01179 full[col*size+row]=packed[pind];
01180 col++;
01181 row=0;
01182 }
01183 else
01184 {
01185 full[col*size+row]=packed[pind];
01186 full[row*size+col]=0;
01187 row++;
01188 }
01189 }
01190 }
01191
01192 template<class Treal>
01193 static void trifulltofull(Treal* trifull, const int size) {
01194 for(int col = 0; col < size - 1; col++)
01195 for(int row = col + 1; row < size; row++)
01196 trifull[col * size + row] = 0;
01197 }
01198
01199
01200 }
01201
01202 #endif