00001 /* Ergo, version 3.7, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2018 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, 00004 * and Anastasia Kruchinina. 00005 * 00006 * This program is free software: you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation, either version 3 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * This program is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License 00017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00018 * 00019 * Primary academic reference: 00020 * Ergo: An open-source program for linear-scaling electronic structure 00021 * calculations, 00022 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia 00023 * Kruchinina, 00024 * SoftwareX 7, 107 (2018), 00025 * <http://dx.doi.org/10.1016/j.softx.2018.03.005> 00026 * 00027 * For further information about Ergo, see <http://www.ergoscf.org>. 00028 */ 00029 00030 /* This file belongs to the template_lapack part of the Ergo source 00031 * code. The source files in the template_lapack directory are modified 00032 * versions of files originally distributed as CLAPACK, see the 00033 * Copyright/license notice in the file template_lapack/COPYING. 00034 */ 00035 00036 00037 #ifndef TEMPLATE_LAPACK_SYTRD_HEADER 00038 #define TEMPLATE_LAPACK_SYTRD_HEADER 00039 00040 #include "template_lapack_common.h" 00041 00042 template<class Treal> 00043 int template_lapack_sytrd(const char *uplo, const integer *n, Treal *a, const integer * 00044 lda, Treal *d__, Treal *e, Treal *tau, Treal * 00045 work, const integer *lwork, integer *info) 00046 { 00047 /* -- LAPACK routine (version 3.0) -- 00048 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 00049 Courant Institute, Argonne National Lab, and Rice University 00050 June 30, 1999 00051 00052 00053 Purpose 00054 ======= 00055 00056 DSYTRD reduces a real symmetric matrix A to real symmetric 00057 tridiagonal form T by an orthogonal similarity transformation: 00058 Q**T * A * Q = T. 00059 00060 Arguments 00061 ========= 00062 00063 UPLO (input) CHARACTER*1 00064 = 'U': Upper triangle of A is stored; 00065 = 'L': Lower triangle of A is stored. 00066 00067 N (input) INTEGER 00068 The order of the matrix A. N >= 0. 00069 00070 A (input/output) DOUBLE PRECISION array, dimension (LDA,N) 00071 On entry, the symmetric matrix A. If UPLO = 'U', the leading 00072 N-by-N upper triangular part of A contains the upper 00073 triangular part of the matrix A, and the strictly lower 00074 triangular part of A is not referenced. If UPLO = 'L', the 00075 leading N-by-N lower triangular part of A contains the lower 00076 triangular part of the matrix A, and the strictly upper 00077 triangular part of A is not referenced. 00078 On exit, if UPLO = 'U', the diagonal and first superdiagonal 00079 of A are overwritten by the corresponding elements of the 00080 tridiagonal matrix T, and the elements above the first 00081 superdiagonal, with the array TAU, represent the orthogonal 00082 matrix Q as a product of elementary reflectors; if UPLO 00083 = 'L', the diagonal and first subdiagonal of A are over- 00084 written by the corresponding elements of the tridiagonal 00085 matrix T, and the elements below the first subdiagonal, with 00086 the array TAU, represent the orthogonal matrix Q as a product 00087 of elementary reflectors. See Further Details. 00088 00089 LDA (input) INTEGER 00090 The leading dimension of the array A. LDA >= max(1,N). 00091 00092 D (output) DOUBLE PRECISION array, dimension (N) 00093 The diagonal elements of the tridiagonal matrix T: 00094 D(i) = A(i,i). 00095 00096 E (output) DOUBLE PRECISION array, dimension (N-1) 00097 The off-diagonal elements of the tridiagonal matrix T: 00098 E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. 00099 00100 TAU (output) DOUBLE PRECISION array, dimension (N-1) 00101 The scalar factors of the elementary reflectors (see Further 00102 Details). 00103 00104 WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) 00105 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 00106 00107 LWORK (input) INTEGER 00108 The dimension of the array WORK. LWORK >= 1. 00109 For optimum performance LWORK >= N*NB, where NB is the 00110 optimal blocksize. 00111 00112 If LWORK = -1, then a workspace query is assumed; the routine 00113 only calculates the optimal size of the WORK array, returns 00114 this value as the first entry of the WORK array, and no error 00115 message related to LWORK is issued by XERBLA. 00116 00117 INFO (output) INTEGER 00118 = 0: successful exit 00119 < 0: if INFO = -i, the i-th argument had an illegal value 00120 00121 Further Details 00122 =============== 00123 00124 If UPLO = 'U', the matrix Q is represented as a product of elementary 00125 reflectors 00126 00127 Q = H(n-1) . . . H(2) H(1). 00128 00129 Each H(i) has the form 00130 00131 H(i) = I - tau * v * v' 00132 00133 where tau is a real scalar, and v is a real vector with 00134 v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in 00135 A(1:i-1,i+1), and tau in TAU(i). 00136 00137 If UPLO = 'L', the matrix Q is represented as a product of elementary 00138 reflectors 00139 00140 Q = H(1) H(2) . . . H(n-1). 00141 00142 Each H(i) has the form 00143 00144 H(i) = I - tau * v * v' 00145 00146 where tau is a real scalar, and v is a real vector with 00147 v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), 00148 and tau in TAU(i). 00149 00150 The contents of A on exit are illustrated by the following examples 00151 with n = 5: 00152 00153 if UPLO = 'U': if UPLO = 'L': 00154 00155 ( d e v2 v3 v4 ) ( d ) 00156 ( d e v3 v4 ) ( e d ) 00157 ( d e v4 ) ( v1 e d ) 00158 ( d e ) ( v1 v2 e d ) 00159 ( d ) ( v1 v2 v3 e d ) 00160 00161 where d and e denote diagonal and off-diagonal elements of T, and vi 00162 denotes an element of the vector defining H(i). 00163 00164 ===================================================================== 00165 00166 00167 Test the input parameters 00168 00169 Parameter adjustments */ 00170 /* Table of constant values */ 00171 integer c__1 = 1; 00172 integer c_n1 = -1; 00173 integer c__3 = 3; 00174 integer c__2 = 2; 00175 Treal c_b22 = -1.; 00176 Treal c_b23 = 1.; 00177 00178 /* System generated locals */ 00179 integer a_dim1, a_offset, i__1, i__2, i__3; 00180 /* Local variables */ 00181 integer i__, j; 00182 integer nbmin, iinfo; 00183 logical upper; 00184 integer nb, kk, nx; 00185 integer ldwork, lwkopt; 00186 logical lquery; 00187 integer iws; 00188 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 00189 00190 00191 a_dim1 = *lda; 00192 a_offset = 1 + a_dim1 * 1; 00193 a -= a_offset; 00194 --d__; 00195 --e; 00196 --tau; 00197 --work; 00198 00199 /* Initialization added by Elias to get rid of compiler warnings. */ 00200 lwkopt = 0; 00201 /* Function Body */ 00202 *info = 0; 00203 upper = template_blas_lsame(uplo, "U"); 00204 lquery = *lwork == -1; 00205 if (! upper && ! template_blas_lsame(uplo, "L")) { 00206 *info = -1; 00207 } else if (*n < 0) { 00208 *info = -2; 00209 } else if (*lda < maxMACRO(1,*n)) { 00210 *info = -4; 00211 } else if (*lwork < 1 && ! lquery) { 00212 *info = -9; 00213 } 00214 00215 if (*info == 0) { 00216 00217 /* Determine the block size. */ 00218 00219 nb = template_lapack_ilaenv(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, 00220 (ftnlen)1); 00221 lwkopt = *n * nb; 00222 work[1] = (Treal) lwkopt; 00223 } 00224 00225 if (*info != 0) { 00226 i__1 = -(*info); 00227 template_blas_erbla("SYTRD ", &i__1); 00228 return 0; 00229 } else if (lquery) { 00230 return 0; 00231 } 00232 00233 /* Quick return if possible */ 00234 00235 if (*n == 0) { 00236 work[1] = 1.; 00237 return 0; 00238 } 00239 00240 nx = *n; 00241 iws = 1; 00242 if (nb > 1 && nb < *n) { 00243 00244 /* Determine when to cross over from blocked to unblocked code 00245 (last block is always handled by unblocked code). 00246 00247 Computing MAX */ 00248 i__1 = nb, i__2 = template_lapack_ilaenv(&c__3, "DSYTRD", uplo, n, &c_n1, &c_n1, & 00249 c_n1, (ftnlen)6, (ftnlen)1); 00250 nx = maxMACRO(i__1,i__2); 00251 if (nx < *n) { 00252 00253 /* Determine if workspace is large enough for blocked code. */ 00254 00255 ldwork = *n; 00256 iws = ldwork * nb; 00257 if (*lwork < iws) { 00258 00259 /* Not enough workspace to use optimal NB: determine the 00260 minimum value of NB, and reduce NB or force use of 00261 unblocked code by setting NX = N. 00262 00263 Computing MAX */ 00264 i__1 = *lwork / ldwork; 00265 nb = maxMACRO(i__1,1); 00266 nbmin = template_lapack_ilaenv(&c__2, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, 00267 (ftnlen)6, (ftnlen)1); 00268 if (nb < nbmin) { 00269 nx = *n; 00270 } 00271 } 00272 } else { 00273 nx = *n; 00274 } 00275 } else { 00276 nb = 1; 00277 } 00278 00279 if (upper) { 00280 00281 /* Reduce the upper triangle of A. 00282 Columns 1:kk are handled by the unblocked method. */ 00283 00284 kk = *n - (*n - nx + nb - 1) / nb * nb; 00285 i__1 = kk + 1; 00286 i__2 = -nb; 00287 for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += 00288 i__2) { 00289 00290 /* Reduce columns i:i+nb-1 to tridiagonal form and form the 00291 matrix W which is needed to update the unreduced part of 00292 the matrix */ 00293 00294 i__3 = i__ + nb - 1; 00295 template_lapack_latrd(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & 00296 work[1], &ldwork); 00297 00298 /* Update the unreduced submatrix A(1:i-1,1:i-1), using an 00299 update of the form: A := A - V*W' - W*V' */ 00300 00301 i__3 = i__ - 1; 00302 template_blas_syr2k(uplo, "No transpose", &i__3, &nb, &c_b22, &a_ref(1, i__), 00303 lda, &work[1], &ldwork, &c_b23, &a[a_offset], lda); 00304 00305 /* Copy superdiagonal elements back into A, and diagonal 00306 elements into D */ 00307 00308 i__3 = i__ + nb - 1; 00309 for (j = i__; j <= i__3; ++j) { 00310 a_ref(j - 1, j) = e[j - 1]; 00311 d__[j] = a_ref(j, j); 00312 /* L10: */ 00313 } 00314 /* L20: */ 00315 } 00316 00317 /* Use unblocked code to reduce the last or only block */ 00318 00319 template_lapack_sytd2(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); 00320 } else { 00321 00322 /* Reduce the lower triangle of A */ 00323 00324 i__2 = *n - nx; 00325 i__1 = nb; 00326 for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { 00327 00328 /* Reduce columns i:i+nb-1 to tridiagonal form and form the 00329 matrix W which is needed to update the unreduced part of 00330 the matrix */ 00331 00332 i__3 = *n - i__ + 1; 00333 template_lapack_latrd(uplo, &i__3, &nb, &a_ref(i__, i__), lda, &e[i__], &tau[ 00334 i__], &work[1], &ldwork); 00335 00336 /* Update the unreduced submatrix A(i+ib:n,i+ib:n), using 00337 an update of the form: A := A - V*W' - W*V' */ 00338 00339 i__3 = *n - i__ - nb + 1; 00340 template_blas_syr2k(uplo, "No transpose", &i__3, &nb, &c_b22, &a_ref(i__ + nb, 00341 i__), lda, &work[nb + 1], &ldwork, &c_b23, &a_ref(i__ + 00342 nb, i__ + nb), lda); 00343 00344 /* Copy subdiagonal elements back into A, and diagonal 00345 elements into D */ 00346 00347 i__3 = i__ + nb - 1; 00348 for (j = i__; j <= i__3; ++j) { 00349 a_ref(j + 1, j) = e[j]; 00350 d__[j] = a_ref(j, j); 00351 /* L30: */ 00352 } 00353 /* L40: */ 00354 } 00355 00356 /* Use unblocked code to reduce the last or only block */ 00357 00358 i__1 = *n - i__ + 1; 00359 template_lapack_sytd2(uplo, &i__1, &a_ref(i__, i__), lda, &d__[i__], &e[i__], &tau[ 00360 i__], &iinfo); 00361 } 00362 00363 work[1] = (Treal) lwkopt; 00364 return 0; 00365 00366 /* End of DSYTRD */ 00367 00368 } /* dsytrd_ */ 00369 00370 #undef a_ref 00371 00372 00373 #endif