00001 /* Ergo, version 3.7, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2018 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, 00004 * and Anastasia Kruchinina. 00005 * 00006 * This program is free software: you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation, either version 3 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * This program is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License 00017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00018 * 00019 * Primary academic reference: 00020 * Ergo: An open-source program for linear-scaling electronic structure 00021 * calculations, 00022 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia 00023 * Kruchinina, 00024 * SoftwareX 7, 107 (2018), 00025 * <http://dx.doi.org/10.1016/j.softx.2018.03.005> 00026 * 00027 * For further information about Ergo, see <http://www.ergoscf.org>. 00028 */ 00029 00030 /* This file belongs to the template_lapack part of the Ergo source 00031 * code. The source files in the template_lapack directory are modified 00032 * versions of files originally distributed as CLAPACK, see the 00033 * Copyright/license notice in the file template_lapack/COPYING. 00034 */ 00035 00036 00037 #ifndef TEMPLATE_LAPACK_LARRE_HEADER 00038 #define TEMPLATE_LAPACK_LARRE_HEADER 00039 00040 00041 #include "template_lapack_larrk.h" 00042 #include "template_lapack_lasq2.h" 00043 00044 00045 template<class Treal> 00046 int template_lapack_larre(const char *range, const integer *n, Treal *vl, 00047 Treal *vu, integer *il, integer *iu, Treal *d__, Treal 00048 *e, Treal *e2, Treal *rtol1, Treal *rtol2, Treal * 00049 spltol, integer *nsplit, integer *isplit, integer *m, Treal *w, 00050 Treal *werr, Treal *wgap, integer *iblock, integer *indexw, 00051 Treal *gers, Treal *pivmin, Treal *work, integer * 00052 iwork, integer *info) 00053 { 00054 /* System generated locals */ 00055 integer i__1, i__2; 00056 Treal d__1, d__2, d__3; 00057 00058 00059 /* Local variables */ 00060 integer i__, j; 00061 Treal s1, s2; 00062 integer mb = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning 00063 Treal gl; 00064 integer in, mm; 00065 Treal gu; 00066 integer cnt; 00067 Treal eps, tau, tmp, rtl; 00068 integer cnt1, cnt2; 00069 Treal tmp1, eabs; 00070 integer iend, jblk; 00071 Treal eold; 00072 integer indl; 00073 Treal dmax__, emax; 00074 integer wend = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning 00075 integer idum, indu; 00076 Treal rtol; 00077 integer iseed[4]; 00078 Treal avgap, sigma; 00079 integer iinfo; 00080 logical norep; 00081 integer ibegin; 00082 logical forceb; 00083 integer irange = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning 00084 Treal sgndef; 00085 integer wbegin; 00086 Treal safmin, spdiam; 00087 logical usedqd; 00088 Treal clwdth, isleft; 00089 Treal isrght, bsrtol, dpivot; 00090 00091 00092 /* -- LAPACK auxiliary routine (version 3.2) -- */ 00093 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ 00094 /* November 2006 */ 00095 00096 /* .. Scalar Arguments .. */ 00097 /* .. */ 00098 /* .. Array Arguments .. */ 00099 /* .. */ 00100 00101 /* Purpose */ 00102 /* ======= */ 00103 00104 /* To find the desired eigenvalues of a given real symmetric */ 00105 /* tridiagonal matrix T, DLARRE sets any "small" off-diagonal */ 00106 /* elements to zero, and for each unreduced block T_i, it finds */ 00107 /* (a) a suitable shift at one end of the block's spectrum, */ 00108 /* (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and */ 00109 /* (c) eigenvalues of each L_i D_i L_i^T. */ 00110 /* The representations and eigenvalues found are then used by */ 00111 /* DSTEMR to compute the eigenvectors of T. */ 00112 /* The accuracy varies depending on whether bisection is used to */ 00113 /* find a few eigenvalues or the dqds algorithm (subroutine DLASQ2) to */ 00114 /* conpute all and then discard any unwanted one. */ 00115 /* As an added benefit, DLARRE also outputs the n */ 00116 /* Gerschgorin intervals for the matrices L_i D_i L_i^T. */ 00117 00118 /* Arguments */ 00119 /* ========= */ 00120 00121 /* RANGE (input) CHARACTER */ 00122 /* = 'A': ("All") all eigenvalues will be found. */ 00123 /* = 'V': ("Value") all eigenvalues in the half-open interval */ 00124 /* (VL, VU] will be found. */ 00125 /* = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */ 00126 /* entire matrix) will be found. */ 00127 00128 /* N (input) INTEGER */ 00129 /* The order of the matrix. N > 0. */ 00130 00131 /* VL (input/output) DOUBLE PRECISION */ 00132 /* VU (input/output) DOUBLE PRECISION */ 00133 /* If RANGE='V', the lower and upper bounds for the eigenvalues. */ 00134 /* Eigenvalues less than or equal to VL, or greater than VU, */ 00135 /* will not be returned. VL < VU. */ 00136 /* If RANGE='I' or ='A', DLARRE computes bounds on the desired */ 00137 /* part of the spectrum. */ 00138 00139 /* IL (input) INTEGER */ 00140 /* IU (input) INTEGER */ 00141 /* If RANGE='I', the indices (in ascending order) of the */ 00142 /* smallest and largest eigenvalues to be returned. */ 00143 /* 1 <= IL <= IU <= N. */ 00144 00145 /* D (input/output) DOUBLE PRECISION array, dimension (N) */ 00146 /* On entry, the N diagonal elements of the tridiagonal */ 00147 /* matrix T. */ 00148 /* On exit, the N diagonal elements of the diagonal */ 00149 /* matrices D_i. */ 00150 00151 /* E (input/output) DOUBLE PRECISION array, dimension (N) */ 00152 /* On entry, the first (N-1) entries contain the subdiagonal */ 00153 /* elements of the tridiagonal matrix T; E(N) need not be set. */ 00154 /* On exit, E contains the subdiagonal elements of the unit */ 00155 /* bidiagonal matrices L_i. The entries E( ISPLIT( I ) ), */ 00156 /* 1 <= I <= NSPLIT, contain the base points sigma_i on output. */ 00157 00158 /* E2 (input/output) DOUBLE PRECISION array, dimension (N) */ 00159 /* On entry, the first (N-1) entries contain the SQUARES of the */ 00160 /* subdiagonal elements of the tridiagonal matrix T; */ 00161 /* E2(N) need not be set. */ 00162 /* On exit, the entries E2( ISPLIT( I ) ), */ 00163 /* 1 <= I <= NSPLIT, have been set to zero */ 00164 00165 /* RTOL1 (input) DOUBLE PRECISION */ 00166 /* RTOL2 (input) DOUBLE PRECISION */ 00167 /* Parameters for bisection. */ 00168 /* An interval [LEFT,RIGHT] has converged if */ 00169 /* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */ 00170 00171 /* SPLTOL (input) DOUBLE PRECISION */ 00172 /* The threshold for splitting. */ 00173 00174 /* NSPLIT (output) INTEGER */ 00175 /* The number of blocks T splits into. 1 <= NSPLIT <= N. */ 00176 00177 /* ISPLIT (output) INTEGER array, dimension (N) */ 00178 /* The splitting points, at which T breaks up into blocks. */ 00179 /* The first block consists of rows/columns 1 to ISPLIT(1), */ 00180 /* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */ 00181 /* etc., and the NSPLIT-th consists of rows/columns */ 00182 /* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */ 00183 00184 /* M (output) INTEGER */ 00185 /* The total number of eigenvalues (of all L_i D_i L_i^T) */ 00186 /* found. */ 00187 00188 /* W (output) DOUBLE PRECISION array, dimension (N) */ 00189 /* The first M elements contain the eigenvalues. The */ 00190 /* eigenvalues of each of the blocks, L_i D_i L_i^T, are */ 00191 /* sorted in ascending order ( DLARRE may use the */ 00192 /* remaining N-M elements as workspace). */ 00193 00194 /* WERR (output) DOUBLE PRECISION array, dimension (N) */ 00195 /* The error bound on the corresponding eigenvalue in W. */ 00196 00197 /* WGAP (output) DOUBLE PRECISION array, dimension (N) */ 00198 /* The separation from the right neighbor eigenvalue in W. */ 00199 /* The gap is only with respect to the eigenvalues of the same block */ 00200 /* as each block has its own representation tree. */ 00201 /* Exception: at the right end of a block we store the left gap */ 00202 00203 /* IBLOCK (output) INTEGER array, dimension (N) */ 00204 /* The indices of the blocks (submatrices) associated with the */ 00205 /* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */ 00206 /* W(i) belongs to the first block from the top, =2 if W(i) */ 00207 /* belongs to the second block, etc. */ 00208 00209 /* INDEXW (output) INTEGER array, dimension (N) */ 00210 /* The indices of the eigenvalues within each block (submatrix); */ 00211 /* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */ 00212 /* i-th eigenvalue W(i) is the 10-th eigenvalue in block 2 */ 00213 00214 /* GERS (output) DOUBLE PRECISION array, dimension (2*N) */ 00215 /* The N Gerschgorin intervals (the i-th Gerschgorin interval */ 00216 /* is (GERS(2*i-1), GERS(2*i)). */ 00217 00218 /* PIVMIN (output) DOUBLE PRECISION */ 00219 /* The minimum pivot in the Sturm sequence for T. */ 00220 00221 /* WORK (workspace) DOUBLE PRECISION array, dimension (6*N) */ 00222 /* Workspace. */ 00223 00224 /* IWORK (workspace) INTEGER array, dimension (5*N) */ 00225 /* Workspace. */ 00226 00227 /* INFO (output) INTEGER */ 00228 /* = 0: successful exit */ 00229 /* > 0: A problem occured in DLARRE. */ 00230 /* < 0: One of the called subroutines signaled an internal problem. */ 00231 /* Needs inspection of the corresponding parameter IINFO */ 00232 /* for further information. */ 00233 00234 /* =-1: Problem in DLARRD. */ 00235 /* = 2: No base representation could be found in MAXTRY iterations. */ 00236 /* Increasing MAXTRY and recompilation might be a remedy. */ 00237 /* =-3: Problem in DLARRB when computing the refined root */ 00238 /* representation for DLASQ2. */ 00239 /* =-4: Problem in DLARRB when preforming bisection on the */ 00240 /* desired part of the spectrum. */ 00241 /* =-5: Problem in DLASQ2. */ 00242 /* =-6: Problem in DLASQ2. */ 00243 00244 /* Further Details */ 00245 /* The base representations are required to suffer very little */ 00246 /* element growth and consequently define all their eigenvalues to */ 00247 /* high relative accuracy. */ 00248 /* =============== */ 00249 00250 /* Based on contributions by */ 00251 /* Beresford Parlett, University of California, Berkeley, USA */ 00252 /* Jim Demmel, University of California, Berkeley, USA */ 00253 /* Inderjit Dhillon, University of Texas, Austin, USA */ 00254 /* Osni Marques, LBNL/NERSC, USA */ 00255 /* Christof Voemel, University of California, Berkeley, USA */ 00256 00257 /* ===================================================================== */ 00258 00259 /* .. Parameters .. */ 00260 /* .. */ 00261 /* .. Local Scalars .. */ 00262 /* .. */ 00263 /* .. Local Arrays .. */ 00264 /* .. */ 00265 /* .. External Functions .. */ 00266 /* .. */ 00267 /* .. External Subroutines .. */ 00268 /* .. */ 00269 /* .. Intrinsic Functions .. */ 00270 /* .. */ 00271 /* .. Executable Statements .. */ 00272 00273 /* Parameter adjustments */ 00274 00275 00276 /* Table of constant values */ 00277 00278 integer c__1 = 1; 00279 integer c__2 = 2; 00280 00281 00282 --iwork; 00283 --work; 00284 --gers; 00285 --indexw; 00286 --iblock; 00287 --wgap; 00288 --werr; 00289 --w; 00290 --isplit; 00291 --e2; 00292 --e; 00293 --d__; 00294 00295 /* Initialization added by Elias to get rid of compiler warnings. */ 00296 mm = 0; 00297 /* Function Body */ 00298 *info = 0; 00299 00300 /* Decode RANGE */ 00301 00302 if (template_blas_lsame(range, "A")) { 00303 irange = 1; 00304 } else if (template_blas_lsame(range, "V")) { 00305 irange = 3; 00306 } else if (template_blas_lsame(range, "I")) { 00307 irange = 2; 00308 } 00309 *m = 0; 00310 /* Get machine constants */ 00311 safmin = template_lapack_lamch("S",(Treal)0); 00312 eps = template_lapack_lamch("P",(Treal)0); 00313 /* Set parameters */ 00314 rtl = template_blas_sqrt(eps); 00315 bsrtol = template_blas_sqrt(eps); 00316 /* Treat case of 1x1 matrix for quick return */ 00317 if (*n == 1) { 00318 if (irange == 1 || ( irange == 3 && d__[1] > *vl && d__[1] <= *vu ) || 00319 ( irange == 2 && *il == 1 && *iu == 1 ) ) { 00320 *m = 1; 00321 w[1] = d__[1]; 00322 /* The computation error of the eigenvalue is zero */ 00323 werr[1] = 0.; 00324 wgap[1] = 0.; 00325 iblock[1] = 1; 00326 indexw[1] = 1; 00327 gers[1] = d__[1]; 00328 gers[2] = d__[1]; 00329 } 00330 /* store the shift for the initial RRR, which is zero in this case */ 00331 e[1] = 0.; 00332 return 0; 00333 } 00334 /* General case: tridiagonal matrix of order > 1 */ 00335 00336 /* Init WERR, WGAP. Compute Gerschgorin intervals and spectral diameter. */ 00337 /* Compute maximum off-diagonal entry and pivmin. */ 00338 gl = d__[1]; 00339 gu = d__[1]; 00340 eold = 0.; 00341 emax = 0.; 00342 e[*n] = 0.; 00343 i__1 = *n; 00344 for (i__ = 1; i__ <= i__1; ++i__) { 00345 werr[i__] = 0.; 00346 wgap[i__] = 0.; 00347 eabs = (d__1 = e[i__], absMACRO(d__1)); 00348 if (eabs >= emax) { 00349 emax = eabs; 00350 } 00351 tmp1 = eabs + eold; 00352 gers[(i__ << 1) - 1] = d__[i__] - tmp1; 00353 /* Computing MIN */ 00354 d__1 = gl, d__2 = gers[(i__ << 1) - 1]; 00355 gl = minMACRO(d__1,d__2); 00356 gers[i__ * 2] = d__[i__] + tmp1; 00357 /* Computing MAX */ 00358 d__1 = gu, d__2 = gers[i__ * 2]; 00359 gu = maxMACRO(d__1,d__2); 00360 eold = eabs; 00361 /* L5: */ 00362 } 00363 /* The minimum pivot allowed in the Sturm sequence for T */ 00364 /* Computing MAX */ 00365 /* Computing 2nd power */ 00366 d__3 = emax; 00367 d__1 = 1., d__2 = d__3 * d__3; 00368 *pivmin = safmin * maxMACRO(d__1,d__2); 00369 /* Compute spectral diameter. The Gerschgorin bounds give an */ 00370 /* estimate that is wrong by at most a factor of SQRT(2) */ 00371 spdiam = gu - gl; 00372 /* Compute splitting points */ 00373 template_lapack_larra(n, &d__[1], &e[1], &e2[1], spltol, &spdiam, nsplit, &isplit[1], & 00374 iinfo); 00375 /* Can force use of bisection instead of faster DQDS. */ 00376 /* Option left in the code for future multisection work. */ 00377 forceb = FALSE_; 00378 /* Initialize USEDQD, DQDS should be used for ALLRNG unless someone */ 00379 /* explicitly wants bisection. */ 00380 usedqd = irange == 1 && ! forceb; 00381 if (irange == 1 && ! forceb) { 00382 /* Set interval [VL,VU] that contains all eigenvalues */ 00383 *vl = gl; 00384 *vu = gu; 00385 } else { 00386 /* We call DLARRD to find crude approximations to the eigenvalues */ 00387 /* in the desired range. In case IRANGE = INDRNG, we also obtain the */ 00388 /* interval (VL,VU] that contains all the wanted eigenvalues. */ 00389 /* An interval [LEFT,RIGHT] has converged if */ 00390 /* RIGHT-LEFT.LT.RTOL*MAX(ABS(LEFT),ABS(RIGHT)) */ 00391 /* DLARRD needs a WORK of size 4*N, IWORK of size 3*N */ 00392 template_lapack_larrd(range, "B", n, vl, vu, il, iu, &gers[1], &bsrtol, &d__[1], &e[ 00393 1], &e2[1], pivmin, nsplit, &isplit[1], &mm, &w[1], &werr[1], 00394 vl, vu, &iblock[1], &indexw[1], &work[1], &iwork[1], &iinfo); 00395 if (iinfo != 0) { 00396 *info = -1; 00397 return 0; 00398 } 00399 /* Make sure that the entries M+1 to N in W, WERR, IBLOCK, INDEXW are 0 */ 00400 i__1 = *n; 00401 for (i__ = mm + 1; i__ <= i__1; ++i__) { 00402 w[i__] = 0.; 00403 werr[i__] = 0.; 00404 iblock[i__] = 0; 00405 indexw[i__] = 0; 00406 /* L14: */ 00407 } 00408 } 00409 /* ** */ 00410 /* Loop over unreduced blocks */ 00411 ibegin = 1; 00412 wbegin = 1; 00413 i__1 = *nsplit; 00414 for (jblk = 1; jblk <= i__1; ++jblk) { 00415 iend = isplit[jblk]; 00416 in = iend - ibegin + 1; 00417 /* 1 X 1 block */ 00418 if (in == 1) { 00419 if (irange == 1 || ( irange == 3 && d__[ibegin] > *vl && d__[ibegin] 00420 <= *vu ) || ( irange == 2 && iblock[wbegin] == jblk ) ) { 00421 ++(*m); 00422 w[*m] = d__[ibegin]; 00423 werr[*m] = 0.; 00424 /* The gap for a single block doesn't matter for the later */ 00425 /* algorithm and is assigned an arbitrary large value */ 00426 wgap[*m] = 0.; 00427 iblock[*m] = jblk; 00428 indexw[*m] = 1; 00429 ++wbegin; 00430 } 00431 /* E( IEND ) holds the shift for the initial RRR */ 00432 e[iend] = 0.; 00433 ibegin = iend + 1; 00434 goto L170; 00435 } 00436 00437 /* Blocks of size larger than 1x1 */ 00438 00439 /* E( IEND ) will hold the shift for the initial RRR, for now set it =0 */ 00440 e[iend] = 0.; 00441 00442 /* Find local outer bounds GL,GU for the block */ 00443 gl = d__[ibegin]; 00444 gu = d__[ibegin]; 00445 i__2 = iend; 00446 for (i__ = ibegin; i__ <= i__2; ++i__) { 00447 /* Computing MIN */ 00448 d__1 = gers[(i__ << 1) - 1]; 00449 gl = minMACRO(d__1,gl); 00450 /* Computing MAX */ 00451 d__1 = gers[i__ * 2]; 00452 gu = maxMACRO(d__1,gu); 00453 /* L15: */ 00454 } 00455 spdiam = gu - gl; 00456 if (! (irange == 1 && ! forceb)) { 00457 /* Count the number of eigenvalues in the current block. */ 00458 mb = 0; 00459 i__2 = mm; 00460 for (i__ = wbegin; i__ <= i__2; ++i__) { 00461 if (iblock[i__] == jblk) { 00462 ++mb; 00463 } else { 00464 goto L21; 00465 } 00466 /* L20: */ 00467 } 00468 L21: 00469 if (mb == 0) { 00470 /* No eigenvalue in the current block lies in the desired range */ 00471 /* E( IEND ) holds the shift for the initial RRR */ 00472 e[iend] = 0.; 00473 ibegin = iend + 1; 00474 goto L170; 00475 } else { 00476 /* Decide whether dqds or bisection is more efficient */ 00477 usedqd = (Treal) mb > in * .5 && ! forceb; 00478 wend = wbegin + mb - 1; 00479 /* Calculate gaps for the current block */ 00480 /* In later stages, when representations for individual */ 00481 /* eigenvalues are different, we use SIGMA = E( IEND ). */ 00482 sigma = 0.; 00483 i__2 = wend - 1; 00484 for (i__ = wbegin; i__ <= i__2; ++i__) { 00485 /* Computing MAX */ 00486 d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + 00487 werr[i__]); 00488 wgap[i__] = maxMACRO(d__1,d__2); 00489 /* L30: */ 00490 } 00491 /* Computing MAX */ 00492 d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]); 00493 wgap[wend] = maxMACRO(d__1,d__2); 00494 /* Find local index of the first and last desired evalue. */ 00495 indl = indexw[wbegin]; 00496 indu = indexw[wend]; 00497 } 00498 } 00499 if ( ( irange == 1 && ! forceb ) || usedqd) { 00500 /* Case of DQDS */ 00501 /* Find approximations to the extremal eigenvalues of the block */ 00502 template_lapack_larrk(&in, &c__1, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, & 00503 rtl, &tmp, &tmp1, &iinfo); 00504 if (iinfo != 0) { 00505 *info = -1; 00506 return 0; 00507 } 00508 /* Computing MAX */ 00509 d__2 = gl, d__3 = tmp - tmp1 - eps * 100. * (d__1 = tmp - tmp1, 00510 absMACRO(d__1)); 00511 isleft = maxMACRO(d__2,d__3); 00512 template_lapack_larrk(&in, &in, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, & 00513 rtl, &tmp, &tmp1, &iinfo); 00514 if (iinfo != 0) { 00515 *info = -1; 00516 return 0; 00517 } 00518 /* Computing MIN */ 00519 d__2 = gu, d__3 = tmp + tmp1 + eps * 100. * (d__1 = tmp + tmp1, 00520 absMACRO(d__1)); 00521 isrght = minMACRO(d__2,d__3); 00522 /* Improve the estimate of the spectral diameter */ 00523 spdiam = isrght - isleft; 00524 } else { 00525 /* Case of bisection */ 00526 /* Find approximations to the wanted extremal eigenvalues */ 00527 /* Computing MAX */ 00528 d__2 = gl, d__3 = w[wbegin] - werr[wbegin] - eps * 100. * (d__1 = 00529 w[wbegin] - werr[wbegin], absMACRO(d__1)); 00530 isleft = maxMACRO(d__2,d__3); 00531 /* Computing MIN */ 00532 d__2 = gu, d__3 = w[wend] + werr[wend] + eps * 100. * (d__1 = w[ 00533 wend] + werr[wend], absMACRO(d__1)); 00534 isrght = minMACRO(d__2,d__3); 00535 } 00536 /* Decide whether the base representation for the current block */ 00537 /* L_JBLK D_JBLK L_JBLK^T = T_JBLK - sigma_JBLK I */ 00538 /* should be on the left or the right end of the current block. */ 00539 /* The strategy is to shift to the end which is "more populated" */ 00540 /* Furthermore, decide whether to use DQDS for the computation of */ 00541 /* the eigenvalue approximations at the end of DLARRE or bisection. */ 00542 /* dqds is chosen if all eigenvalues are desired or the number of */ 00543 /* eigenvalues to be computed is large compared to the blocksize. */ 00544 if (irange == 1 && ! forceb) { 00545 /* If all the eigenvalues have to be computed, we use dqd */ 00546 usedqd = TRUE_; 00547 /* INDL is the local index of the first eigenvalue to compute */ 00548 indl = 1; 00549 indu = in; 00550 /* MB = number of eigenvalues to compute */ 00551 mb = in; 00552 wend = wbegin + mb - 1; 00553 /* Define 1/4 and 3/4 points of the spectrum */ 00554 s1 = isleft + spdiam * .25; 00555 s2 = isrght - spdiam * .25; 00556 } else { 00557 /* DLARRD has computed IBLOCK and INDEXW for each eigenvalue */ 00558 /* approximation. */ 00559 /* choose sigma */ 00560 if (usedqd) { 00561 s1 = isleft + spdiam * .25; 00562 s2 = isrght - spdiam * .25; 00563 } else { 00564 tmp = minMACRO(isrght,*vu) - maxMACRO(isleft,*vl); 00565 s1 = maxMACRO(isleft,*vl) + tmp * .25; 00566 s2 = minMACRO(isrght,*vu) - tmp * .25; 00567 } 00568 } 00569 /* Compute the negcount at the 1/4 and 3/4 points */ 00570 if (mb > 1) { 00571 template_lapack_larrc("T", &in, &s1, &s2, &d__[ibegin], &e[ibegin], pivmin, & 00572 cnt, &cnt1, &cnt2, &iinfo); 00573 } 00574 if (mb == 1) { 00575 sigma = gl; 00576 sgndef = 1.; 00577 } else if (cnt1 - indl >= indu - cnt2) { 00578 if (irange == 1 && ! forceb) { 00579 sigma = maxMACRO(isleft,gl); 00580 } else if (usedqd) { 00581 /* use Gerschgorin bound as shift to get pos def matrix */ 00582 /* for dqds */ 00583 sigma = isleft; 00584 } else { 00585 /* use approximation of the first desired eigenvalue of the */ 00586 /* block as shift */ 00587 sigma = maxMACRO(isleft,*vl); 00588 } 00589 sgndef = 1.; 00590 } else { 00591 if (irange == 1 && ! forceb) { 00592 sigma = minMACRO(isrght,gu); 00593 } else if (usedqd) { 00594 /* use Gerschgorin bound as shift to get neg def matrix */ 00595 /* for dqds */ 00596 sigma = isrght; 00597 } else { 00598 /* use approximation of the first desired eigenvalue of the */ 00599 /* block as shift */ 00600 sigma = minMACRO(isrght,*vu); 00601 } 00602 sgndef = -1.; 00603 } 00604 /* An initial SIGMA has been chosen that will be used for computing */ 00605 /* T - SIGMA I = L D L^T */ 00606 /* Define the increment TAU of the shift in case the initial shift */ 00607 /* needs to be refined to obtain a factorization with not too much */ 00608 /* element growth. */ 00609 if (usedqd) { 00610 /* The initial SIGMA was to the outer end of the spectrum */ 00611 /* the matrix is definite and we need not retreat. */ 00612 tau = spdiam * eps * *n + *pivmin * 2.; 00613 } else { 00614 if (mb > 1) { 00615 clwdth = w[wend] + werr[wend] - w[wbegin] - werr[wbegin]; 00616 avgap = (d__1 = clwdth / (Treal) (wend - wbegin), absMACRO( 00617 d__1)); 00618 if (sgndef == 1.) { 00619 /* Computing MAX */ 00620 d__1 = wgap[wbegin]; 00621 tau = maxMACRO(d__1,avgap) * .5; 00622 /* Computing MAX */ 00623 d__1 = tau, d__2 = werr[wbegin]; 00624 tau = maxMACRO(d__1,d__2); 00625 } else { 00626 /* Computing MAX */ 00627 d__1 = wgap[wend - 1]; 00628 tau = maxMACRO(d__1,avgap) * .5; 00629 /* Computing MAX */ 00630 d__1 = tau, d__2 = werr[wend]; 00631 tau = maxMACRO(d__1,d__2); 00632 } 00633 } else { 00634 tau = werr[wbegin]; 00635 } 00636 } 00637 00638 for (idum = 1; idum <= 6; ++idum) { 00639 /* Compute L D L^T factorization of tridiagonal matrix T - sigma I. */ 00640 /* Store D in WORK(1:IN), L in WORK(IN+1:2*IN), and reciprocals of */ 00641 /* pivots in WORK(2*IN+1:3*IN) */ 00642 dpivot = d__[ibegin] - sigma; 00643 work[1] = dpivot; 00644 dmax__ = absMACRO(work[1]); 00645 j = ibegin; 00646 i__2 = in - 1; 00647 for (i__ = 1; i__ <= i__2; ++i__) { 00648 work[(in << 1) + i__] = 1. / work[i__]; 00649 tmp = e[j] * work[(in << 1) + i__]; 00650 work[in + i__] = tmp; 00651 dpivot = d__[j + 1] - sigma - tmp * e[j]; 00652 work[i__ + 1] = dpivot; 00653 /* Computing MAX */ 00654 d__1 = dmax__, d__2 = absMACRO(dpivot); 00655 dmax__ = maxMACRO(d__1,d__2); 00656 ++j; 00657 /* L70: */ 00658 } 00659 /* check for element growth */ 00660 if (dmax__ > spdiam * 64.) { 00661 norep = TRUE_; 00662 } else { 00663 norep = FALSE_; 00664 } 00665 if (usedqd && ! norep) { 00666 /* Ensure the definiteness of the representation */ 00667 /* All entries of D (of L D L^T) must have the same sign */ 00668 i__2 = in; 00669 for (i__ = 1; i__ <= i__2; ++i__) { 00670 tmp = sgndef * work[i__]; 00671 if (tmp < 0.) { 00672 norep = TRUE_; 00673 } 00674 /* L71: */ 00675 } 00676 } 00677 if (norep) { 00678 /* Note that in the case of IRANGE=ALLRNG, we use the Gerschgorin */ 00679 /* shift which makes the matrix definite. So we should end up */ 00680 /* here really only in the case of IRANGE = VALRNG or INDRNG. */ 00681 if (idum == 5) { 00682 if (sgndef == 1.) { 00683 /* The fudged Gerschgorin shift should succeed */ 00684 sigma = gl - spdiam * 2. * eps * *n - *pivmin * 4.; 00685 } else { 00686 sigma = gu + spdiam * 2. * eps * *n + *pivmin * 4.; 00687 } 00688 } else { 00689 sigma -= sgndef * tau; 00690 tau *= 2.; 00691 } 00692 } else { 00693 /* an initial RRR is found */ 00694 goto L83; 00695 } 00696 /* L80: */ 00697 } 00698 /* if the program reaches this point, no base representation could be */ 00699 /* found in MAXTRY iterations. */ 00700 *info = 2; 00701 return 0; 00702 L83: 00703 /* At this point, we have found an initial base representation */ 00704 /* T - SIGMA I = L D L^T with not too much element growth. */ 00705 /* Store the shift. */ 00706 e[iend] = sigma; 00707 /* Store D and L. */ 00708 template_blas_copy(&in, &work[1], &c__1, &d__[ibegin], &c__1); 00709 i__2 = in - 1; 00710 template_blas_copy(&i__2, &work[in + 1], &c__1, &e[ibegin], &c__1); 00711 if (mb > 1) { 00712 00713 /* Perturb each entry of the base representation by a small */ 00714 /* (but random) relative amount to overcome difficulties with */ 00715 /* glued matrices. */ 00716 00717 for (i__ = 1; i__ <= 4; ++i__) { 00718 iseed[i__ - 1] = 1; 00719 /* L122: */ 00720 } 00721 i__2 = (in << 1) - 1; 00722 template_lapack_larnv(&c__2, iseed, &i__2, &work[1]); 00723 i__2 = in - 1; 00724 for (i__ = 1; i__ <= i__2; ++i__) { 00725 d__[ibegin + i__ - 1] *= eps * 8. * work[i__] + 1.; 00726 e[ibegin + i__ - 1] *= eps * 8. * work[in + i__] + 1.; 00727 /* L125: */ 00728 } 00729 d__[iend] *= eps * 4. * work[in] + 1.; 00730 00731 } 00732 00733 /* Don't update the Gerschgorin intervals because keeping track */ 00734 /* of the updates would be too much work in DLARRV. */ 00735 /* We update W instead and use it to locate the proper Gerschgorin */ 00736 /* intervals. */ 00737 /* Compute the required eigenvalues of L D L' by bisection or dqds */ 00738 if (! usedqd) { 00739 /* If DLARRD has been used, shift the eigenvalue approximations */ 00740 /* according to their representation. This is necessary for */ 00741 /* a uniform DLARRV since dqds computes eigenvalues of the */ 00742 /* shifted representation. In DLARRV, W will always hold the */ 00743 /* UNshifted eigenvalue approximation. */ 00744 i__2 = wend; 00745 for (j = wbegin; j <= i__2; ++j) { 00746 w[j] -= sigma; 00747 werr[j] += (d__1 = w[j], absMACRO(d__1)) * eps; 00748 /* L134: */ 00749 } 00750 /* call DLARRB to reduce eigenvalue error of the approximations */ 00751 /* from DLARRD */ 00752 i__2 = iend - 1; 00753 for (i__ = ibegin; i__ <= i__2; ++i__) { 00754 /* Computing 2nd power */ 00755 d__1 = e[i__]; 00756 work[i__] = d__[i__] * (d__1 * d__1); 00757 /* L135: */ 00758 } 00759 /* use bisection to find EV from INDL to INDU */ 00760 i__2 = indl - 1; 00761 template_lapack_larrb(&in, &d__[ibegin], &work[ibegin], &indl, &indu, rtol1, 00762 rtol2, &i__2, &w[wbegin], &wgap[wbegin], &werr[wbegin], & 00763 work[(*n << 1) + 1], &iwork[1], pivmin, &spdiam, &in, & 00764 iinfo); 00765 if (iinfo != 0) { 00766 *info = -4; 00767 return 0; 00768 } 00769 /* DLARRB computes all gaps correctly except for the last one */ 00770 /* Record distance to VU/GU */ 00771 /* Computing MAX */ 00772 d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]); 00773 wgap[wend] = maxMACRO(d__1,d__2); 00774 i__2 = indu; 00775 for (i__ = indl; i__ <= i__2; ++i__) { 00776 ++(*m); 00777 iblock[*m] = jblk; 00778 indexw[*m] = i__; 00779 /* L138: */ 00780 } 00781 } else { 00782 /* Call dqds to get all eigs (and then possibly delete unwanted */ 00783 /* eigenvalues). */ 00784 /* Note that dqds finds the eigenvalues of the L D L^T representation */ 00785 /* of T to high relative accuracy. High relative accuracy */ 00786 /* might be lost when the shift of the RRR is subtracted to obtain */ 00787 /* the eigenvalues of T. However, T is not guaranteed to define its */ 00788 /* eigenvalues to high relative accuracy anyway. */ 00789 /* Set RTOL to the order of the tolerance used in DLASQ2 */ 00790 /* This is an ESTIMATED error, the worst case bound is 4*N*EPS */ 00791 /* which is usually too large and requires unnecessary work to be */ 00792 /* done by bisection when computing the eigenvectors */ 00793 rtol = template_blas_log((Treal) in) * 4. * eps; 00794 j = ibegin; 00795 i__2 = in - 1; 00796 for (i__ = 1; i__ <= i__2; ++i__) { 00797 work[(i__ << 1) - 1] = (d__1 = d__[j], absMACRO(d__1)); 00798 work[i__ * 2] = e[j] * e[j] * work[(i__ << 1) - 1]; 00799 ++j; 00800 /* L140: */ 00801 } 00802 work[(in << 1) - 1] = (d__1 = d__[iend], absMACRO(d__1)); 00803 work[in * 2] = 0.; 00804 template_lapack_lasq2(&in, &work[1], &iinfo); 00805 if (iinfo != 0) { 00806 /* If IINFO = -5 then an index is part of a tight cluster */ 00807 /* and should be changed. The index is in IWORK(1) and the */ 00808 /* gap is in WORK(N+1) */ 00809 *info = -5; 00810 return 0; 00811 } else { 00812 /* Test that all eigenvalues are positive as expected */ 00813 i__2 = in; 00814 for (i__ = 1; i__ <= i__2; ++i__) { 00815 if (work[i__] < 0.) { 00816 *info = -6; 00817 return 0; 00818 } 00819 /* L149: */ 00820 } 00821 } 00822 if (sgndef > 0.) { 00823 i__2 = indu; 00824 for (i__ = indl; i__ <= i__2; ++i__) { 00825 ++(*m); 00826 w[*m] = work[in - i__ + 1]; 00827 iblock[*m] = jblk; 00828 indexw[*m] = i__; 00829 /* L150: */ 00830 } 00831 } else { 00832 i__2 = indu; 00833 for (i__ = indl; i__ <= i__2; ++i__) { 00834 ++(*m); 00835 w[*m] = -work[i__]; 00836 iblock[*m] = jblk; 00837 indexw[*m] = i__; 00838 /* L160: */ 00839 } 00840 } 00841 i__2 = *m; 00842 for (i__ = *m - mb + 1; i__ <= i__2; ++i__) { 00843 /* the value of RTOL below should be the tolerance in DLASQ2 */ 00844 werr[i__] = rtol * (d__1 = w[i__], absMACRO(d__1)); 00845 /* L165: */ 00846 } 00847 i__2 = *m - 1; 00848 for (i__ = *m - mb + 1; i__ <= i__2; ++i__) { 00849 /* compute the right gap between the intervals */ 00850 /* Computing MAX */ 00851 d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + werr[ 00852 i__]); 00853 wgap[i__] = maxMACRO(d__1,d__2); 00854 /* L166: */ 00855 } 00856 /* Computing MAX */ 00857 d__1 = 0., d__2 = *vu - sigma - (w[*m] + werr[*m]); 00858 wgap[*m] = maxMACRO(d__1,d__2); 00859 } 00860 /* proceed with next block */ 00861 ibegin = iend + 1; 00862 wbegin = wend + 1; 00863 L170: 00864 ; 00865 } 00866 00867 return 0; 00868 00869 /* end of DLARRE */ 00870 00871 } /* dlarre_ */ 00872 00873 #endif