00001 /* Ergo, version 3.7, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2018 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, 00004 * and Anastasia Kruchinina. 00005 * 00006 * This program is free software: you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation, either version 3 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * This program is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License 00017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00018 * 00019 * Primary academic reference: 00020 * Ergo: An open-source program for linear-scaling electronic structure 00021 * calculations, 00022 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia 00023 * Kruchinina, 00024 * SoftwareX 7, 107 (2018), 00025 * <http://dx.doi.org/10.1016/j.softx.2018.03.005> 00026 * 00027 * For further information about Ergo, see <http://www.ergoscf.org>. 00028 */ 00029 00030 /* This file belongs to the template_lapack part of the Ergo source 00031 * code. The source files in the template_lapack directory are modified 00032 * versions of files originally distributed as CLAPACK, see the 00033 * Copyright/license notice in the file template_lapack/COPYING. 00034 */ 00035 00036 00037 #ifndef TEMPLATE_LAPACK_LARRV_HEADER 00038 #define TEMPLATE_LAPACK_LARRV_HEADER 00039 00040 00041 #include "template_lapack_lar1v.h" 00042 00043 00044 template<class Treal> 00045 int template_lapack_larrv(const integer *n, Treal *vl, Treal *vu, 00046 Treal *d__, Treal *l, Treal *pivmin, integer *isplit, 00047 integer *m, integer *dol, integer *dou, Treal *minrgp, 00048 Treal *rtol1, Treal *rtol2, Treal *w, Treal *werr, 00049 Treal *wgap, integer *iblock, integer *indexw, Treal *gers, 00050 Treal *z__, const integer *ldz, integer *isuppz, Treal *work, 00051 integer *iwork, integer *info) 00052 { 00053 /* System generated locals */ 00054 integer z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; 00055 Treal d__1, d__2; 00056 logical L__1; 00057 00058 00059 /* Local variables */ 00060 integer minwsize, i__, j, k, p, q, miniwsize, ii; 00061 Treal gl; 00062 integer im, in; 00063 Treal gu, gap, eps, tau, tol, tmp; 00064 integer zto; 00065 Treal ztz; 00066 integer iend, jblk; 00067 Treal lgap; 00068 integer done; 00069 Treal rgap, left; 00070 integer wend, iter; 00071 Treal bstw = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning 00072 integer itmp1; 00073 integer indld; 00074 Treal fudge; 00075 integer idone; 00076 Treal sigma; 00077 integer iinfo, iindr; 00078 Treal resid; 00079 logical eskip; 00080 Treal right; 00081 integer nclus, zfrom; 00082 Treal rqtol; 00083 integer iindc1, iindc2; 00084 logical stp2ii; 00085 Treal lambda; 00086 integer ibegin, indeig; 00087 logical needbs; 00088 integer indlld; 00089 Treal sgndef, mingma; 00090 integer oldien, oldncl, wbegin; 00091 Treal spdiam; 00092 integer negcnt; 00093 integer oldcls; 00094 Treal savgap; 00095 integer ndepth; 00096 Treal ssigma; 00097 logical usedbs; 00098 integer iindwk, offset; 00099 Treal gaptol; 00100 integer newcls, oldfst, indwrk, windex, oldlst; 00101 logical usedrq; 00102 integer newfst, newftt, parity, windmn, windpl, isupmn, newlst, zusedl; 00103 Treal bstres = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning 00104 integer newsiz, zusedu, zusedw; 00105 Treal nrminv, rqcorr; 00106 logical tryrqc; 00107 integer isupmx; 00108 00109 00110 /* -- LAPACK auxiliary routine (version 3.2) -- */ 00111 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ 00112 /* November 2006 */ 00113 00114 /* .. Scalar Arguments .. */ 00115 /* .. */ 00116 /* .. Array Arguments .. */ 00117 /* .. */ 00118 00119 /* Purpose */ 00120 /* ======= */ 00121 00122 /* DLARRV computes the eigenvectors of the tridiagonal matrix */ 00123 /* T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T. */ 00124 /* The input eigenvalues should have been computed by DLARRE. */ 00125 00126 /* Arguments */ 00127 /* ========= */ 00128 00129 /* N (input) INTEGER */ 00130 /* The order of the matrix. N >= 0. */ 00131 00132 /* VL (input) DOUBLE PRECISION */ 00133 /* VU (input) DOUBLE PRECISION */ 00134 /* Lower and upper bounds of the interval that contains the desired */ 00135 /* eigenvalues. VL < VU. Needed to compute gaps on the left or right */ 00136 /* end of the extremal eigenvalues in the desired RANGE. */ 00137 00138 /* D (input/output) DOUBLE PRECISION array, dimension (N) */ 00139 /* On entry, the N diagonal elements of the diagonal matrix D. */ 00140 /* On exit, D may be overwritten. */ 00141 00142 /* L (input/output) DOUBLE PRECISION array, dimension (N) */ 00143 /* On entry, the (N-1) subdiagonal elements of the unit */ 00144 /* bidiagonal matrix L are in elements 1 to N-1 of L */ 00145 /* (if the matrix is not splitted.) At the end of each block */ 00146 /* is stored the corresponding shift as given by DLARRE. */ 00147 /* On exit, L is overwritten. */ 00148 00149 /* PIVMIN (in) DOUBLE PRECISION */ 00150 /* The minimum pivot allowed in the Sturm sequence. */ 00151 00152 /* ISPLIT (input) INTEGER array, dimension (N) */ 00153 /* The splitting points, at which T breaks up into blocks. */ 00154 /* The first block consists of rows/columns 1 to */ 00155 /* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */ 00156 /* through ISPLIT( 2 ), etc. */ 00157 00158 /* M (input) INTEGER */ 00159 /* The total number of input eigenvalues. 0 <= M <= N. */ 00160 00161 /* DOL (input) INTEGER */ 00162 /* DOU (input) INTEGER */ 00163 /* If the user wants to compute only selected eigenvectors from all */ 00164 /* the eigenvalues supplied, he can specify an index range DOL:DOU. */ 00165 /* Or else the setting DOL=1, DOU=M should be applied. */ 00166 /* Note that DOL and DOU refer to the order in which the eigenvalues */ 00167 /* are stored in W. */ 00168 /* If the user wants to compute only selected eigenpairs, then */ 00169 /* the columns DOL-1 to DOU+1 of the eigenvector space Z contain the */ 00170 /* computed eigenvectors. All other columns of Z are set to zero. */ 00171 00172 /* MINRGP (input) DOUBLE PRECISION */ 00173 00174 /* RTOL1 (input) DOUBLE PRECISION */ 00175 /* RTOL2 (input) DOUBLE PRECISION */ 00176 /* Parameters for bisection. */ 00177 /* An interval [LEFT,RIGHT] has converged if */ 00178 /* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */ 00179 00180 /* W (input/output) DOUBLE PRECISION array, dimension (N) */ 00181 /* The first M elements of W contain the APPROXIMATE eigenvalues for */ 00182 /* which eigenvectors are to be computed. The eigenvalues */ 00183 /* should be grouped by split-off block and ordered from */ 00184 /* smallest to largest within the block ( The output array */ 00185 /* W from DLARRE is expected here ). Furthermore, they are with */ 00186 /* respect to the shift of the corresponding root representation */ 00187 /* for their block. On exit, W holds the eigenvalues of the */ 00188 /* UNshifted matrix. */ 00189 00190 /* WERR (input/output) DOUBLE PRECISION array, dimension (N) */ 00191 /* The first M elements contain the semiwidth of the uncertainty */ 00192 /* interval of the corresponding eigenvalue in W */ 00193 00194 /* WGAP (input/output) DOUBLE PRECISION array, dimension (N) */ 00195 /* The separation from the right neighbor eigenvalue in W. */ 00196 00197 /* IBLOCK (input) INTEGER array, dimension (N) */ 00198 /* The indices of the blocks (submatrices) associated with the */ 00199 /* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */ 00200 /* W(i) belongs to the first block from the top, =2 if W(i) */ 00201 /* belongs to the second block, etc. */ 00202 00203 /* INDEXW (input) INTEGER array, dimension (N) */ 00204 /* The indices of the eigenvalues within each block (submatrix); */ 00205 /* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */ 00206 /* i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. */ 00207 00208 /* GERS (input) DOUBLE PRECISION array, dimension (2*N) */ 00209 /* The N Gerschgorin intervals (the i-th Gerschgorin interval */ 00210 /* is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should */ 00211 /* be computed from the original UNshifted matrix. */ 00212 00213 /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */ 00214 /* If INFO = 0, the first M columns of Z contain the */ 00215 /* orthonormal eigenvectors of the matrix T */ 00216 /* corresponding to the input eigenvalues, with the i-th */ 00217 /* column of Z holding the eigenvector associated with W(i). */ 00218 /* Note: the user must ensure that at least max(1,M) columns are */ 00219 /* supplied in the array Z. */ 00220 00221 /* LDZ (input) INTEGER */ 00222 /* The leading dimension of the array Z. LDZ >= 1, and if */ 00223 /* JOBZ = 'V', LDZ >= max(1,N). */ 00224 00225 /* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) */ 00226 /* The support of the eigenvectors in Z, i.e., the indices */ 00227 /* indicating the nonzero elements in Z. The I-th eigenvector */ 00228 /* is nonzero only in elements ISUPPZ( 2*I-1 ) through */ 00229 /* ISUPPZ( 2*I ). */ 00230 00231 /* WORK (workspace) DOUBLE PRECISION array, dimension (12*N) */ 00232 00233 /* IWORK (workspace) INTEGER array, dimension (7*N) */ 00234 00235 /* INFO (output) INTEGER */ 00236 /* = 0: successful exit */ 00237 00238 /* > 0: A problem occured in DLARRV. */ 00239 /* < 0: One of the called subroutines signaled an internal problem. */ 00240 /* Needs inspection of the corresponding parameter IINFO */ 00241 /* for further information. */ 00242 00243 /* =-1: Problem in DLARRB when refining a child's eigenvalues. */ 00244 /* =-2: Problem in DLARRF when computing the RRR of a child. */ 00245 /* When a child is inside a tight cluster, it can be difficult */ 00246 /* to find an RRR. A partial remedy from the user's point of */ 00247 /* view is to make the parameter MINRGP smaller and recompile. */ 00248 /* However, as the orthogonality of the computed vectors is */ 00249 /* proportional to 1/MINRGP, the user should be aware that */ 00250 /* he might be trading in precision when he decreases MINRGP. */ 00251 /* =-3: Problem in DLARRB when refining a single eigenvalue */ 00252 /* after the Rayleigh correction was rejected. */ 00253 /* = 5: The Rayleigh Quotient Iteration failed to converge to */ 00254 /* full accuracy in MAXITR steps. */ 00255 00256 /* Further Details */ 00257 /* =============== */ 00258 00259 /* Based on contributions by */ 00260 /* Beresford Parlett, University of California, Berkeley, USA */ 00261 /* Jim Demmel, University of California, Berkeley, USA */ 00262 /* Inderjit Dhillon, University of Texas, Austin, USA */ 00263 /* Osni Marques, LBNL/NERSC, USA */ 00264 /* Christof Voemel, University of California, Berkeley, USA */ 00265 00266 /* ===================================================================== */ 00267 00268 /* .. Parameters .. */ 00269 /* .. */ 00270 /* .. Local Scalars .. */ 00271 /* .. */ 00272 /* .. External Functions .. */ 00273 /* .. */ 00274 /* .. External Subroutines .. */ 00275 /* .. */ 00276 /* .. Intrinsic Functions .. */ 00277 /* .. */ 00278 /* .. Executable Statements .. */ 00279 /* .. */ 00280 /* The first N entries of WORK are reserved for the eigenvalues */ 00281 00282 /* Table of constant values */ 00283 00284 Treal c_b5 = 0.; 00285 integer c__1 = 1; 00286 integer c__2 = 2; 00287 00288 00289 /* Parameter adjustments */ 00290 --d__; 00291 --l; 00292 --isplit; 00293 --w; 00294 --werr; 00295 --wgap; 00296 --iblock; 00297 --indexw; 00298 --gers; 00299 z_dim1 = *ldz; 00300 z_offset = 1 + z_dim1; 00301 z__ -= z_offset; 00302 --isuppz; 00303 --work; 00304 --iwork; 00305 00306 /* Function Body */ 00307 indld = *n + 1; 00308 indlld = (*n << 1) + 1; 00309 indwrk = *n * 3 + 1; 00310 minwsize = *n * 12; 00311 i__1 = minwsize; 00312 for (i__ = 1; i__ <= i__1; ++i__) { 00313 work[i__] = 0.; 00314 /* L5: */ 00315 } 00316 /* IWORK(IINDR+1:IINDR+N) hold the twist indices R for the */ 00317 /* factorization used to compute the FP vector */ 00318 iindr = 0; 00319 /* IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current */ 00320 /* layer and the one above. */ 00321 iindc1 = *n; 00322 iindc2 = *n << 1; 00323 iindwk = *n * 3 + 1; 00324 miniwsize = *n * 7; 00325 i__1 = miniwsize; 00326 for (i__ = 1; i__ <= i__1; ++i__) { 00327 iwork[i__] = 0; 00328 /* L10: */ 00329 } 00330 zusedl = 1; 00331 if (*dol > 1) { 00332 /* Set lower bound for use of Z */ 00333 zusedl = *dol - 1; 00334 } 00335 zusedu = *m; 00336 if (*dou < *m) { 00337 /* Set lower bound for use of Z */ 00338 zusedu = *dou + 1; 00339 } 00340 /* The width of the part of Z that is used */ 00341 zusedw = zusedu - zusedl + 1; 00342 template_lapack_laset("Full", n, &zusedw, &c_b5, &c_b5, &z__[zusedl * z_dim1 + 1], ldz); 00343 eps = template_lapack_lamch("Precision",(Treal)0); 00344 rqtol = eps * 2.; 00345 00346 /* Set expert flags for standard code. */ 00347 tryrqc = TRUE_; 00348 if (*dol == 1 && *dou == *m) { 00349 } else { 00350 /* Only selected eigenpairs are computed. Since the other evalues */ 00351 /* are not refined by RQ iteration, bisection has to compute to full */ 00352 /* accuracy. */ 00353 *rtol1 = eps * 4.; 00354 *rtol2 = eps * 4.; 00355 } 00356 /* The entries WBEGIN:WEND in W, WERR, WGAP correspond to the */ 00357 /* desired eigenvalues. The support of the nonzero eigenvector */ 00358 /* entries is contained in the interval IBEGIN:IEND. */ 00359 /* Remark that if k eigenpairs are desired, then the eigenvectors */ 00360 /* are stored in k contiguous columns of Z. */ 00361 /* DONE is the number of eigenvectors already computed */ 00362 done = 0; 00363 ibegin = 1; 00364 wbegin = 1; 00365 i__1 = iblock[*m]; 00366 for (jblk = 1; jblk <= i__1; ++jblk) { 00367 iend = isplit[jblk]; 00368 sigma = l[iend]; 00369 /* Find the eigenvectors of the submatrix indexed IBEGIN */ 00370 /* through IEND. */ 00371 wend = wbegin - 1; 00372 L15: 00373 if (wend < *m) { 00374 if (iblock[wend + 1] == jblk) { 00375 ++wend; 00376 goto L15; 00377 } 00378 } 00379 if (wend < wbegin) { 00380 ibegin = iend + 1; 00381 goto L170; 00382 } else if (wend < *dol || wbegin > *dou) { 00383 ibegin = iend + 1; 00384 wbegin = wend + 1; 00385 goto L170; 00386 } 00387 /* Find local spectral diameter of the block */ 00388 gl = gers[(ibegin << 1) - 1]; 00389 gu = gers[ibegin * 2]; 00390 i__2 = iend; 00391 for (i__ = ibegin + 1; i__ <= i__2; ++i__) { 00392 /* Computing MIN */ 00393 d__1 = gers[(i__ << 1) - 1]; 00394 gl = minMACRO(d__1,gl); 00395 /* Computing MAX */ 00396 d__1 = gers[i__ * 2]; 00397 gu = maxMACRO(d__1,gu); 00398 /* L20: */ 00399 } 00400 spdiam = gu - gl; 00401 /* OLDIEN is the last index of the previous block */ 00402 oldien = ibegin - 1; 00403 /* Calculate the size of the current block */ 00404 in = iend - ibegin + 1; 00405 /* The number of eigenvalues in the current block */ 00406 im = wend - wbegin + 1; 00407 /* This is for a 1x1 block */ 00408 if (ibegin == iend) { 00409 ++done; 00410 z__[ibegin + wbegin * z_dim1] = 1.; 00411 isuppz[(wbegin << 1) - 1] = ibegin; 00412 isuppz[wbegin * 2] = ibegin; 00413 w[wbegin] += sigma; 00414 work[wbegin] = w[wbegin]; 00415 ibegin = iend + 1; 00416 ++wbegin; 00417 goto L170; 00418 } 00419 /* The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) */ 00420 /* Note that these can be approximations, in this case, the corresp. */ 00421 /* entries of WERR give the size of the uncertainty interval. */ 00422 /* The eigenvalue approximations will be refined when necessary as */ 00423 /* high relative accuracy is required for the computation of the */ 00424 /* corresponding eigenvectors. */ 00425 template_blas_copy(&im, &w[wbegin], &c__1, &work[wbegin], &c__1); 00426 /* We store in W the eigenvalue approximations w.r.t. the original */ 00427 /* matrix T. */ 00428 i__2 = im; 00429 for (i__ = 1; i__ <= i__2; ++i__) { 00430 w[wbegin + i__ - 1] += sigma; 00431 /* L30: */ 00432 } 00433 /* NDEPTH is the current depth of the representation tree */ 00434 ndepth = 0; 00435 /* PARITY is either 1 or 0 */ 00436 parity = 1; 00437 /* NCLUS is the number of clusters for the next level of the */ 00438 /* representation tree, we start with NCLUS = 1 for the root */ 00439 nclus = 1; 00440 iwork[iindc1 + 1] = 1; 00441 iwork[iindc1 + 2] = im; 00442 /* IDONE is the number of eigenvectors already computed in the current */ 00443 /* block */ 00444 idone = 0; 00445 /* loop while( IDONE.LT.IM ) */ 00446 /* generate the representation tree for the current block and */ 00447 /* compute the eigenvectors */ 00448 L40: 00449 if (idone < im) { 00450 /* This is a crude protection against infinitely deep trees */ 00451 if (ndepth > *m) { 00452 *info = -2; 00453 return 0; 00454 } 00455 /* breadth first processing of the current level of the representation */ 00456 /* tree: OLDNCL = number of clusters on current level */ 00457 oldncl = nclus; 00458 /* reset NCLUS to count the number of child clusters */ 00459 nclus = 0; 00460 00461 parity = 1 - parity; 00462 if (parity == 0) { 00463 oldcls = iindc1; 00464 newcls = iindc2; 00465 } else { 00466 oldcls = iindc2; 00467 newcls = iindc1; 00468 } 00469 /* Process the clusters on the current level */ 00470 i__2 = oldncl; 00471 for (i__ = 1; i__ <= i__2; ++i__) { 00472 j = oldcls + (i__ << 1); 00473 /* OLDFST, OLDLST = first, last index of current cluster. */ 00474 /* cluster indices start with 1 and are relative */ 00475 /* to WBEGIN when accessing W, WGAP, WERR, Z */ 00476 oldfst = iwork[j - 1]; 00477 oldlst = iwork[j]; 00478 if (ndepth > 0) { 00479 /* Retrieve relatively robust representation (RRR) of cluster */ 00480 /* that has been computed at the previous level */ 00481 /* The RRR is stored in Z and overwritten once the eigenvectors */ 00482 /* have been computed or when the cluster is refined */ 00483 if (*dol == 1 && *dou == *m) { 00484 /* Get representation from location of the leftmost evalue */ 00485 /* of the cluster */ 00486 j = wbegin + oldfst - 1; 00487 } else { 00488 if (wbegin + oldfst - 1 < *dol) { 00489 /* Get representation from the left end of Z array */ 00490 j = *dol - 1; 00491 } else if (wbegin + oldfst - 1 > *dou) { 00492 /* Get representation from the right end of Z array */ 00493 j = *dou; 00494 } else { 00495 j = wbegin + oldfst - 1; 00496 } 00497 } 00498 template_blas_copy(&in, &z__[ibegin + j * z_dim1], &c__1, &d__[ibegin] 00499 , &c__1); 00500 i__3 = in - 1; 00501 template_blas_copy(&i__3, &z__[ibegin + (j + 1) * z_dim1], &c__1, &l[ 00502 ibegin], &c__1); 00503 sigma = z__[iend + (j + 1) * z_dim1]; 00504 /* Set the corresponding entries in Z to zero */ 00505 template_lapack_laset("Full", &in, &c__2, &c_b5, &c_b5, &z__[ibegin + j 00506 * z_dim1], ldz); 00507 } 00508 /* Compute DL and DLL of current RRR */ 00509 i__3 = iend - 1; 00510 for (j = ibegin; j <= i__3; ++j) { 00511 tmp = d__[j] * l[j]; 00512 work[indld - 1 + j] = tmp; 00513 work[indlld - 1 + j] = tmp * l[j]; 00514 /* L50: */ 00515 } 00516 if (ndepth > 0) { 00517 /* P and Q are index of the first and last eigenvalue to compute */ 00518 /* within the current block */ 00519 p = indexw[wbegin - 1 + oldfst]; 00520 q = indexw[wbegin - 1 + oldlst]; 00521 /* Offset for the arrays WORK, WGAP and WERR, i.e., th P-OFFSET */ 00522 /* thru' Q-OFFSET elements of these arrays are to be used. */ 00523 /* OFFSET = P-OLDFST */ 00524 offset = indexw[wbegin] - 1; 00525 /* perform limited bisection (if necessary) to get approximate */ 00526 /* eigenvalues to the precision needed. */ 00527 template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin - 1], &p, 00528 &q, rtol1, rtol2, &offset, &work[wbegin], &wgap[ 00529 wbegin], &werr[wbegin], &work[indwrk], &iwork[ 00530 iindwk], pivmin, &spdiam, &in, &iinfo); 00531 if (iinfo != 0) { 00532 *info = -1; 00533 return 0; 00534 } 00535 /* We also recompute the extremal gaps. W holds all eigenvalues */ 00536 /* of the unshifted matrix and must be used for computation */ 00537 /* of WGAP, the entries of WORK might stem from RRRs with */ 00538 /* different shifts. The gaps from WBEGIN-1+OLDFST to */ 00539 /* WBEGIN-1+OLDLST are correctly computed in DLARRB. */ 00540 /* However, we only allow the gaps to become greater since */ 00541 /* this is what should happen when we decrease WERR */ 00542 if (oldfst > 1) { 00543 /* Computing MAX */ 00544 d__1 = wgap[wbegin + oldfst - 2], d__2 = w[wbegin + 00545 oldfst - 1] - werr[wbegin + oldfst - 1] - w[ 00546 wbegin + oldfst - 2] - werr[wbegin + oldfst - 00547 2]; 00548 wgap[wbegin + oldfst - 2] = maxMACRO(d__1,d__2); 00549 } 00550 if (wbegin + oldlst - 1 < wend) { 00551 /* Computing MAX */ 00552 d__1 = wgap[wbegin + oldlst - 1], d__2 = w[wbegin + 00553 oldlst] - werr[wbegin + oldlst] - w[wbegin + 00554 oldlst - 1] - werr[wbegin + oldlst - 1]; 00555 wgap[wbegin + oldlst - 1] = maxMACRO(d__1,d__2); 00556 } 00557 /* Each time the eigenvalues in WORK get refined, we store */ 00558 /* the newly found approximation with all shifts applied in W */ 00559 i__3 = oldlst; 00560 for (j = oldfst; j <= i__3; ++j) { 00561 w[wbegin + j - 1] = work[wbegin + j - 1] + sigma; 00562 /* L53: */ 00563 } 00564 } 00565 /* Process the current node. */ 00566 newfst = oldfst; 00567 i__3 = oldlst; 00568 for (j = oldfst; j <= i__3; ++j) { 00569 if (j == oldlst) { 00570 /* we are at the right end of the cluster, this is also the */ 00571 /* boundary of the child cluster */ 00572 newlst = j; 00573 } else if (wgap[wbegin + j - 1] >= *minrgp * (d__1 = work[ 00574 wbegin + j - 1], absMACRO(d__1))) { 00575 /* the right relative gap is big enough, the child cluster */ 00576 /* (NEWFST,..,NEWLST) is well separated from the following */ 00577 newlst = j; 00578 } else { 00579 /* inside a child cluster, the relative gap is not */ 00580 /* big enough. */ 00581 goto L140; 00582 } 00583 /* Compute size of child cluster found */ 00584 newsiz = newlst - newfst + 1; 00585 /* NEWFTT is the place in Z where the new RRR or the computed */ 00586 /* eigenvector is to be stored */ 00587 if (*dol == 1 && *dou == *m) { 00588 /* Store representation at location of the leftmost evalue */ 00589 /* of the cluster */ 00590 newftt = wbegin + newfst - 1; 00591 } else { 00592 if (wbegin + newfst - 1 < *dol) { 00593 /* Store representation at the left end of Z array */ 00594 newftt = *dol - 1; 00595 } else if (wbegin + newfst - 1 > *dou) { 00596 /* Store representation at the right end of Z array */ 00597 newftt = *dou; 00598 } else { 00599 newftt = wbegin + newfst - 1; 00600 } 00601 } 00602 if (newsiz > 1) { 00603 00604 /* Current child is not a singleton but a cluster. */ 00605 /* Compute and store new representation of child. */ 00606 00607 00608 /* Compute left and right cluster gap. */ 00609 00610 /* LGAP and RGAP are not computed from WORK because */ 00611 /* the eigenvalue approximations may stem from RRRs */ 00612 /* different shifts. However, W hold all eigenvalues */ 00613 /* of the unshifted matrix. Still, the entries in WGAP */ 00614 /* have to be computed from WORK since the entries */ 00615 /* in W might be of the same order so that gaps are not */ 00616 /* exhibited correctly for very close eigenvalues. */ 00617 if (newfst == 1) { 00618 /* Computing MAX */ 00619 d__1 = 0., d__2 = w[wbegin] - werr[wbegin] - *vl; 00620 lgap = maxMACRO(d__1,d__2); 00621 } else { 00622 lgap = wgap[wbegin + newfst - 2]; 00623 } 00624 rgap = wgap[wbegin + newlst - 1]; 00625 00626 /* Compute left- and rightmost eigenvalue of child */ 00627 /* to high precision in order to shift as close */ 00628 /* as possible and obtain as large relative gaps */ 00629 /* as possible */ 00630 00631 for (k = 1; k <= 2; ++k) { 00632 if (k == 1) { 00633 p = indexw[wbegin - 1 + newfst]; 00634 } else { 00635 p = indexw[wbegin - 1 + newlst]; 00636 } 00637 offset = indexw[wbegin] - 1; 00638 template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin 00639 - 1], &p, &p, &rqtol, &rqtol, &offset, & 00640 work[wbegin], &wgap[wbegin], &werr[wbegin] 00641 , &work[indwrk], &iwork[iindwk], pivmin, & 00642 spdiam, &in, &iinfo); 00643 /* L55: */ 00644 } 00645 00646 if (wbegin + newlst - 1 < *dol || wbegin + newfst - 1 00647 > *dou) { 00648 /* if the cluster contains no desired eigenvalues */ 00649 /* skip the computation of that branch of the rep. tree */ 00650 00651 /* We could skip before the refinement of the extremal */ 00652 /* eigenvalues of the child, but then the representation */ 00653 /* tree could be different from the one when nothing is */ 00654 /* skipped. For this reason we skip at this place. */ 00655 idone = idone + newlst - newfst + 1; 00656 goto L139; 00657 } 00658 00659 /* Compute RRR of child cluster. */ 00660 /* Note that the new RRR is stored in Z */ 00661 00662 /* DLARRF needs LWORK = 2*N */ 00663 template_lapack_larrf(&in, &d__[ibegin], &l[ibegin], &work[indld + 00664 ibegin - 1], &newfst, &newlst, &work[wbegin], 00665 &wgap[wbegin], &werr[wbegin], &spdiam, &lgap, 00666 &rgap, pivmin, &tau, &z__[ibegin + newftt * 00667 z_dim1], &z__[ibegin + (newftt + 1) * z_dim1], 00668 &work[indwrk], &iinfo); 00669 if (iinfo == 0) { 00670 /* a new RRR for the cluster was found by DLARRF */ 00671 /* update shift and store it */ 00672 ssigma = sigma + tau; 00673 z__[iend + (newftt + 1) * z_dim1] = ssigma; 00674 /* WORK() are the midpoints and WERR() the semi-width */ 00675 /* Note that the entries in W are unchanged. */ 00676 i__4 = newlst; 00677 for (k = newfst; k <= i__4; ++k) { 00678 fudge = eps * 3. * (d__1 = work[wbegin + k - 00679 1], absMACRO(d__1)); 00680 work[wbegin + k - 1] -= tau; 00681 fudge += eps * 4. * (d__1 = work[wbegin + k - 00682 1], absMACRO(d__1)); 00683 /* Fudge errors */ 00684 werr[wbegin + k - 1] += fudge; 00685 /* Gaps are not fudged. Provided that WERR is small */ 00686 /* when eigenvalues are close, a zero gap indicates */ 00687 /* that a new representation is needed for resolving */ 00688 /* the cluster. A fudge could lead to a wrong decision */ 00689 /* of judging eigenvalues 'separated' which in */ 00690 /* reality are not. This could have a negative impact */ 00691 /* on the orthogonality of the computed eigenvectors. */ 00692 /* L116: */ 00693 } 00694 ++nclus; 00695 k = newcls + (nclus << 1); 00696 iwork[k - 1] = newfst; 00697 iwork[k] = newlst; 00698 } else { 00699 *info = -2; 00700 return 0; 00701 } 00702 } else { 00703 00704 /* Compute eigenvector of singleton */ 00705 00706 iter = 0; 00707 00708 tol = template_blas_log((Treal) in) * 4. * eps; 00709 00710 k = newfst; 00711 windex = wbegin + k - 1; 00712 /* Computing MAX */ 00713 i__4 = windex - 1; 00714 windmn = maxMACRO(i__4,1); 00715 /* Computing MIN */ 00716 i__4 = windex + 1; 00717 windpl = minMACRO(i__4,*m); 00718 lambda = work[windex]; 00719 ++done; 00720 /* Check if eigenvector computation is to be skipped */ 00721 if (windex < *dol || windex > *dou) { 00722 eskip = TRUE_; 00723 goto L125; 00724 } else { 00725 eskip = FALSE_; 00726 } 00727 left = work[windex] - werr[windex]; 00728 right = work[windex] + werr[windex]; 00729 indeig = indexw[windex]; 00730 /* Note that since we compute the eigenpairs for a child, */ 00731 /* all eigenvalue approximations are w.r.t the same shift. */ 00732 /* In this case, the entries in WORK should be used for */ 00733 /* computing the gaps since they exhibit even very small */ 00734 /* differences in the eigenvalues, as opposed to the */ 00735 /* entries in W which might "look" the same. */ 00736 if (k == 1) { 00737 /* In the case RANGE='I' and with not much initial */ 00738 /* accuracy in LAMBDA and VL, the formula */ 00739 /* LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) */ 00740 /* can lead to an overestimation of the left gap and */ 00741 /* thus to inadequately early RQI 'convergence'. */ 00742 /* Prevent this by forcing a small left gap. */ 00743 /* Computing MAX */ 00744 d__1 = absMACRO(left), d__2 = absMACRO(right); 00745 lgap = eps * maxMACRO(d__1,d__2); 00746 } else { 00747 lgap = wgap[windmn]; 00748 } 00749 if (k == im) { 00750 /* In the case RANGE='I' and with not much initial */ 00751 /* accuracy in LAMBDA and VU, the formula */ 00752 /* can lead to an overestimation of the right gap and */ 00753 /* thus to inadequately early RQI 'convergence'. */ 00754 /* Prevent this by forcing a small right gap. */ 00755 /* Computing MAX */ 00756 d__1 = absMACRO(left), d__2 = absMACRO(right); 00757 rgap = eps * maxMACRO(d__1,d__2); 00758 } else { 00759 rgap = wgap[windex]; 00760 } 00761 gap = minMACRO(lgap,rgap); 00762 if (k == 1 || k == im) { 00763 /* The eigenvector support can become wrong */ 00764 /* because significant entries could be cut off due to a */ 00765 /* large GAPTOL parameter in LAR1V. Prevent this. */ 00766 gaptol = 0.; 00767 } else { 00768 gaptol = gap * eps; 00769 } 00770 isupmn = in; 00771 isupmx = 1; 00772 /* Update WGAP so that it holds the minimum gap */ 00773 /* to the left or the right. This is crucial in the */ 00774 /* case where bisection is used to ensure that the */ 00775 /* eigenvalue is refined up to the required precision. */ 00776 /* The correct value is restored afterwards. */ 00777 savgap = wgap[windex]; 00778 wgap[windex] = gap; 00779 /* We want to use the Rayleigh Quotient Correction */ 00780 /* as often as possible since it converges quadratically */ 00781 /* when we are close enough to the desired eigenvalue. */ 00782 /* However, the Rayleigh Quotient can have the wrong sign */ 00783 /* and lead us away from the desired eigenvalue. In this */ 00784 /* case, the best we can do is to use bisection. */ 00785 usedbs = FALSE_; 00786 usedrq = FALSE_; 00787 /* Bisection is initially turned off unless it is forced */ 00788 needbs = ! tryrqc; 00789 L120: 00790 /* Check if bisection should be used to refine eigenvalue */ 00791 if (needbs) { 00792 /* Take the bisection as new iterate */ 00793 usedbs = TRUE_; 00794 itmp1 = iwork[iindr + windex]; 00795 offset = indexw[wbegin] - 1; 00796 d__1 = eps * 2.; 00797 template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin 00798 - 1], &indeig, &indeig, &c_b5, &d__1, & 00799 offset, &work[wbegin], &wgap[wbegin], & 00800 werr[wbegin], &work[indwrk], &iwork[ 00801 iindwk], pivmin, &spdiam, &itmp1, &iinfo); 00802 if (iinfo != 0) { 00803 *info = -3; 00804 return 0; 00805 } 00806 lambda = work[windex]; 00807 /* Reset twist index from inaccurate LAMBDA to */ 00808 /* force computation of true MINGMA */ 00809 iwork[iindr + windex] = 0; 00810 } 00811 /* Given LAMBDA, compute the eigenvector. */ 00812 L__1 = ! usedbs; 00813 template_lapack_lar1v(&in, &c__1, &in, &lambda, &d__[ibegin], &l[ 00814 ibegin], &work[indld + ibegin - 1], &work[ 00815 indlld + ibegin - 1], pivmin, &gaptol, &z__[ 00816 ibegin + windex * z_dim1], &L__1, &negcnt, & 00817 ztz, &mingma, &iwork[iindr + windex], &isuppz[ 00818 (windex << 1) - 1], &nrminv, &resid, &rqcorr, 00819 &work[indwrk]); 00820 if (iter == 0) { 00821 bstres = resid; 00822 bstw = lambda; 00823 } else if (resid < bstres) { 00824 bstres = resid; 00825 bstw = lambda; 00826 } 00827 /* Computing MIN */ 00828 i__4 = isupmn, i__5 = isuppz[(windex << 1) - 1]; 00829 isupmn = minMACRO(i__4,i__5); 00830 /* Computing MAX */ 00831 i__4 = isupmx, i__5 = isuppz[windex * 2]; 00832 isupmx = maxMACRO(i__4,i__5); 00833 ++iter; 00834 /* sin alpha <= |resid|/gap */ 00835 /* Note that both the residual and the gap are */ 00836 /* proportional to the matrix, so ||T|| doesn't play */ 00837 /* a role in the quotient */ 00838 00839 /* Convergence test for Rayleigh-Quotient iteration */ 00840 /* (omitted when Bisection has been used) */ 00841 00842 if (resid > tol * gap && absMACRO(rqcorr) > rqtol * absMACRO( 00843 lambda) && ! usedbs) { 00844 /* We need to check that the RQCORR update doesn't */ 00845 /* move the eigenvalue away from the desired one and */ 00846 /* towards a neighbor. -> protection with bisection */ 00847 if (indeig <= negcnt) { 00848 /* The wanted eigenvalue lies to the left */ 00849 sgndef = -1.; 00850 } else { 00851 /* The wanted eigenvalue lies to the right */ 00852 sgndef = 1.; 00853 } 00854 /* We only use the RQCORR if it improves the */ 00855 /* the iterate reasonably. */ 00856 if (rqcorr * sgndef >= 0. && lambda + rqcorr <= 00857 right && lambda + rqcorr >= left) { 00858 usedrq = TRUE_; 00859 /* Store new midpoint of bisection interval in WORK */ 00860 if (sgndef == 1.) { 00861 /* The current LAMBDA is on the left of the true */ 00862 /* eigenvalue */ 00863 left = lambda; 00864 /* We prefer to assume that the error estimate */ 00865 /* is correct. We could make the interval not */ 00866 /* as a bracket but to be modified if the RQCORR */ 00867 /* chooses to. In this case, the RIGHT side should */ 00868 /* be modified as follows: */ 00869 /* RIGHT = MAX(RIGHT, LAMBDA + RQCORR) */ 00870 } else { 00871 /* The current LAMBDA is on the right of the true */ 00872 /* eigenvalue */ 00873 right = lambda; 00874 /* See comment about assuming the error estimate is */ 00875 /* correct above. */ 00876 /* LEFT = MIN(LEFT, LAMBDA + RQCORR) */ 00877 } 00878 work[windex] = (right + left) * .5; 00879 /* Take RQCORR since it has the correct sign and */ 00880 /* improves the iterate reasonably */ 00881 lambda += rqcorr; 00882 /* Update width of error interval */ 00883 werr[windex] = (right - left) * .5; 00884 } else { 00885 needbs = TRUE_; 00886 } 00887 if (right - left < rqtol * absMACRO(lambda)) { 00888 /* The eigenvalue is computed to bisection accuracy */ 00889 /* compute eigenvector and stop */ 00890 usedbs = TRUE_; 00891 goto L120; 00892 } else if (iter < 10) { 00893 goto L120; 00894 } else if (iter == 10) { 00895 needbs = TRUE_; 00896 goto L120; 00897 } else { 00898 *info = 5; 00899 return 0; 00900 } 00901 } else { 00902 stp2ii = FALSE_; 00903 if (usedrq && usedbs && bstres <= resid) { 00904 lambda = bstw; 00905 stp2ii = TRUE_; 00906 } 00907 if (stp2ii) { 00908 /* improve error angle by second step */ 00909 L__1 = ! usedbs; 00910 template_lapack_lar1v(&in, &c__1, &in, &lambda, &d__[ibegin] 00911 , &l[ibegin], &work[indld + ibegin - 00912 1], &work[indlld + ibegin - 1], 00913 pivmin, &gaptol, &z__[ibegin + windex 00914 * z_dim1], &L__1, &negcnt, &ztz, & 00915 mingma, &iwork[iindr + windex], & 00916 isuppz[(windex << 1) - 1], &nrminv, & 00917 resid, &rqcorr, &work[indwrk]); 00918 } 00919 work[windex] = lambda; 00920 } 00921 00922 /* Compute FP-vector support w.r.t. whole matrix */ 00923 00924 isuppz[(windex << 1) - 1] += oldien; 00925 isuppz[windex * 2] += oldien; 00926 zfrom = isuppz[(windex << 1) - 1]; 00927 zto = isuppz[windex * 2]; 00928 isupmn += oldien; 00929 isupmx += oldien; 00930 /* Ensure vector is ok if support in the RQI has changed */ 00931 if (isupmn < zfrom) { 00932 i__4 = zfrom - 1; 00933 for (ii = isupmn; ii <= i__4; ++ii) { 00934 z__[ii + windex * z_dim1] = 0.; 00935 /* L122: */ 00936 } 00937 } 00938 if (isupmx > zto) { 00939 i__4 = isupmx; 00940 for (ii = zto + 1; ii <= i__4; ++ii) { 00941 z__[ii + windex * z_dim1] = 0.; 00942 /* L123: */ 00943 } 00944 } 00945 i__4 = zto - zfrom + 1; 00946 template_blas_scal(&i__4, &nrminv, &z__[zfrom + windex * z_dim1], 00947 &c__1); 00948 L125: 00949 /* Update W */ 00950 w[windex] = lambda + sigma; 00951 /* Recompute the gaps on the left and right */ 00952 /* But only allow them to become larger and not */ 00953 /* smaller (which can only happen through "bad" */ 00954 /* cancellation and doesn't reflect the theory */ 00955 /* where the initial gaps are underestimated due */ 00956 /* to WERR being too crude.) */ 00957 if (! eskip) { 00958 if (k > 1) { 00959 /* Computing MAX */ 00960 d__1 = wgap[windmn], d__2 = w[windex] - werr[ 00961 windex] - w[windmn] - werr[windmn]; 00962 wgap[windmn] = maxMACRO(d__1,d__2); 00963 } 00964 if (windex < wend) { 00965 /* Computing MAX */ 00966 d__1 = savgap, d__2 = w[windpl] - werr[windpl] 00967 - w[windex] - werr[windex]; 00968 wgap[windex] = maxMACRO(d__1,d__2); 00969 } 00970 } 00971 ++idone; 00972 } 00973 /* here ends the code for the current child */ 00974 00975 L139: 00976 /* Proceed to any remaining child nodes */ 00977 newfst = j + 1; 00978 L140: 00979 ; 00980 } 00981 /* L150: */ 00982 } 00983 ++ndepth; 00984 goto L40; 00985 } 00986 ibegin = iend + 1; 00987 wbegin = wend + 1; 00988 L170: 00989 ; 00990 } 00991 00992 return 0; 00993 00994 /* End of DLARRV */ 00995 00996 } /* dlarrv_ */ 00997 00998 #endif