00001 /* Ergo, version 3.7, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2018 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, 00004 * and Anastasia Kruchinina. 00005 * 00006 * This program is free software: you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation, either version 3 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * This program is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License 00017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00018 * 00019 * Primary academic reference: 00020 * Ergo: An open-source program for linear-scaling electronic structure 00021 * calculations, 00022 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia 00023 * Kruchinina, 00024 * SoftwareX 7, 107 (2018), 00025 * <http://dx.doi.org/10.1016/j.softx.2018.03.005> 00026 * 00027 * For further information about Ergo, see <http://www.ergoscf.org>. 00028 */ 00029 00030 /* This file belongs to the template_lapack part of the Ergo source 00031 * code. The source files in the template_lapack directory are modified 00032 * versions of files originally distributed as CLAPACK, see the 00033 * Copyright/license notice in the file template_lapack/COPYING. 00034 */ 00035 00036 00037 #ifndef TEMPLATE_LAPACK_LARRJ_HEADER 00038 #define TEMPLATE_LAPACK_LARRJ_HEADER 00039 00040 template<class Treal> 00041 int template_lapack_larrj(integer *n, Treal *d__, Treal *e2, 00042 integer *ifirst, integer *ilast, Treal *rtol, integer *offset, 00043 Treal *w, Treal *werr, Treal *work, integer *iwork, 00044 Treal *pivmin, Treal *spdiam, integer *info) 00045 { 00046 /* System generated locals */ 00047 integer i__1, i__2; 00048 Treal d__1, d__2; 00049 00050 00051 /* Local variables */ 00052 integer i__, j, k, p; 00053 Treal s; 00054 integer i1, i2, ii; 00055 Treal fac, mid; 00056 integer cnt; 00057 Treal tmp, left; 00058 integer iter, nint, prev, next, savi1; 00059 Treal right, width, dplus; 00060 integer olnint, maxitr; 00061 00062 00063 /* -- LAPACK auxiliary routine (version 3.2) -- */ 00064 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ 00065 /* November 2006 */ 00066 00067 /* .. Scalar Arguments .. */ 00068 /* .. */ 00069 /* .. Array Arguments .. */ 00070 /* .. */ 00071 00072 /* Purpose */ 00073 /* ======= */ 00074 00075 /* Given the initial eigenvalue approximations of T, DLARRJ */ 00076 /* does bisection to refine the eigenvalues of T, */ 00077 /* W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */ 00078 /* guesses for these eigenvalues are input in W, the corresponding estimate */ 00079 /* of the error in these guesses in WERR. During bisection, intervals */ 00080 /* [left, right] are maintained by storing their mid-points and */ 00081 /* semi-widths in the arrays W and WERR respectively. */ 00082 00083 /* Arguments */ 00084 /* ========= */ 00085 00086 /* N (input) INTEGER */ 00087 /* The order of the matrix. */ 00088 00089 /* D (input) DOUBLE PRECISION array, dimension (N) */ 00090 /* The N diagonal elements of T. */ 00091 00092 /* E2 (input) DOUBLE PRECISION array, dimension (N-1) */ 00093 /* The Squares of the (N-1) subdiagonal elements of T. */ 00094 00095 /* IFIRST (input) INTEGER */ 00096 /* The index of the first eigenvalue to be computed. */ 00097 00098 /* ILAST (input) INTEGER */ 00099 /* The index of the last eigenvalue to be computed. */ 00100 00101 /* RTOL (input) DOUBLE PRECISION */ 00102 /* Tolerance for the convergence of the bisection intervals. */ 00103 /* An interval [LEFT,RIGHT] has converged if */ 00104 /* RIGHT-LEFT.LT.RTOL*MAX(|LEFT|,|RIGHT|). */ 00105 00106 /* OFFSET (input) INTEGER */ 00107 /* Offset for the arrays W and WERR, i.e., the IFIRST-OFFSET */ 00108 /* through ILAST-OFFSET elements of these arrays are to be used. */ 00109 00110 /* W (input/output) DOUBLE PRECISION array, dimension (N) */ 00111 /* On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */ 00112 /* estimates of the eigenvalues of L D L^T indexed IFIRST through */ 00113 /* ILAST. */ 00114 /* On output, these estimates are refined. */ 00115 00116 /* WERR (input/output) DOUBLE PRECISION array, dimension (N) */ 00117 /* On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */ 00118 /* the errors in the estimates of the corresponding elements in W. */ 00119 /* On output, these errors are refined. */ 00120 00121 /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ 00122 /* Workspace. */ 00123 00124 /* IWORK (workspace) INTEGER array, dimension (2*N) */ 00125 /* Workspace. */ 00126 00127 /* PIVMIN (input) DOUBLE PRECISION */ 00128 /* The minimum pivot in the Sturm sequence for T. */ 00129 00130 /* SPDIAM (input) DOUBLE PRECISION */ 00131 /* The spectral diameter of T. */ 00132 00133 /* INFO (output) INTEGER */ 00134 /* Error flag. */ 00135 00136 /* Further Details */ 00137 /* =============== */ 00138 00139 /* Based on contributions by */ 00140 /* Beresford Parlett, University of California, Berkeley, USA */ 00141 /* Jim Demmel, University of California, Berkeley, USA */ 00142 /* Inderjit Dhillon, University of Texas, Austin, USA */ 00143 /* Osni Marques, LBNL/NERSC, USA */ 00144 /* Christof Voemel, University of California, Berkeley, USA */ 00145 00146 /* ===================================================================== */ 00147 00148 /* .. Parameters .. */ 00149 /* .. */ 00150 /* .. Local Scalars .. */ 00151 00152 /* .. */ 00153 /* .. Intrinsic Functions .. */ 00154 /* .. */ 00155 /* .. Executable Statements .. */ 00156 00157 /* Parameter adjustments */ 00158 --iwork; 00159 --work; 00160 --werr; 00161 --w; 00162 --e2; 00163 --d__; 00164 00165 /* Function Body */ 00166 *info = 0; 00167 00168 maxitr = (integer) ((template_blas_log(*spdiam + *pivmin) - template_blas_log(*pivmin)) / template_blas_log(2.)) + 00169 2; 00170 00171 /* Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */ 00172 /* The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */ 00173 /* Count( WORK(2*I) ) is stored in IWORK( 2*I ). The integer IWORK( 2*I-1 ) */ 00174 /* for an unconverged interval is set to the index of the next unconverged */ 00175 /* interval, and is -1 or 0 for a converged interval. Thus a linked */ 00176 /* list of unconverged intervals is set up. */ 00177 00178 i1 = *ifirst; 00179 i2 = *ilast; 00180 /* The number of unconverged intervals */ 00181 nint = 0; 00182 /* The last unconverged interval found */ 00183 prev = 0; 00184 i__1 = i2; 00185 for (i__ = i1; i__ <= i__1; ++i__) { 00186 k = i__ << 1; 00187 ii = i__ - *offset; 00188 left = w[ii] - werr[ii]; 00189 mid = w[ii]; 00190 right = w[ii] + werr[ii]; 00191 width = right - mid; 00192 /* Computing MAX */ 00193 d__1 = absMACRO(left), d__2 = absMACRO(right); 00194 tmp = maxMACRO(d__1,d__2); 00195 /* The following test prevents the test of converged intervals */ 00196 if (width < *rtol * tmp) { 00197 /* This interval has already converged and does not need refinement. */ 00198 /* (Note that the gaps might change through refining the */ 00199 /* eigenvalues, however, they can only get bigger.) */ 00200 /* Remove it from the list. */ 00201 iwork[k - 1] = -1; 00202 /* Make sure that I1 always points to the first unconverged interval */ 00203 if (i__ == i1 && i__ < i2) { 00204 i1 = i__ + 1; 00205 } 00206 if (prev >= i1 && i__ <= i2) { 00207 iwork[(prev << 1) - 1] = i__ + 1; 00208 } 00209 } else { 00210 /* unconverged interval found */ 00211 prev = i__; 00212 /* Make sure that [LEFT,RIGHT] contains the desired eigenvalue */ 00213 00214 /* Do while( CNT(LEFT).GT.I-1 ) */ 00215 00216 fac = 1.; 00217 L20: 00218 cnt = 0; 00219 s = left; 00220 dplus = d__[1] - s; 00221 if (dplus < 0.) { 00222 ++cnt; 00223 } 00224 i__2 = *n; 00225 for (j = 2; j <= i__2; ++j) { 00226 dplus = d__[j] - s - e2[j - 1] / dplus; 00227 if (dplus < 0.) { 00228 ++cnt; 00229 } 00230 /* L30: */ 00231 } 00232 if (cnt > i__ - 1) { 00233 left -= werr[ii] * fac; 00234 fac *= 2.; 00235 goto L20; 00236 } 00237 00238 /* Do while( CNT(RIGHT).LT.I ) */ 00239 00240 fac = 1.; 00241 L50: 00242 cnt = 0; 00243 s = right; 00244 dplus = d__[1] - s; 00245 if (dplus < 0.) { 00246 ++cnt; 00247 } 00248 i__2 = *n; 00249 for (j = 2; j <= i__2; ++j) { 00250 dplus = d__[j] - s - e2[j - 1] / dplus; 00251 if (dplus < 0.) { 00252 ++cnt; 00253 } 00254 /* L60: */ 00255 } 00256 if (cnt < i__) { 00257 right += werr[ii] * fac; 00258 fac *= 2.; 00259 goto L50; 00260 } 00261 ++nint; 00262 iwork[k - 1] = i__ + 1; 00263 iwork[k] = cnt; 00264 } 00265 work[k - 1] = left; 00266 work[k] = right; 00267 /* L75: */ 00268 } 00269 savi1 = i1; 00270 00271 /* Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */ 00272 /* and while (ITER.LT.MAXITR) */ 00273 00274 iter = 0; 00275 L80: 00276 prev = i1 - 1; 00277 i__ = i1; 00278 olnint = nint; 00279 i__1 = olnint; 00280 for (p = 1; p <= i__1; ++p) { 00281 k = i__ << 1; 00282 ii = i__ - *offset; 00283 next = iwork[k - 1]; 00284 left = work[k - 1]; 00285 right = work[k]; 00286 mid = (left + right) * .5; 00287 /* semiwidth of interval */ 00288 width = right - mid; 00289 /* Computing MAX */ 00290 d__1 = absMACRO(left), d__2 = absMACRO(right); 00291 tmp = maxMACRO(d__1,d__2); 00292 if (width < *rtol * tmp || iter == maxitr) { 00293 /* reduce number of unconverged intervals */ 00294 --nint; 00295 /* Mark interval as converged. */ 00296 iwork[k - 1] = 0; 00297 if (i1 == i__) { 00298 i1 = next; 00299 } else { 00300 /* Prev holds the last unconverged interval previously examined */ 00301 if (prev >= i1) { 00302 iwork[(prev << 1) - 1] = next; 00303 } 00304 } 00305 i__ = next; 00306 goto L100; 00307 } 00308 prev = i__; 00309 00310 /* Perform one bisection step */ 00311 00312 cnt = 0; 00313 s = mid; 00314 dplus = d__[1] - s; 00315 if (dplus < 0.) { 00316 ++cnt; 00317 } 00318 i__2 = *n; 00319 for (j = 2; j <= i__2; ++j) { 00320 dplus = d__[j] - s - e2[j - 1] / dplus; 00321 if (dplus < 0.) { 00322 ++cnt; 00323 } 00324 /* L90: */ 00325 } 00326 if (cnt <= i__ - 1) { 00327 work[k - 1] = mid; 00328 } else { 00329 work[k] = mid; 00330 } 00331 i__ = next; 00332 L100: 00333 ; 00334 } 00335 ++iter; 00336 /* do another loop if there are still unconverged intervals */ 00337 /* However, in the last iteration, all intervals are accepted */ 00338 /* since this is the best we can do. */ 00339 if (nint > 0 && iter <= maxitr) { 00340 goto L80; 00341 } 00342 00343 00344 /* At this point, all the intervals have converged */ 00345 i__1 = *ilast; 00346 for (i__ = savi1; i__ <= i__1; ++i__) { 00347 k = i__ << 1; 00348 ii = i__ - *offset; 00349 /* All intervals marked by '0' have been refined. */ 00350 if (iwork[k - 1] == 0) { 00351 w[ii] = (work[k - 1] + work[k]) * .5; 00352 werr[ii] = work[k] - w[ii]; 00353 } 00354 /* L110: */ 00355 } 00356 00357 return 0; 00358 00359 /* End of DLARRJ */ 00360 00361 } /* dlarrj_ */ 00362 00363 #endif