00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #ifndef TEMPLATE_LAPACK_GGBAL_HEADER
00038 #define TEMPLATE_LAPACK_GGBAL_HEADER
00039
00040
00041 template<class Treal>
00042 int template_lapack_ggbal(const char *job, const integer *n, Treal *a, const integer *
00043 lda, Treal *b, const integer *ldb, integer *ilo, integer *ihi,
00044 Treal *lscale, Treal *rscale, Treal *work, integer *
00045 info)
00046 {
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145 integer c__1 = 1;
00146 Treal c_b34 = 10.;
00147 Treal c_b70 = .5;
00148
00149
00150 integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
00151 Treal d__1, d__2, d__3;
00152
00153 integer lcab;
00154 Treal beta, coef;
00155 integer irab, lrab;
00156 Treal basl, cmax;
00157 Treal coef2, coef5;
00158 integer i__, j, k, l, m;
00159 Treal gamma, t, alpha;
00160 Treal sfmin, sfmax;
00161 integer iflow;
00162 integer kount, jc;
00163 Treal ta, tb, tc;
00164 integer ir, it;
00165 Treal ew;
00166 integer nr;
00167 Treal pgamma;
00168 integer lsfmin, lsfmax, ip1, jp1, lm1;
00169 Treal cab, rab, ewc, cor, sum;
00170 integer nrp2, icab;
00171 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00172 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
00173
00174
00175 a_dim1 = *lda;
00176 a_offset = 1 + a_dim1 * 1;
00177 a -= a_offset;
00178 b_dim1 = *ldb;
00179 b_offset = 1 + b_dim1 * 1;
00180 b -= b_offset;
00181 --lscale;
00182 --rscale;
00183 --work;
00184
00185
00186 pgamma = 0;
00187
00188 *info = 0;
00189 if (! template_blas_lsame(job, "N") && ! template_blas_lsame(job, "P") && ! template_blas_lsame(job, "S")
00190 && ! template_blas_lsame(job, "B")) {
00191 *info = -1;
00192 } else if (*n < 0) {
00193 *info = -2;
00194 } else if (*lda < maxMACRO(1,*n)) {
00195 *info = -4;
00196 } else if (*ldb < maxMACRO(1,*n)) {
00197 *info = -5;
00198 }
00199 if (*info != 0) {
00200 i__1 = -(*info);
00201 template_blas_erbla("GGBAL ", &i__1);
00202 return 0;
00203 }
00204
00205 k = 1;
00206 l = *n;
00207
00208
00209
00210 if (*n == 0) {
00211 return 0;
00212 }
00213
00214 if (template_blas_lsame(job, "N")) {
00215 *ilo = 1;
00216 *ihi = *n;
00217 i__1 = *n;
00218 for (i__ = 1; i__ <= i__1; ++i__) {
00219 lscale[i__] = 1.;
00220 rscale[i__] = 1.;
00221
00222 }
00223 return 0;
00224 }
00225
00226 if (k == l) {
00227 *ilo = 1;
00228 *ihi = 1;
00229 lscale[1] = 1.;
00230 rscale[1] = 1.;
00231 return 0;
00232 }
00233
00234 if (template_blas_lsame(job, "S")) {
00235 goto L190;
00236 }
00237
00238 goto L30;
00239
00240
00241
00242
00243
00244 L20:
00245 l = lm1;
00246 if (l != 1) {
00247 goto L30;
00248 }
00249
00250 rscale[1] = 1.;
00251 lscale[1] = 1.;
00252 goto L190;
00253
00254 L30:
00255 lm1 = l - 1;
00256 for (i__ = l; i__ >= 1; --i__) {
00257 i__1 = lm1;
00258 for (j = 1; j <= i__1; ++j) {
00259 jp1 = j + 1;
00260 if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
00261 goto L50;
00262 }
00263
00264 }
00265 j = l;
00266 goto L70;
00267
00268 L50:
00269 i__1 = l;
00270 for (j = jp1; j <= i__1; ++j) {
00271 if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
00272 goto L80;
00273 }
00274
00275 }
00276 j = jp1 - 1;
00277
00278 L70:
00279 m = l;
00280 iflow = 1;
00281 goto L160;
00282 L80:
00283 ;
00284 }
00285 goto L100;
00286
00287
00288
00289 L90:
00290 ++k;
00291
00292 L100:
00293 i__1 = l;
00294 for (j = k; j <= i__1; ++j) {
00295 i__2 = lm1;
00296 for (i__ = k; i__ <= i__2; ++i__) {
00297 ip1 = i__ + 1;
00298 if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
00299 goto L120;
00300 }
00301
00302 }
00303 i__ = l;
00304 goto L140;
00305 L120:
00306 i__2 = l;
00307 for (i__ = ip1; i__ <= i__2; ++i__) {
00308 if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
00309 goto L150;
00310 }
00311
00312 }
00313 i__ = ip1 - 1;
00314 L140:
00315 m = k;
00316 iflow = 2;
00317 goto L160;
00318 L150:
00319 ;
00320 }
00321 goto L190;
00322
00323
00324
00325 L160:
00326 lscale[m] = (Treal) i__;
00327 if (i__ == m) {
00328 goto L170;
00329 }
00330 i__1 = *n - k + 1;
00331 template_blas_swap(&i__1, &a_ref(i__, k), lda, &a_ref(m, k), lda);
00332 i__1 = *n - k + 1;
00333 template_blas_swap(&i__1, &b_ref(i__, k), ldb, &b_ref(m, k), ldb);
00334
00335
00336
00337 L170:
00338 rscale[m] = (Treal) j;
00339 if (j == m) {
00340 goto L180;
00341 }
00342 template_blas_swap(&l, &a_ref(1, j), &c__1, &a_ref(1, m), &c__1);
00343 template_blas_swap(&l, &b_ref(1, j), &c__1, &b_ref(1, m), &c__1);
00344
00345 L180:
00346 switch (iflow) {
00347 case 1: goto L20;
00348 case 2: goto L90;
00349 }
00350
00351 L190:
00352 *ilo = k;
00353 *ihi = l;
00354
00355 if (*ilo == *ihi) {
00356 return 0;
00357 }
00358
00359 if (template_blas_lsame(job, "P")) {
00360 return 0;
00361 }
00362
00363
00364
00365 nr = *ihi - *ilo + 1;
00366 i__1 = *ihi;
00367 for (i__ = *ilo; i__ <= i__1; ++i__) {
00368 rscale[i__] = 0.;
00369 lscale[i__] = 0.;
00370
00371 work[i__] = 0.;
00372 work[i__ + *n] = 0.;
00373 work[i__ + (*n << 1)] = 0.;
00374 work[i__ + *n * 3] = 0.;
00375 work[i__ + (*n << 2)] = 0.;
00376 work[i__ + *n * 5] = 0.;
00377
00378 }
00379
00380
00381
00382 basl = template_blas_lg10(&c_b34);
00383 i__1 = *ihi;
00384 for (i__ = *ilo; i__ <= i__1; ++i__) {
00385 i__2 = *ihi;
00386 for (j = *ilo; j <= i__2; ++j) {
00387 tb = b_ref(i__, j);
00388 ta = a_ref(i__, j);
00389 if (ta == 0.) {
00390 goto L210;
00391 }
00392 d__1 = absMACRO(ta);
00393 ta = template_blas_lg10(&d__1) / basl;
00394 L210:
00395 if (tb == 0.) {
00396 goto L220;
00397 }
00398 d__1 = absMACRO(tb);
00399 tb = template_blas_lg10(&d__1) / basl;
00400 L220:
00401 work[i__ + (*n << 2)] = work[i__ + (*n << 2)] - ta - tb;
00402 work[j + *n * 5] = work[j + *n * 5] - ta - tb;
00403
00404 }
00405
00406 }
00407
00408 coef = 1. / (Treal) (nr << 1);
00409 coef2 = coef * coef;
00410 coef5 = coef2 * .5;
00411 nrp2 = nr + 2;
00412 beta = 0.;
00413 it = 1;
00414
00415
00416
00417 L250:
00418
00419 gamma = template_blas_dot(&nr, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + (*n << 2)]
00420 , &c__1) + template_blas_dot(&nr, &work[*ilo + *n * 5], &c__1, &work[*ilo + *
00421 n * 5], &c__1);
00422
00423 ew = 0.;
00424 ewc = 0.;
00425 i__1 = *ihi;
00426 for (i__ = *ilo; i__ <= i__1; ++i__) {
00427 ew += work[i__ + (*n << 2)];
00428 ewc += work[i__ + *n * 5];
00429
00430 }
00431
00432
00433 d__1 = ew;
00434
00435 d__2 = ewc;
00436
00437 d__3 = ew - ewc;
00438 gamma = coef * gamma - coef2 * (d__1 * d__1 + d__2 * d__2) - coef5 * (
00439 d__3 * d__3);
00440 if (gamma == 0.) {
00441 goto L350;
00442 }
00443 if (it != 1) {
00444 beta = gamma / pgamma;
00445 }
00446 t = coef5 * (ewc - ew * 3.);
00447 tc = coef5 * (ew - ewc * 3.);
00448
00449 template_blas_scal(&nr, &beta, &work[*ilo], &c__1);
00450 template_blas_scal(&nr, &beta, &work[*ilo + *n], &c__1);
00451
00452 template_blas_axpy(&nr, &coef, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + *n], &
00453 c__1);
00454 template_blas_axpy(&nr, &coef, &work[*ilo + *n * 5], &c__1, &work[*ilo], &c__1);
00455
00456 i__1 = *ihi;
00457 for (i__ = *ilo; i__ <= i__1; ++i__) {
00458 work[i__] += tc;
00459 work[i__ + *n] += t;
00460
00461 }
00462
00463
00464
00465 i__1 = *ihi;
00466 for (i__ = *ilo; i__ <= i__1; ++i__) {
00467 kount = 0;
00468 sum = 0.;
00469 i__2 = *ihi;
00470 for (j = *ilo; j <= i__2; ++j) {
00471 if (a_ref(i__, j) == 0.) {
00472 goto L280;
00473 }
00474 ++kount;
00475 sum += work[j];
00476 L280:
00477 if (b_ref(i__, j) == 0.) {
00478 goto L290;
00479 }
00480 ++kount;
00481 sum += work[j];
00482 L290:
00483 ;
00484 }
00485 work[i__ + (*n << 1)] = (Treal) kount * work[i__ + *n] + sum;
00486
00487 }
00488
00489 i__1 = *ihi;
00490 for (j = *ilo; j <= i__1; ++j) {
00491 kount = 0;
00492 sum = 0.;
00493 i__2 = *ihi;
00494 for (i__ = *ilo; i__ <= i__2; ++i__) {
00495 if (a_ref(i__, j) == 0.) {
00496 goto L310;
00497 }
00498 ++kount;
00499 sum += work[i__ + *n];
00500 L310:
00501 if (b_ref(i__, j) == 0.) {
00502 goto L320;
00503 }
00504 ++kount;
00505 sum += work[i__ + *n];
00506 L320:
00507 ;
00508 }
00509 work[j + *n * 3] = (Treal) kount * work[j] + sum;
00510
00511 }
00512
00513 sum = template_blas_dot(&nr, &work[*ilo + *n], &c__1, &work[*ilo + (*n << 1)], &c__1)
00514 + template_blas_dot(&nr, &work[*ilo], &c__1, &work[*ilo + *n * 3], &c__1);
00515 alpha = gamma / sum;
00516
00517
00518
00519 cmax = 0.;
00520 i__1 = *ihi;
00521 for (i__ = *ilo; i__ <= i__1; ++i__) {
00522 cor = alpha * work[i__ + *n];
00523 if (absMACRO(cor) > cmax) {
00524 cmax = absMACRO(cor);
00525 }
00526 lscale[i__] += cor;
00527 cor = alpha * work[i__];
00528 if (absMACRO(cor) > cmax) {
00529 cmax = absMACRO(cor);
00530 }
00531 rscale[i__] += cor;
00532
00533 }
00534 if (cmax < .5) {
00535 goto L350;
00536 }
00537
00538 d__1 = -alpha;
00539 template_blas_axpy(&nr, &d__1, &work[*ilo + (*n << 1)], &c__1, &work[*ilo + (*n << 2)]
00540 , &c__1);
00541 d__1 = -alpha;
00542 template_blas_axpy(&nr, &d__1, &work[*ilo + *n * 3], &c__1, &work[*ilo + *n * 5], &
00543 c__1);
00544
00545 pgamma = gamma;
00546 ++it;
00547 if (it <= nrp2) {
00548 goto L250;
00549 }
00550
00551
00552
00553 L350:
00554 sfmin = template_lapack_lamch("S", (Treal)0);
00555 sfmax = 1. / sfmin;
00556 lsfmin = (integer) (template_blas_lg10(&sfmin) / basl + 1.);
00557 lsfmax = (integer) (template_blas_lg10(&sfmax) / basl);
00558 i__1 = *ihi;
00559 for (i__ = *ilo; i__ <= i__1; ++i__) {
00560 i__2 = *n - *ilo + 1;
00561 irab = template_blas_idamax(&i__2, &a_ref(i__, *ilo), lda);
00562 rab = (d__1 = a_ref(i__, irab + *ilo - 1), absMACRO(d__1));
00563 i__2 = *n - *ilo + 1;
00564 irab = template_blas_idamax(&i__2, &b_ref(i__, *ilo), lda);
00565
00566 d__2 = rab, d__3 = (d__1 = b_ref(i__, irab + *ilo - 1), absMACRO(d__1));
00567 rab = maxMACRO(d__2,d__3);
00568 d__1 = rab + sfmin;
00569 lrab = (integer) (template_blas_lg10(&d__1) / basl + 1.);
00570 ir = (integer) (lscale[i__] + template_lapack_d_sign(&c_b70, &lscale[i__]));
00571
00572 i__2 = maxMACRO(ir,lsfmin), i__2 = minMACRO(i__2,lsfmax), i__3 = lsfmax - lrab;
00573 ir = minMACRO(i__2,i__3);
00574 lscale[i__] = template_lapack_pow_di(&c_b34, &ir);
00575 icab = template_blas_idamax(ihi, &a_ref(1, i__), &c__1);
00576 cab = (d__1 = a_ref(icab, i__), absMACRO(d__1));
00577 icab = template_blas_idamax(ihi, &b_ref(1, i__), &c__1);
00578
00579 d__2 = cab, d__3 = (d__1 = b_ref(icab, i__), absMACRO(d__1));
00580 cab = maxMACRO(d__2,d__3);
00581 d__1 = cab + sfmin;
00582 lcab = (integer) (template_blas_lg10(&d__1) / basl + 1.);
00583 jc = (integer) (rscale[i__] + template_lapack_d_sign(&c_b70, &rscale[i__]));
00584
00585 i__2 = maxMACRO(jc,lsfmin), i__2 = minMACRO(i__2,lsfmax), i__3 = lsfmax - lcab;
00586 jc = minMACRO(i__2,i__3);
00587 rscale[i__] = template_lapack_pow_di(&c_b34, &jc);
00588
00589 }
00590
00591
00592
00593 i__1 = *ihi;
00594 for (i__ = *ilo; i__ <= i__1; ++i__) {
00595 i__2 = *n - *ilo + 1;
00596 template_blas_scal(&i__2, &lscale[i__], &a_ref(i__, *ilo), lda);
00597 i__2 = *n - *ilo + 1;
00598 template_blas_scal(&i__2, &lscale[i__], &b_ref(i__, *ilo), ldb);
00599
00600 }
00601
00602
00603
00604 i__1 = *ihi;
00605 for (j = *ilo; j <= i__1; ++j) {
00606 template_blas_scal(ihi, &rscale[j], &a_ref(1, j), &c__1);
00607 template_blas_scal(ihi, &rscale[j], &b_ref(1, j), &c__1);
00608
00609 }
00610
00611 return 0;
00612
00613
00614
00615 }
00616
00617 #undef b_ref
00618 #undef a_ref
00619
00620
00621 #endif