Actual source code: hz.c
slepc-3.17.1 2022-04-11
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: HZ iteration for generalized symmetric-indefinite eigenproblem.
12: Based on Matlab code from David Watkins.
14: References:
16: [1] D.S. Watkins, The Matrix Eigenvalue Problem: GR and Krylov Subspace
17: Methods, SIAM, 2007.
19: [2] M.A. Brebner, J. Grad, "Eigenvalues of Ax = lambda Bx for real
20: symmetric matrices A and B computed by reduction to pseudosymmetric
21: form and the HR process", Linear Alg. Appl. 43:99-118, 1982.
22: */
24: #include <slepc/private/dsimpl.h>
25: #include <slepcblaslapack.h>
27: /*
28: Sets up a 2-by-2 matrix to eliminate y in the vector [x y]'.
29: Transformation is rotator if sygn = 1 and hyperbolic if sygn = -1.
30: */
31: static PetscErrorCode UnifiedRotation(PetscReal x,PetscReal y,PetscReal sygn,PetscReal *rot,PetscReal *rcond,PetscBool *swap)
32: {
33: PetscReal nrm,c,s;
35: *swap = PETSC_FALSE;
36: if (y == 0) {
37: rot[0] = 1.0; rot[1] = 0.0; rot[2] = 0.0; rot[3] = 1.0;
38: *rcond = 1.0;
39: } else {
40: nrm = PetscMax(PetscAbs(x),PetscAbs(y));
41: c = x/nrm; s = y/nrm;
43: if (sygn == 1.0) { /* set up a rotator */
44: nrm = PetscSqrtReal(c*c+s*s);
45: c = c/nrm; s = s/nrm;
46: /* rot = [c s; -s c]; */
47: rot[0] = c; rot[1] = -s; rot[2] = s; rot[3] = c;
48: *rcond = 1.0;
49: } else { /* sygn == -1, set up a hyperbolic transformation */
50: nrm = c*c-s*s;
51: if (nrm > 0) nrm = PetscSqrtReal(nrm);
52: else {
54: nrm = PetscSqrtReal(-nrm);
55: *swap = PETSC_TRUE;
56: }
57: c = c/nrm; s = s/nrm;
58: /* rot = [c -s; -s c]; */
59: rot[0] = c; rot[1] = -s; rot[2] = -s; rot[3] = c;
60: *rcond = PetscAbs(PetscAbs(s)-PetscAbs(c))/(PetscAbs(s)+PetscAbs(c));
61: }
62: }
63: PetscFunctionReturn(0);
64: }
66: static PetscErrorCode HZStep(PetscBLASInt ntop,PetscBLASInt nn,PetscReal tr,PetscReal dt,PetscReal *aa,PetscReal *bb,PetscReal *dd,PetscScalar *uu,PetscInt n,PetscInt ld,PetscBool *flag)
67: {
68: PetscBLASInt one=1;
69: PetscInt k,jj,ii;
70: PetscBLASInt n_;
71: PetscReal bulge10,bulge20,bulge30,bulge31,bulge41,bulge42;
72: PetscReal sygn,rcond=1.0,worstcond,rot[4],buf[2],t;
73: PetscScalar rtmp;
74: PetscBool swap;
76: *flag = PETSC_FALSE;
77: worstcond = 1.0;
78: PetscBLASIntCast(n,&n_);
80: /* Build initial bulge that sets step in motion */
81: bulge10 = dd[ntop+1]*(aa[ntop]*(aa[ntop] - dd[ntop]*tr) + dt*dd[ntop]*dd[ntop]) + dd[ntop]*bb[ntop]*bb[ntop];
82: bulge20 = bb[ntop]*(dd[ntop+1]*aa[ntop] + dd[ntop]*aa[ntop+1] - dd[ntop]*dd[ntop+1]*tr);
83: bulge30 = bb[ntop]*bb[ntop+1]*dd[ntop];
84: bulge31 = 0.0;
85: bulge41 = 0.0;
86: bulge42 = 0.0;
88: /* Chase the bulge */
89: for (jj=ntop;jj<nn-1;jj++) {
91: /* Check for trivial bulge */
92: if (jj>ntop && PetscMax(PetscMax(PetscAbs(bulge10),PetscAbs(bulge20)),PetscAbs(bulge30))<PETSC_MACHINE_EPSILON*(PetscAbs(aa[jj]) + PetscAbs(aa[jj+1]))) {
93: bb[jj-1] = 0.0; /* deflate and move on */
95: } else { /* carry out the step */
97: /* Annihilate tip entry bulge30 */
98: if (bulge30 != 0.0) {
100: /* Make an interchange if necessary to ensure that the
101: first transformation is othogonal, not hyperbolic. */
102: if (dd[jj+1] != dd[jj+2]) { /* make an interchange */
103: if (dd[jj] != dd[jj+1]) { /* interchange 1st and 2nd */
104: buf[0] = bulge20; bulge20 = bulge10; bulge10 = buf[0];
105: buf[0] = aa[jj]; aa[jj] = aa[jj+1]; aa[jj+1] = buf[0];
106: buf[0] = bb[jj+1]; bb[jj+1] = bulge31; bulge31 = buf[0];
107: buf[0] = dd[jj]; dd[jj] = dd[jj+1]; dd[jj+1] = buf[0];
108: for (k=0;k<n;k++) {
109: rtmp = uu[k+jj*ld]; uu[k+jj*ld] = uu[k+(jj+1)*ld]; uu[k+(jj+1)*ld] = rtmp;
110: }
111: } else { /* interchange 1st and 3rd */
112: buf[0] = bulge30; bulge30 = bulge10; bulge10 = buf[0];
113: buf[0] = aa[jj]; aa[jj] = aa[jj+2]; aa[jj+2] = buf[0];
114: buf[0] = bb[jj]; bb[jj] = bb[jj+1]; bb[jj+1] = buf[0];
115: buf[0] = dd[jj]; dd[jj] = dd[jj+2]; dd[jj+2] = buf[0];
116: if (jj + 2 < nn-1) {
117: bulge41 = bb[jj+2];
118: bb[jj+2] = 0;
119: }
120: for (k=0;k<n;k++) {
121: rtmp = uu[k+jj*ld]; uu[k+jj*ld] = uu[k+(jj+2)*ld]; uu[k+(jj+2)*ld] = rtmp;
122: }
123: }
124: }
126: /* Set up transforming matrix rot. */
127: UnifiedRotation(bulge20,bulge30,1,rot,&rcond,&swap);
129: /* Apply transforming matrix rot to T. */
130: bulge20 = rot[0]*bulge20 + rot[2]*bulge30;
131: buf[0] = rot[0]*bb[jj] + rot[2]*bulge31;
132: buf[1] = rot[1]*bb[jj] + rot[3]*bulge31;
133: bb[jj] = buf[0];
134: bulge31 = buf[1];
135: buf[0] = rot[0]*rot[0]*aa[jj+1] + 2.0*rot[0]*rot[2]*bb[jj+1] + rot[2]*rot[2]*aa[jj+2];
136: buf[1] = rot[1]*rot[1]*aa[jj+1] + 2.0*rot[3]*rot[1]*bb[jj+1] + rot[3]*rot[3]*aa[jj+2];
137: bb[jj+1] = rot[1]*rot[0]*aa[jj+1] + rot[3]*rot[2]*aa[jj+2] + (rot[3]*rot[0] + rot[1]*rot[2])*bb[jj+1];
138: aa[jj+1] = buf[0];
139: aa[jj+2] = buf[1];
140: if (jj + 2 < nn-1) {
141: bulge42 = bb[jj+2]*rot[2];
142: bb[jj+2] = bb[jj+2]*rot[3];
143: }
145: /* Accumulate transforming matrix */
146: PetscStackCallBLAS("BLASrot",BLASMIXEDrot_(&n_,uu+(jj+1)*ld,&one,uu+(jj+2)*ld,&one,&rot[0],&rot[2]));
147: }
149: /* Annihilate inner entry bulge20 */
150: if (bulge20 != 0.0) {
152: /* Begin by setting up transforming matrix rot */
153: sygn = dd[jj]*dd[jj+1];
154: UnifiedRotation(bulge10,bulge20,sygn,rot,&rcond,&swap);
155: if (rcond<PETSC_MACHINE_EPSILON) {
156: *flag = PETSC_TRUE;
157: PetscFunctionReturn(0);
158: }
159: if (rcond < worstcond) worstcond = rcond;
161: /* Apply transforming matrix rot to T */
162: if (jj > ntop) bb[jj-1] = rot[0]*bulge10 + rot[2]*bulge20;
163: buf[0] = rot[0]*rot[0]*aa[jj] + 2*rot[0]*rot[2]*bb[jj] + rot[2]*rot[2]*aa[jj+1];
164: buf[1] = rot[1]*rot[1]*aa[jj] + 2*rot[3]*rot[1]*bb[jj] + rot[3]*rot[3]*aa[jj+1];
165: bb[jj] = rot[1]*rot[0]*aa[jj] + rot[3]*rot[2]*aa[jj+1] + (rot[3]*rot[0] + rot[1]*rot[2])*bb[jj];
166: aa[jj] = buf[0];
167: aa[jj+1] = buf[1];
168: if (jj + 1 < nn-1) {
169: /* buf = [ bulge31 bb(jj+1) ] * rot' */
170: buf[0] = rot[0]*bulge31 + rot[2]*bb[jj+1];
171: buf[1] = rot[1]*bulge31 + rot[3]*bb[jj+1];
172: bulge31 = buf[0];
173: bb[jj+1] = buf[1];
174: }
175: if (jj + 2 < nn-1) {
176: /* buf = [bulge41 bulge42] * rot' */
177: buf[0] = rot[0]*bulge41 + rot[2]*bulge42;
178: buf[1] = rot[1]*bulge41 + rot[3]*bulge42;
179: bulge41 = buf[0];
180: bulge42 = buf[1];
181: }
183: /* Apply transforming matrix rot to D */
184: if (swap == 1) {
185: buf[0] = dd[jj]; dd[jj] = dd[jj+1]; dd[jj+1] = buf[0];
186: }
188: /* Accumulate transforming matrix, uu(jj:jj+1,:) = rot*uu(jj:jj+1,:) */
189: if (sygn==1) {
190: PetscStackCallBLAS("BLASrot",BLASMIXEDrot_(&n_,uu+jj*ld,&one,uu+(jj+1)*ld,&one,&rot[0],&rot[2]));
191: } else {
192: if (PetscAbsReal(rot[0])>PetscAbsReal(rot[1])) { /* Type I */
193: t = rot[1]/rot[0];
194: for (ii=0;ii<n;ii++) {
195: uu[jj*ld+ii] = rot[0]*uu[jj*ld+ii] + rot[1]*uu[(jj+1)*ld+ii];
196: uu[(jj+1)*ld+ii] = t*uu[jj*ld+ii] + uu[(jj+1)*ld+ii]/rot[0];
197: }
198: } else { /* Type II */
199: t = rot[0]/rot[1];
200: for (ii=0;ii<n;ii++) {
201: rtmp = uu[jj*ld+ii];
202: uu[jj*ld+ii] = rot[0]*uu[jj*ld+ii] + rot[1]*uu[(jj+1)*ld+ii];
203: uu[(jj+1)*ld+ii] = t*uu[jj*ld+ii] + rtmp/rot[1];
204: }
205: }
206: }
207: }
208: }
210: /* Adjust bulge for next step */
211: bulge10 = bb[jj];
212: bulge20 = bulge31;
213: bulge30 = bulge41;
214: bulge31 = bulge42;
215: bulge41 = 0.0;
216: bulge42 = 0.0;
217: }
218: PetscFunctionReturn(0);
219: }
221: static PetscErrorCode HZIteration(PetscBLASInt nn,PetscBLASInt cgd,PetscReal *aa,PetscReal *bb,PetscReal *dd,PetscScalar *uu,PetscBLASInt ld)
222: {
223: PetscBLASInt j2,one=1;
224: PetscInt its,nits,nstop,jj,ntop,nbot,ntry;
225: PetscReal htr,det,dis,dif,tn,kt,c,s,tr,dt;
226: PetscBool flag=PETSC_FALSE;
228: its = 0;
229: nbot = nn-1;
230: nits = 0;
231: nstop = 40*(nn - cgd);
233: while (nbot >= cgd && nits < nstop) {
235: /* Check for zeros on the subdiagonal */
236: jj = nbot - 1;
237: while (jj>=cgd && PetscAbs(bb[jj])>PETSC_MACHINE_EPSILON*(PetscAbs(aa[jj])+PetscAbs(aa[jj+1]))) jj = jj-1;
238: if (jj>=cgd) bb[jj]=0;
239: ntop = jj + 1; /* starting point for step */
240: if (ntop == nbot) { /* isolate single eigenvalue */
241: nbot = ntop - 1;
242: its = 0;
243: } else if (ntop+1 == nbot) { /* isolate pair of eigenvalues */
244: htr = 0.5*(aa[ntop]*dd[ntop] + aa[nbot]*dd[nbot]);
245: det = dd[ntop]*dd[nbot]*(aa[ntop]*aa[nbot]-bb[ntop]*bb[ntop]);
246: dis = htr*htr - det;
247: if (dis > 0) { /* distinct real eigenvalues */
248: if (dd[ntop] == dd[nbot]) { /* separate the eigenvalues by a Jacobi rotator */
249: dif = aa[ntop]-aa[nbot];
250: if (2.0*PetscAbs(bb[ntop])<=dif) {
251: tn = 2*bb[ntop]/dif;
252: tn = tn/(1.0 + PetscSqrtReal(1.0+tn*tn));
253: } else {
254: kt = dif/(2.0*bb[ntop]);
255: tn = PetscSign(kt)/(PetscAbsReal(kt)+PetscSqrtReal(1.0+kt*kt));
256: }
257: c = 1.0/PetscSqrtReal(1.0 + tn*tn);
258: s = c*tn;
259: aa[ntop] = aa[ntop] + tn*bb[ntop];
260: aa[nbot] = aa[nbot] - tn*bb[ntop];
261: bb[ntop] = 0;
262: j2 = nn-cgd;
263: PetscStackCallBLAS("BLASrot",BLASMIXEDrot_(&j2,uu+ntop*ld+cgd,&one,uu+nbot*ld+cgd,&one,&c,&s));
264: }
265: }
266: nbot = ntop - 1;
267: } else { /* Do an HZ iteration */
268: its = its + 1;
269: nits = nits + 1;
270: tr = aa[nbot-1]*dd[nbot-1] + aa[nbot]*dd[nbot];
271: dt = dd[nbot-1]*dd[nbot]*(aa[nbot-1]*aa[nbot]-bb[nbot-1]*bb[nbot-1]);
272: for (ntry=1;ntry<=6;ntry++) {
273: HZStep(ntop,nbot+1,tr,dt,aa,bb,dd,uu,nn,ld,&flag);
274: if (!flag) break;
276: tr = 0.9*tr; dt = 0.81*dt;
277: }
278: }
279: }
280: PetscFunctionReturn(0);
281: }
283: PetscErrorCode DSSolve_GHIEP_HZ(DS ds,PetscScalar *wr,PetscScalar *wi)
284: {
285: PetscInt i,off;
286: PetscBLASInt n1,ld = 0;
287: PetscScalar *A,*B,*Q;
288: PetscReal *d,*e,*s;
290: #if !defined(PETSC_USE_COMPLEX)
292: #endif
293: PetscBLASIntCast(ds->ld,&ld);
294: n1 = ds->n - ds->l;
295: off = ds->l + ds->l*ld;
296: A = ds->mat[DS_MAT_A];
297: B = ds->mat[DS_MAT_B];
298: Q = ds->mat[DS_MAT_Q];
299: d = ds->rmat[DS_MAT_T];
300: e = ds->rmat[DS_MAT_T] + ld;
301: s = ds->rmat[DS_MAT_D];
302: #if defined(PETSC_USE_DEBUG)
303: /* Check signature */
304: for (i=0;i<ds->n;i++) {
305: PetscReal de = (ds->compact)?s[i]:PetscRealPart(B[i*ld+i]);
307: }
308: #endif
309: /* Quick return */
310: if (n1 == 1) {
311: for (i=0;i<=ds->l;i++) Q[i+i*ld] = 1.0;
312: DSGHIEPComplexEigs(ds,0,ds->l,wr,wi);
313: if (ds->compact) {
314: wr[ds->l] = d[ds->l]/s[ds->l]; wi[ds->l] = 0.0;
315: } else {
316: d[ds->l] = PetscRealPart(A[off]); s[ds->l] = PetscRealPart(B[off]);
317: wr[ds->l] = d[ds->l]/s[ds->l]; wi[ds->l] = 0.0;
318: }
319: PetscFunctionReturn(0);
320: }
321: /* Reduce to pseudotriadiagonal form */
322: DSIntermediate_GHIEP(ds);
323: HZIteration(ds->n,ds->l,d,e,s,Q,ld);
324: if (!ds->compact) DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
325: /* Undo from diagonal the blocks with real eigenvalues*/
326: DSGHIEPRealBlocks(ds);
328: /* Recover eigenvalues from diagonal */
329: DSGHIEPComplexEigs(ds,0,ds->n,wr,wi);
330: #if defined(PETSC_USE_COMPLEX)
331: if (wi) {
332: for (i=ds->l;i<ds->n;i++) wi[i] = 0.0;
333: }
334: #endif
335: ds->t = ds->n;
336: PetscFunctionReturn(0);
337: }