Actual source code: dshep.c

slepc-3.17.1 2022-04-11
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #include <slepc/private/dsimpl.h>
 12: #include <slepcblaslapack.h>

 14: PetscErrorCode DSAllocate_HEP(DS ds,PetscInt ld)
 15: {
 16:   DSAllocateMat_Private(ds,DS_MAT_A);
 17:   DSAllocateMat_Private(ds,DS_MAT_Q);
 18:   DSAllocateMatReal_Private(ds,DS_MAT_T);
 19:   PetscFree(ds->perm);
 20:   PetscMalloc1(ld,&ds->perm);
 21:   PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
 22:   PetscFunctionReturn(0);
 23: }

 25: /*   0       l           k                 n-1
 26:     -----------------------------------------
 27:     |*       .           .                  |
 28:     |  *     .           .                  |
 29:     |    *   .           .                  |
 30:     |      * .           .                  |
 31:     |. . . . o           o                  |
 32:     |          o         o                  |
 33:     |            o       o                  |
 34:     |              o     o                  |
 35:     |                o   o                  |
 36:     |                  o o                  |
 37:     |. . . . o o o o o o o x                |
 38:     |                    x x x              |
 39:     |                      x x x            |
 40:     |                        x x x          |
 41:     |                          x x x        |
 42:     |                            x x x      |
 43:     |                              x x x    |
 44:     |                                x x x  |
 45:     |                                  x x x|
 46:     |                                    x x|
 47:     -----------------------------------------
 48: */

 50: static PetscErrorCode DSSwitchFormat_HEP(DS ds)
 51: {
 52:   PetscReal      *T = ds->rmat[DS_MAT_T];
 53:   PetscScalar    *A = ds->mat[DS_MAT_A];
 54:   PetscInt       i,n=ds->n,k=ds->k,ld=ds->ld;

 56:   /* switch from compact (arrow) to dense storage */
 57:   PetscArrayzero(A,ld*ld);
 58:   for (i=0;i<k;i++) {
 59:     A[i+i*ld] = T[i];
 60:     A[k+i*ld] = T[i+ld];
 61:     A[i+k*ld] = T[i+ld];
 62:   }
 63:   A[k+k*ld] = T[k];
 64:   for (i=k+1;i<n;i++) {
 65:     A[i+i*ld]     = T[i];
 66:     A[i-1+i*ld]   = T[i-1+ld];
 67:     A[i+(i-1)*ld] = T[i-1+ld];
 68:   }
 69:   if (ds->extrarow) A[n+(n-1)*ld] = T[n-1+ld];
 70:   PetscFunctionReturn(0);
 71: }

 73: PetscErrorCode DSView_HEP(DS ds,PetscViewer viewer)
 74: {
 75:   PetscViewerFormat format;
 76:   PetscInt          i,j,r,c,rows;
 77:   PetscReal         value;
 78:   const char        *methodname[] = {
 79:                      "Implicit QR method (_steqr)",
 80:                      "Relatively Robust Representations (_stevr)",
 81:                      "Divide and Conquer method (_stedc)",
 82:                      "Block Divide and Conquer method (dsbtdc)"
 83:   };
 84:   const int         nmeth=sizeof(methodname)/sizeof(methodname[0]);

 86:   PetscViewerGetFormat(viewer,&format);
 87:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
 88:     if (ds->bs>1) PetscViewerASCIIPrintf(viewer,"block size: %" PetscInt_FMT "\n",ds->bs);
 89:     if (ds->method<nmeth) PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]);
 90:     PetscFunctionReturn(0);
 91:   }
 92:   if (ds->compact) {
 93:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
 94:     rows = ds->extrarow? ds->n+1: ds->n;
 95:     if (format == PETSC_VIEWER_ASCII_MATLAB) {
 96:       PetscViewerASCIIPrintf(viewer,"%% Size = %" PetscInt_FMT " %" PetscInt_FMT "\n",rows,ds->n);
 97:       PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",3);\n",3*ds->n);
 98:       PetscViewerASCIIPrintf(viewer,"zzz = [\n");
 99:       for (i=0;i<ds->n;i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",i+1,i+1,(double)*(ds->rmat[DS_MAT_T]+i));
100:       for (i=0;i<rows-1;i++) {
101:         r = PetscMax(i+2,ds->k+1);
102:         c = i+1;
103:         PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",r,c,(double)*(ds->rmat[DS_MAT_T]+ds->ld+i));
104:         if (i<ds->n-1 && ds->k<ds->n) { /* do not print vertical arrow when k=n */
105:           PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",c,r,(double)*(ds->rmat[DS_MAT_T]+ds->ld+i));
106:         }
107:       }
108:       PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]);
109:     } else {
110:       for (i=0;i<rows;i++) {
111:         for (j=0;j<ds->n;j++) {
112:           if (i==j) value = *(ds->rmat[DS_MAT_T]+i);
113:           else if ((i<ds->k && j==ds->k) || (i==ds->k && j<ds->k)) value = *(ds->rmat[DS_MAT_T]+ds->ld+PetscMin(i,j));
114:           else if (i==j+1 && i>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+i-1);
115:           else if (i+1==j && j>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+j-1);
116:           else value = 0.0;
117:           PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value);
118:         }
119:         PetscViewerASCIIPrintf(viewer,"\n");
120:       }
121:     }
122:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
123:     PetscViewerFlush(viewer);
124:   } else DSViewMat(ds,viewer,DS_MAT_A);
125:   if (ds->state>DS_STATE_INTERMEDIATE) DSViewMat(ds,viewer,DS_MAT_Q);
126:   PetscFunctionReturn(0);
127: }

129: PetscErrorCode DSVectors_HEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
130: {
131:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
132:   PetscInt       ld = ds->ld;

134:   switch (mat) {
135:     case DS_MAT_X:
136:     case DS_MAT_Y:
137:       if (j) {
138:         if (ds->state>=DS_STATE_CONDENSED) PetscArraycpy(ds->mat[mat]+(*j)*ld,Q+(*j)*ld,ld);
139:         else {
140:           PetscArrayzero(ds->mat[mat]+(*j)*ld,ld);
141:           *(ds->mat[mat]+(*j)+(*j)*ld) = 1.0;
142:         }
143:       } else {
144:         if (ds->state>=DS_STATE_CONDENSED) PetscArraycpy(ds->mat[mat],Q,ld*ld);
145:         else DSSetIdentity(ds,mat);
146:       }
147:       if (rnorm && j) *rnorm = PetscAbsScalar(Q[ds->n-1+(*j)*ld]);
148:       break;
149:     case DS_MAT_U:
150:     case DS_MAT_V:
151:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
152:     default:
153:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
154:   }
155:   PetscFunctionReturn(0);
156: }

158: /*
159:   ARROWTRIDIAG reduces a symmetric arrowhead matrix of the form

161:                 [ d 0 0 0 e ]
162:                 [ 0 d 0 0 e ]
163:             A = [ 0 0 d 0 e ]
164:                 [ 0 0 0 d e ]
165:                 [ e e e e d ]

167:   to tridiagonal form

169:                 [ d e 0 0 0 ]
170:                 [ e d e 0 0 ]
171:    T = Q'*A*Q = [ 0 e d e 0 ],
172:                 [ 0 0 e d e ]
173:                 [ 0 0 0 e d ]

175:   where Q is an orthogonal matrix. Rutishauser's algorithm is used to
176:   perform the reduction, which requires O(n**2) flops. The accumulation
177:   of the orthogonal factor Q, however, requires O(n**3) flops.

179:   Arguments
180:   =========

182:   N       (input) INTEGER
183:           The order of the matrix A.  N >= 0.

185:   D       (input/output) DOUBLE PRECISION array, dimension (N)
186:           On entry, the diagonal entries of the matrix A to be
187:           reduced.
188:           On exit, the diagonal entries of the reduced matrix T.

190:   E       (input/output) DOUBLE PRECISION array, dimension (N-1)
191:           On entry, the off-diagonal entries of the matrix A to be
192:           reduced.
193:           On exit, the subdiagonal entries of the reduced matrix T.

195:   Q       (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
196:           On exit, the orthogonal matrix Q.

198:   LDQ     (input) INTEGER
199:           The leading dimension of the array Q.

201:   Note
202:   ====
203:   Based on Fortran code contributed by Daniel Kressner
204: */
205: static PetscErrorCode ArrowTridiag(PetscBLASInt n,PetscReal *d,PetscReal *e,PetscScalar *Q,PetscBLASInt ld)
206: {
207:   PetscBLASInt i,j,j2,one=1;
208:   PetscReal    c,s,p,off,temp;

210:   if (n<=2) PetscFunctionReturn(0);

212:   for (j=0;j<n-2;j++) {

214:     /* Eliminate entry e(j) by a rotation in the planes (j,j+1) */
215:     temp = e[j+1];
216:     PetscStackCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp,&e[j],&c,&s,&e[j+1]));
217:     s = -s;

219:     /* Apply rotation to diagonal elements */
220:     temp   = d[j+1];
221:     e[j]   = c*s*(temp-d[j]);
222:     d[j+1] = s*s*d[j] + c*c*temp;
223:     d[j]   = c*c*d[j] + s*s*temp;

225:     /* Apply rotation to Q */
226:     j2 = j+2;
227:     PetscStackCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+j*ld,&one,Q+(j+1)*ld,&one,&c,&s));

229:     /* Chase newly introduced off-diagonal entry to the top left corner */
230:     for (i=j-1;i>=0;i--) {
231:       off  = -s*e[i];
232:       e[i] = c*e[i];
233:       temp = e[i+1];
234:       PetscStackCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp,&off,&c,&s,&e[i+1]));
235:       s = -s;
236:       temp = (d[i]-d[i+1])*s - 2.0*c*e[i];
237:       p = s*temp;
238:       d[i+1] += p;
239:       d[i] -= p;
240:       e[i] = -e[i] - c*temp;
241:       j2 = j+2;
242:       PetscStackCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+i*ld,&one,Q+(i+1)*ld,&one,&c,&s));
243:     }
244:   }
245:   PetscFunctionReturn(0);
246: }

248: /*
249:    Reduce to tridiagonal form by means of ArrowTridiag.
250: */
251: static PetscErrorCode DSIntermediate_HEP(DS ds)
252: {
253:   PetscInt       i;
254:   PetscBLASInt   n1 = 0,n2,lwork,info,l = 0,n = 0,ld,off;
255:   PetscScalar    *A,*Q,*work,*tau;
256:   PetscReal      *d,*e;

258:   PetscBLASIntCast(ds->n,&n);
259:   PetscBLASIntCast(ds->l,&l);
260:   PetscBLASIntCast(ds->ld,&ld);
261:   PetscBLASIntCast(PetscMax(0,ds->k-l+1),&n1); /* size of leading block, excl. locked */
262:   n2 = n-l;     /* n2 = n1 + size of trailing block */
263:   off = l+l*ld;
264:   A  = ds->mat[DS_MAT_A];
265:   Q  = ds->mat[DS_MAT_Q];
266:   d  = ds->rmat[DS_MAT_T];
267:   e  = ds->rmat[DS_MAT_T]+ld;
268:   PetscArrayzero(Q,ld*ld);
269:   for (i=0;i<n;i++) Q[i+i*ld] = 1.0;

271:   if (ds->compact) {

273:     if (ds->state<DS_STATE_INTERMEDIATE) ArrowTridiag(n1,d+l,e+l,Q+off,ld);

275:   } else {

277:     for (i=0;i<l;i++) { d[i] = PetscRealPart(A[i+i*ld]); e[i] = 0.0; }

279:     if (ds->state<DS_STATE_INTERMEDIATE) {
280:       DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
281:       DSAllocateWork_Private(ds,ld+ld*ld,0,0);
282:       tau  = ds->work;
283:       work = ds->work+ld;
284:       lwork = ld*ld;
285:       PetscStackCallBLAS("LAPACKsytrd",LAPACKsytrd_("L",&n2,Q+off,&ld,d+l,e+l,tau,work,&lwork,&info));
286:       SlepcCheckLapackInfo("sytrd",info);
287:       PetscStackCallBLAS("LAPACKorgtr",LAPACKorgtr_("L",&n2,Q+off,&ld,tau,work,&lwork,&info));
288:       SlepcCheckLapackInfo("orgtr",info);
289:     } else {
290:       /* copy tridiagonal to d,e */
291:       for (i=l;i<n;i++)   d[i] = PetscRealPart(A[i+i*ld]);
292:       for (i=l;i<n-1;i++) e[i] = PetscRealPart(A[(i+1)+i*ld]);
293:     }
294:   }
295:   PetscFunctionReturn(0);
296: }

298: PetscErrorCode DSSort_HEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
299: {
300:   PetscInt       n,l,i,*perm,ld=ds->ld;
301:   PetscScalar    *A;
302:   PetscReal      *d;

304:   if (!ds->sc) PetscFunctionReturn(0);
305:   n = ds->n;
306:   l = ds->l;
307:   A = ds->mat[DS_MAT_A];
308:   d = ds->rmat[DS_MAT_T];
309:   perm = ds->perm;
310:   if (!rr) DSSortEigenvaluesReal_Private(ds,d,perm);
311:   else DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_FALSE);
312:   for (i=l;i<n;i++) wr[i] = d[perm[i]];
313:   DSPermuteColumns_Private(ds,l,n,n,DS_MAT_Q,perm);
314:   for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
315:   if (!ds->compact) {
316:     for (i=l;i<n;i++) A[i+i*ld] = wr[i];
317:   }
318:   PetscFunctionReturn(0);
319: }

321: PetscErrorCode DSUpdateExtraRow_HEP(DS ds)
322: {
323:   PetscInt       i;
324:   PetscBLASInt   n,ld,incx=1;
325:   PetscScalar    *A,*Q,*x,*y,one=1.0,zero=0.0;
326:   PetscReal      *e,beta;

328:   PetscBLASIntCast(ds->n,&n);
329:   PetscBLASIntCast(ds->ld,&ld);
330:   A  = ds->mat[DS_MAT_A];
331:   Q  = ds->mat[DS_MAT_Q];
332:   e  = ds->rmat[DS_MAT_T]+ld;

334:   if (ds->compact) {
335:     beta = e[n-1];   /* in compact, we assume all entries are zero except the last one */
336:     for (i=0;i<n;i++) e[i] = PetscRealPart(beta*Q[n-1+i*ld]);
337:     ds->k = n;
338:   } else {
339:     DSAllocateWork_Private(ds,2*ld,0,0);
340:     x = ds->work;
341:     y = ds->work+ld;
342:     for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
343:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
344:     for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
345:     ds->k = n;
346:   }
347:   PetscFunctionReturn(0);
348: }

350: PetscErrorCode DSSolve_HEP_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
351: {
352:   PetscInt       i;
353:   PetscBLASInt   n1,info,l = 0,n = 0,ld,off;
354:   PetscScalar    *Q,*A;
355:   PetscReal      *d,*e;

358:   PetscBLASIntCast(ds->n,&n);
359:   PetscBLASIntCast(ds->l,&l);
360:   PetscBLASIntCast(ds->ld,&ld);
361:   n1 = n-l;     /* n1 = size of leading block, excl. locked + size of trailing block */
362:   off = l+l*ld;
363:   Q  = ds->mat[DS_MAT_Q];
364:   A  = ds->mat[DS_MAT_A];
365:   d  = ds->rmat[DS_MAT_T];
366:   e  = ds->rmat[DS_MAT_T]+ld;

368:   /* Reduce to tridiagonal form */
369:   DSIntermediate_HEP(ds);

371:   /* Solve the tridiagonal eigenproblem */
372:   for (i=0;i<l;i++) wr[i] = d[i];

374:   DSAllocateWork_Private(ds,0,2*ld,0);
375:   PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("V",&n1,d+l,e+l,Q+off,&ld,ds->rwork,&info));
376:   SlepcCheckLapackInfo("steqr",info);
377:   for (i=l;i<n;i++) wr[i] = d[i];

379:   /* Create diagonal matrix as a result */
380:   if (ds->compact) PetscArrayzero(e,n-1);
381:   else {
382:     for (i=l;i<n;i++) PetscArrayzero(A+l+i*ld,n-l);
383:     for (i=l;i<n;i++) A[i+i*ld] = d[i];
384:   }

386:   /* Set zero wi */
387:   if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
388:   PetscFunctionReturn(0);
389: }

391: PetscErrorCode DSSolve_HEP_MRRR(DS ds,PetscScalar *wr,PetscScalar *wi)
392: {
393:   PetscInt       i;
394:   PetscBLASInt   n1 = 0,n2 = 0,n3,lwork,liwork,info,l = 0,n = 0,m = 0,ld,off,il,iu,*isuppz;
395:   PetscScalar    *A,*Q,*W=NULL,one=1.0,zero=0.0;
396:   PetscReal      *d,*e,abstol=0.0,vl,vu;
397: #if defined(PETSC_USE_COMPLEX)
398:   PetscInt       j;
399:   PetscReal      *ritz;
400: #endif

403:   PetscBLASIntCast(ds->n,&n);
404:   PetscBLASIntCast(ds->l,&l);
405:   PetscBLASIntCast(ds->ld,&ld);
406:   PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
407:   PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
408:   n3 = n1+n2;
409:   off = l+l*ld;
410:   A  = ds->mat[DS_MAT_A];
411:   Q  = ds->mat[DS_MAT_Q];
412:   d  = ds->rmat[DS_MAT_T];
413:   e  = ds->rmat[DS_MAT_T]+ld;

415:   /* Reduce to tridiagonal form */
416:   DSIntermediate_HEP(ds);

418:   /* Solve the tridiagonal eigenproblem */
419:   for (i=0;i<l;i++) wr[i] = d[i];

421:   if (ds->state<DS_STATE_INTERMEDIATE) {  /* Q contains useful info */
422:     DSAllocateMat_Private(ds,DS_MAT_W);
423:     DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
424:     W = ds->mat[DS_MAT_W];
425:   }
426: #if defined(PETSC_USE_COMPLEX)
427:   DSAllocateMatReal_Private(ds,DS_MAT_Q);
428: #endif
429:   lwork = 20*ld;
430:   liwork = 10*ld;
431:   DSAllocateWork_Private(ds,0,lwork+ld,liwork+2*ld);
432:   isuppz = ds->iwork+liwork;
433: #if defined(PETSC_USE_COMPLEX)
434:   ritz = ds->rwork+lwork;
435:   PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,ritz+l,ds->rmat[DS_MAT_Q]+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
436:   for (i=l;i<n;i++) wr[i] = ritz[i];
437: #else
438:   PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,wr+l,Q+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
439: #endif
440:   SlepcCheckLapackInfo("stevr",info);
441: #if defined(PETSC_USE_COMPLEX)
442:   for (i=l;i<n;i++)
443:     for (j=l;j<n;j++)
444:       Q[i+j*ld] = (ds->rmat[DS_MAT_Q])[i+j*ld];
445: #endif
446:   if (ds->state<DS_STATE_INTERMEDIATE) {  /* accumulate previous Q */
447:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n3,&n3,&n3,&one,W+off,&ld,Q+off,&ld,&zero,A+off,&ld));
448:     DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
449:   }
450:   for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);

452:   /* Create diagonal matrix as a result */
453:   if (ds->compact) PetscArrayzero(e,n-1);
454:   else {
455:     for (i=l;i<n;i++) PetscArrayzero(A+l+i*ld,n-l);
456:     for (i=l;i<n;i++) A[i+i*ld] = d[i];
457:   }

459:   /* Set zero wi */
460:   if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
461:   PetscFunctionReturn(0);
462: }

464: PetscErrorCode DSSolve_HEP_DC(DS ds,PetscScalar *wr,PetscScalar *wi)
465: {
466:   PetscInt       i;
467:   PetscBLASInt   n1,info,l = 0,ld,off,lrwork,liwork;
468:   PetscScalar    *Q,*A;
469:   PetscReal      *d,*e;
470: #if defined(PETSC_USE_COMPLEX)
471:   PetscBLASInt   lwork;
472:   PetscInt       j;
473: #endif

476:   PetscBLASIntCast(ds->l,&l);
477:   PetscBLASIntCast(ds->ld,&ld);
478:   PetscBLASIntCast(ds->n-ds->l,&n1);
479:   off = l+l*ld;
480:   Q  = ds->mat[DS_MAT_Q];
481:   A  = ds->mat[DS_MAT_A];
482:   d  = ds->rmat[DS_MAT_T];
483:   e  = ds->rmat[DS_MAT_T]+ld;

485:   /* Reduce to tridiagonal form */
486:   DSIntermediate_HEP(ds);

488:   /* Solve the tridiagonal eigenproblem */
489:   for (i=0;i<l;i++) wr[i] = d[i];

491:   lrwork = 5*n1*n1+3*n1+1;
492:   liwork = 5*n1*n1+6*n1+6;
493: #if !defined(PETSC_USE_COMPLEX)
494:   DSAllocateWork_Private(ds,0,lrwork,liwork);
495:   PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
496: #else
497:   lwork = ld*ld;
498:   DSAllocateWork_Private(ds,lwork,lrwork,liwork);
499:   PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->work,&lwork,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
500:   /* Fixing Lapack bug*/
501:   for (j=ds->l;j<ds->n;j++)
502:     for (i=0;i<ds->l;i++) Q[i+j*ld] = 0.0;
503: #endif
504:   SlepcCheckLapackInfo("stedc",info);
505:   for (i=l;i<ds->n;i++) wr[i] = d[i];

507:   /* Create diagonal matrix as a result */
508:   if (ds->compact) PetscArrayzero(e,ds->n-1);
509:   else {
510:     for (i=l;i<ds->n;i++) PetscArrayzero(A+l+i*ld,ds->n-l);
511:     for (i=l;i<ds->n;i++) A[i+i*ld] = d[i];
512:   }

514:   /* Set zero wi */
515:   if (wi) for (i=l;i<ds->n;i++) wi[i] = 0.0;
516:   PetscFunctionReturn(0);
517: }

519: #if !defined(PETSC_USE_COMPLEX)
520: PetscErrorCode DSSolve_HEP_BDC(DS ds,PetscScalar *wr,PetscScalar *wi)
521: {
522:   PetscBLASInt   i,j,k,m,n = 0,info,nblks,bs = 0,ld = 0,lde,lrwork,liwork,*ksizes,*iwork,mingapi;
523:   PetscScalar    *Q,*A;
524:   PetscReal      *D,*E,*d,*e,tol=PETSC_MACHINE_EPSILON/2,tau1=1e-16,tau2=1e-18,*rwork,mingap;

528:   PetscBLASIntCast(ds->ld,&ld);
529:   PetscBLASIntCast(ds->bs,&bs);
530:   PetscBLASIntCast(ds->n,&n);
531:   nblks = n/bs;
532:   Q  = ds->mat[DS_MAT_Q];
533:   A  = ds->mat[DS_MAT_A];
534:   d  = ds->rmat[DS_MAT_T];
535:   e  = ds->rmat[DS_MAT_T]+ld;
536:   lrwork = 4*n*n+60*n+1;
537:   liwork = 5*n+5*nblks-1;
538:   lde = 2*bs+1;
539:   DSAllocateWork_Private(ds,bs*n+lde*lde*(nblks-1),lrwork,nblks+liwork);
540:   D      = ds->work;
541:   E      = ds->work+bs*n;
542:   rwork  = ds->rwork;
543:   ksizes = ds->iwork;
544:   iwork  = ds->iwork+nblks;
545:   PetscArrayzero(iwork,liwork);

547:   /* Copy matrix to block tridiagonal format */
548:   j=0;
549:   for (i=0;i<nblks;i++) {
550:     ksizes[i]=bs;
551:     for (k=0;k<bs;k++)
552:       for (m=0;m<bs;m++)
553:         D[k+m*bs+i*bs*bs] = PetscRealPart(A[j+k+(j+m)*n]);
554:     j = j + bs;
555:   }
556:   j=0;
557:   for (i=0;i<nblks-1;i++) {
558:     for (k=0;k<bs;k++)
559:       for (m=0;m<bs;m++)
560:         E[k+m*lde+i*lde*lde] = PetscRealPart(A[j+bs+k+(j+m)*n]);
561:     j = j + bs;
562:   }

564:   /* Solve the block tridiagonal eigenproblem */
565:   BDC_dsbtdc_("D","A",n,nblks,ksizes,D,bs,bs,E,lde,lde,tol,tau1,tau2,d,
566:            Q,n,rwork,lrwork,iwork,liwork,&mingap,&mingapi,&info,1,1);
567:   for (i=0;i<ds->n;i++) wr[i] = d[i];

569:   /* Create diagonal matrix as a result */
570:   if (ds->compact) PetscArrayzero(e,ds->n-1);
571:   else {
572:     for (i=0;i<ds->n;i++) PetscArrayzero(A+i*ld,ds->n);
573:     for (i=0;i<ds->n;i++) A[i+i*ld] = wr[i];
574:   }

576:   /* Set zero wi */
577:   if (wi) for (i=0;i<ds->n;i++) wi[i] = 0.0;
578:   PetscFunctionReturn(0);
579: }
580: #endif

582: PetscErrorCode DSTruncate_HEP(DS ds,PetscInt n,PetscBool trim)
583: {
584:   PetscInt    i,ld=ds->ld,l=ds->l;
585:   PetscScalar *A = ds->mat[DS_MAT_A];

587:   if (trim) {
588:     if (!ds->compact && ds->extrarow) {   /* clean extra row */
589:       for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
590:     }
591:     ds->l = 0;
592:     ds->k = 0;
593:     ds->n = n;
594:     ds->t = ds->n;   /* truncated length equal to the new dimension */
595:   } else {
596:     if (!ds->compact && ds->extrarow && ds->k==ds->n) {
597:       /* copy entries of extra row to the new position, then clean last row */
598:       for (i=l;i<n;i++) A[n+i*ld] = A[ds->n+i*ld];
599:       for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
600:     }
601:     ds->k = (ds->extrarow)? n: 0;
602:     ds->t = ds->n;   /* truncated length equal to previous dimension */
603:     ds->n = n;
604:   }
605:   PetscFunctionReturn(0);
606: }

608: PetscErrorCode DSSynchronize_HEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
609: {
610:   PetscInt       ld=ds->ld,l=ds->l,k=0,kr=0;
611:   PetscMPIInt    n,rank,off=0,size,ldn,ld3;

613:   if (ds->compact) kr = 3*ld;
614:   else k = (ds->n-l)*ld;
615:   if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
616:   if (eigr) k += (ds->n-l);
617:   DSAllocateWork_Private(ds,k+kr,0,0);
618:   PetscMPIIntCast(k*sizeof(PetscScalar)+kr*sizeof(PetscReal),&size);
619:   PetscMPIIntCast(ds->n-l,&n);
620:   PetscMPIIntCast(ld*(ds->n-l),&ldn);
621:   PetscMPIIntCast(ld*3,&ld3);
622:   MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
623:   if (!rank) {
624:     if (ds->compact) MPI_Pack(ds->rmat[DS_MAT_T],ld3,MPIU_REAL,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
625:     else MPI_Pack(ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
626:     if (ds->state>DS_STATE_RAW) MPI_Pack(ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
627:     if (eigr) MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
628:   }
629:   MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
630:   if (rank) {
631:     if (ds->compact) MPI_Unpack(ds->work,size,&off,ds->rmat[DS_MAT_T],ld3,MPIU_REAL,PetscObjectComm((PetscObject)ds));
632:     else MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
633:     if (ds->state>DS_STATE_RAW) MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
634:     if (eigr) MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
635:   }
636:   PetscFunctionReturn(0);
637: }

639: PetscErrorCode DSCond_HEP(DS ds,PetscReal *cond)
640: {
641:   PetscScalar    *work;
642:   PetscReal      *rwork;
643:   PetscBLASInt   *ipiv;
644:   PetscBLASInt   lwork,info,n,ld;
645:   PetscReal      hn,hin;
646:   PetscScalar    *A;

648:   PetscBLASIntCast(ds->n,&n);
649:   PetscBLASIntCast(ds->ld,&ld);
650:   lwork = 8*ld;
651:   DSAllocateWork_Private(ds,lwork,ld,ld);
652:   work  = ds->work;
653:   rwork = ds->rwork;
654:   ipiv  = ds->iwork;
655:   DSSwitchFormat_HEP(ds);

657:   /* use workspace matrix W to avoid overwriting A */
658:   DSAllocateMat_Private(ds,DS_MAT_W);
659:   A = ds->mat[DS_MAT_W];
660:   PetscArraycpy(A,ds->mat[DS_MAT_A],ds->ld*ds->ld);

662:   /* norm of A */
663:   hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);

665:   /* norm of inv(A) */
666:   PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
667:   SlepcCheckLapackInfo("getrf",info);
668:   PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
669:   SlepcCheckLapackInfo("getri",info);
670:   hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);

672:   *cond = hn*hin;
673:   PetscFunctionReturn(0);
674: }

676: PetscErrorCode DSTranslateRKS_HEP(DS ds,PetscScalar alpha)
677: {
678:   PetscInt       i,j,k=ds->k;
679:   PetscScalar    *Q,*A,*R,*tau,*work;
680:   PetscBLASInt   ld,n1,n0,lwork,info;

682:   PetscBLASIntCast(ds->ld,&ld);
683:   DSAllocateWork_Private(ds,ld*ld,0,0);
684:   tau = ds->work;
685:   work = ds->work+ld;
686:   PetscBLASIntCast(ld*(ld-1),&lwork);
687:   DSAllocateMat_Private(ds,DS_MAT_W);
688:   A  = ds->mat[DS_MAT_A];
689:   Q  = ds->mat[DS_MAT_Q];
690:   R  = ds->mat[DS_MAT_W];

692:   /* copy I+alpha*A */
693:   PetscArrayzero(Q,ld*ld);
694:   PetscArrayzero(R,ld*ld);
695:   for (i=0;i<k;i++) {
696:     Q[i+i*ld] = 1.0 + alpha*A[i+i*ld];
697:     Q[k+i*ld] = alpha*A[k+i*ld];
698:   }

700:   /* compute qr */
701:   PetscBLASIntCast(k+1,&n1);
702:   PetscBLASIntCast(k,&n0);
703:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&n1,&n0,Q,&ld,tau,work,&lwork,&info));
704:   SlepcCheckLapackInfo("geqrf",info);

706:   /* copy R from Q */
707:   for (j=0;j<k;j++)
708:     for (i=0;i<=j;i++)
709:       R[i+j*ld] = Q[i+j*ld];

711:   /* compute orthogonal matrix in Q */
712:   PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&n1,&n1,&n0,Q,&ld,tau,work,&lwork,&info));
713:   SlepcCheckLapackInfo("orgqr",info);

715:   /* compute the updated matrix of projected problem */
716:   for (j=0;j<k;j++)
717:     for (i=0;i<k+1;i++)
718:       A[j*ld+i] = Q[i*ld+j];
719:   alpha = -1.0/alpha;
720:   PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n1,&n0,&alpha,R,&ld,A,&ld));
721:   for (i=0;i<k;i++)
722:     A[ld*i+i] -= alpha;
723:   PetscFunctionReturn(0);
724: }

726: PetscErrorCode DSHermitian_HEP(DS ds,DSMatType m,PetscBool *flg)
727: {
728:   if (m==DS_MAT_A && !ds->extrarow) *flg = PETSC_TRUE;
729:   else *flg = PETSC_FALSE;
730:   PetscFunctionReturn(0);
731: }

733: /*MC
734:    DSHEP - Dense Hermitian Eigenvalue Problem.

736:    Level: beginner

738:    Notes:
739:    The problem is expressed as A*X = X*Lambda, where A is real symmetric
740:    (or complex Hermitian). Lambda is a diagonal matrix whose diagonal
741:    elements are the arguments of DSSolve(). After solve, A is overwritten
742:    with Lambda.

744:    In the intermediate state A is reduced to tridiagonal form. In compact
745:    storage format, the symmetric tridiagonal matrix is stored in T.

747:    Used DS matrices:
748: +  DS_MAT_A - problem matrix
749: .  DS_MAT_T - symmetric tridiagonal matrix
750: -  DS_MAT_Q - orthogonal/unitary transformation that reduces to tridiagonal form
751:    (intermediate step) or matrix of orthogonal eigenvectors, which is equal to X

753:    Implemented methods:
754: +  0 - Implicit QR (_steqr)
755: .  1 - Multiple Relatively Robust Representations (_stevr)
756: .  2 - Divide and Conquer (_stedc)
757: -  3 - Block Divide and Conquer (real scalars only)

759: .seealso: DSCreate(), DSSetType(), DSType
760: M*/
761: SLEPC_EXTERN PetscErrorCode DSCreate_HEP(DS ds)
762: {
763:   ds->ops->allocate      = DSAllocate_HEP;
764:   ds->ops->view          = DSView_HEP;
765:   ds->ops->vectors       = DSVectors_HEP;
766:   ds->ops->solve[0]      = DSSolve_HEP_QR;
767:   ds->ops->solve[1]      = DSSolve_HEP_MRRR;
768:   ds->ops->solve[2]      = DSSolve_HEP_DC;
769: #if !defined(PETSC_USE_COMPLEX)
770:   ds->ops->solve[3]      = DSSolve_HEP_BDC;
771: #endif
772:   ds->ops->sort          = DSSort_HEP;
773:   ds->ops->synchronize   = DSSynchronize_HEP;
774:   ds->ops->truncate      = DSTruncate_HEP;
775:   ds->ops->update        = DSUpdateExtraRow_HEP;
776:   ds->ops->cond          = DSCond_HEP;
777:   ds->ops->transrks      = DSTranslateRKS_HEP;
778:   ds->ops->hermitian     = DSHermitian_HEP;
779:   PetscFunctionReturn(0);
780: }