Actual source code: dssvd.c

  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #include <slepc/private/dsimpl.h>
 12: #include <slepcblaslapack.h>

 14: typedef struct {
 15:   PetscInt m;              /* number of columns */
 16:   PetscInt t;              /* number of rows of V after truncating */
 17: } DS_SVD;

 19: static PetscErrorCode DSAllocate_SVD(DS ds,PetscInt ld)
 20: {
 21:   PetscFunctionBegin;
 22:   if (!ds->compact) PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
 23:   PetscCall(DSAllocateMat_Private(ds,DS_MAT_U));
 24:   PetscCall(DSAllocateMat_Private(ds,DS_MAT_V));
 25:   PetscCall(DSAllocateMat_Private(ds,DS_MAT_T));
 26:   PetscCall(PetscFree(ds->perm));
 27:   PetscCall(PetscMalloc1(ld,&ds->perm));
 28:   PetscFunctionReturn(PETSC_SUCCESS);
 29: }

 31: /*   0       l           k                 m-1
 32:     -----------------------------------------
 33:     |*       .           .                  |
 34:     |  *     .           .                  |
 35:     |    *   .           .                  |
 36:     |      * .           .                  |
 37:     |        o           o                  |
 38:     |          o         o                  |
 39:     |            o       o                  |
 40:     |              o     o                  |
 41:     |                o   o                  |
 42:     |                  o o                  |
 43:     |                    o x                |
 44:     |                      x x              |
 45:     |                        x x            |
 46:     |                          x x          |
 47:     |                            x x        |
 48:     |                              x x      |
 49:     |                                x x    |
 50:     |                                  x x  |
 51:     |                                    x x|
 52: n-1 |                                      x|
 53:     -----------------------------------------
 54: */

 56: static PetscErrorCode DSSwitchFormat_SVD(DS ds)
 57: {
 58:   DS_SVD         *ctx = (DS_SVD*)ds->data;
 59:   PetscReal      *T;
 60:   PetscScalar    *A;
 61:   PetscInt       i,m=ctx->m,k=ds->k,ld=ds->ld;

 63:   PetscFunctionBegin;
 64:   PetscCheck(m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
 65:   PetscCheck(ds->compact,PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Must have compact storage");
 66:   /* switch from compact (arrow) to dense storage */
 67:   PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
 68:   PetscCall(MatDenseGetArrayWrite(ds->omat[DS_MAT_A],&A));
 69:   PetscCall(DSGetArrayReal(ds,DS_MAT_T,&T));
 70:   PetscCall(PetscArrayzero(A,ld*ld));
 71:   for (i=0;i<k;i++) {
 72:     A[i+i*ld] = T[i];
 73:     A[i+k*ld] = T[i+ld];
 74:   }
 75:   A[k+k*ld] = T[k];
 76:   for (i=k+1;i<m;i++) {
 77:     A[i+i*ld]   = T[i];
 78:     A[i-1+i*ld] = T[i-1+ld];
 79:   }
 80:   PetscCall(MatDenseRestoreArrayWrite(ds->omat[DS_MAT_A],&A));
 81:   PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&T));
 82:   PetscFunctionReturn(PETSC_SUCCESS);
 83: }

 85: static PetscErrorCode DSView_SVD(DS ds,PetscViewer viewer)
 86: {
 87:   DS_SVD            *ctx = (DS_SVD*)ds->data;
 88:   PetscViewerFormat format;
 89:   PetscInt          i,j,r,c,m=ctx->m,rows,cols;
 90:   PetscReal         *T,value;
 91:   const char        *methodname[] = {
 92:                      "Implicit zero-shift QR for bidiagonals (_bdsqr)",
 93:                      "Divide and Conquer (_bdsdc or _gesdd)"
 94:   };
 95:   const int         nmeth=PETSC_STATIC_ARRAY_LENGTH(methodname);

 97:   PetscFunctionBegin;
 98:   PetscCall(PetscViewerGetFormat(viewer,&format));
 99:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
100:     PetscCall(PetscViewerASCIIPrintf(viewer,"number of columns: %" PetscInt_FMT "\n",m));
101:     if (ds->method<nmeth) PetscCall(PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]));
102:     PetscFunctionReturn(PETSC_SUCCESS);
103:   }
104:   PetscCheck(m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
105:   if (ds->compact) {
106:     PetscCall(DSGetArrayReal(ds,DS_MAT_T,&T));
107:     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
108:     rows = ds->n;
109:     cols = ds->extrarow? m+1: m;
110:     if (format == PETSC_VIEWER_ASCII_MATLAB) {
111:       PetscCall(PetscViewerASCIIPrintf(viewer,"%% Size = %" PetscInt_FMT " %" PetscInt_FMT "\n",rows,cols));
112:       PetscCall(PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",3);\n",2*ds->n));
113:       PetscCall(PetscViewerASCIIPrintf(viewer,"zzz = [\n"));
114:       for (i=0;i<PetscMin(ds->n,m);i++) PetscCall(PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",i+1,i+1,(double)T[i]));
115:       for (i=0;i<cols-1;i++) {
116:         r = PetscMax(i+2,ds->k+1);
117:         c = i+1;
118:         PetscCall(PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",c,r,(double)T[i+ds->ld]));
119:       }
120:       PetscCall(PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]));
121:     } else {
122:       for (i=0;i<rows;i++) {
123:         for (j=0;j<cols;j++) {
124:           if (i==j) value = T[i];
125:           else if (i<ds->k && j==ds->k) value = T[PetscMin(i,j)+ds->ld];
126:           else if (i+1==j && i>=ds->k) value = T[i+ds->ld];
127:           else value = 0.0;
128:           PetscCall(PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value));
129:         }
130:         PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
131:       }
132:     }
133:     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
134:     PetscCall(PetscViewerFlush(viewer));
135:     PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&T));
136:   } else PetscCall(DSViewMat(ds,viewer,DS_MAT_A));
137:   if (ds->state>DS_STATE_INTERMEDIATE) {
138:     PetscCall(DSViewMat(ds,viewer,DS_MAT_U));
139:     PetscCall(DSViewMat(ds,viewer,DS_MAT_V));
140:   }
141:   PetscFunctionReturn(PETSC_SUCCESS);
142: }

144: static PetscErrorCode DSVectors_SVD(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
145: {
146:   PetscFunctionBegin;
147:   switch (mat) {
148:     case DS_MAT_U:
149:     case DS_MAT_V:
150:       if (rnorm) *rnorm = 0.0;
151:       break;
152:     default:
153:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
154:   }
155:   PetscFunctionReturn(PETSC_SUCCESS);
156: }

158: static PetscErrorCode DSSort_SVD(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
159: {
160:   DS_SVD         *ctx = (DS_SVD*)ds->data;
161:   PetscInt       n,l,i,*perm,ld=ds->ld;
162:   PetscScalar    *A;
163:   PetscReal      *d;

165:   PetscFunctionBegin;
166:   if (!ds->sc) PetscFunctionReturn(PETSC_SUCCESS);
167:   PetscCheck(ctx->m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
168:   l = ds->l;
169:   n = PetscMin(ds->n,ctx->m);
170:   PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
171:   perm = ds->perm;
172:   if (!rr) PetscCall(DSSortEigenvaluesReal_Private(ds,d,perm));
173:   else PetscCall(DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_FALSE));
174:   for (i=l;i<n;i++) wr[i] = d[perm[i]];
175:   PetscCall(DSPermuteBoth_Private(ds,l,n,ds->n,ctx->m,DS_MAT_U,DS_MAT_V,perm));
176:   for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
177:   if (!ds->compact) {
178:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
179:     for (i=l;i<n;i++) A[i+i*ld] = wr[i];
180:     PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
181:   }
182:   PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
183:   PetscFunctionReturn(PETSC_SUCCESS);
184: }

186: static PetscErrorCode DSUpdateExtraRow_SVD(DS ds)
187: {
188:   DS_SVD            *ctx = (DS_SVD*)ds->data;
189:   PetscInt          i;
190:   PetscBLASInt      n=0,m=0,ld,incx=1;
191:   PetscScalar       *A,*x,*y,one=1.0,zero=0.0;
192:   PetscReal         *T,*e,beta;
193:   const PetscScalar *U;

195:   PetscFunctionBegin;
196:   PetscCheck(ctx->m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
197:   PetscCall(PetscBLASIntCast(ds->n,&n));
198:   PetscCall(PetscBLASIntCast(ctx->m,&m));
199:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
200:   PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_U],&U));
201:   if (ds->compact) {
202:     PetscCall(DSGetArrayReal(ds,DS_MAT_T,&T));
203:     e = T+ld;
204:     beta = e[m-1];   /* in compact, we assume all entries are zero except the last one */
205:     for (i=0;i<n;i++) e[i] = PetscRealPart(beta*U[n-1+i*ld]);
206:     ds->k = m;
207:     PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&T));
208:   } else {
209:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
210:     PetscCall(DSAllocateWork_Private(ds,2*ld,0,0));
211:     x = ds->work;
212:     y = ds->work+ld;
213:     for (i=0;i<n;i++) x[i] = PetscConj(A[i+m*ld]);
214:     PetscCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,U,&ld,x,&incx,&zero,y,&incx));
215:     for (i=0;i<n;i++) A[i+m*ld] = PetscConj(y[i]);
216:     ds->k = m;
217:     PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
218:   }
219:   PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_U],&U));
220:   PetscFunctionReturn(PETSC_SUCCESS);
221: }

223: static PetscErrorCode DSTruncate_SVD(DS ds,PetscInt n,PetscBool trim)
224: {
225:   PetscInt    i,ld=ds->ld,l=ds->l;
226:   PetscScalar *A;
227:   DS_SVD      *ctx = (DS_SVD*)ds->data;

229:   PetscFunctionBegin;
230:   if (!ds->compact && ds->extrarow) PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
231:   if (trim) {
232:     if (!ds->compact && ds->extrarow) {   /* clean extra column */
233:       for (i=l;i<ds->n;i++) A[i+ctx->m*ld] = 0.0;
234:     }
235:     ds->l  = 0;
236:     ds->k  = 0;
237:     ds->n  = n;
238:     ctx->m = n;
239:     ds->t  = ds->n;   /* truncated length equal to the new dimension */
240:     ctx->t = ctx->m;  /* must also keep the previous dimension of V */
241:   } else {
242:     if (!ds->compact && ds->extrarow && ds->k==ds->n) {
243:       /* copy entries of extra column to the new position, then clean last row */
244:       for (i=l;i<n;i++) A[i+n*ld] = A[i+ctx->m*ld];
245:       for (i=l;i<ds->n;i++) A[i+ctx->m*ld] = 0.0;
246:     }
247:     ds->k  = (ds->extrarow)? n: 0;
248:     ds->t  = ds->n;   /* truncated length equal to previous dimension */
249:     ctx->t = ctx->m;  /* must also keep the previous dimension of V */
250:     ds->n  = n;
251:     ctx->m = n;
252:   }
253:   if (!ds->compact && ds->extrarow) PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
254:   PetscFunctionReturn(PETSC_SUCCESS);
255: }

257: /*
258:   DSArrowBidiag reduces a real square arrowhead matrix of the form

260:                 [ d 0 0 0 e ]
261:                 [ 0 d 0 0 e ]
262:             A = [ 0 0 d 0 e ]
263:                 [ 0 0 0 d e ]
264:                 [ 0 0 0 0 d ]

266:   to upper bidiagonal form

268:                 [ d e 0 0 0 ]
269:                 [ 0 d e 0 0 ]
270:    B = Q'*A*P = [ 0 0 d e 0 ],
271:                 [ 0 0 0 d e ]
272:                 [ 0 0 0 0 d ]

274:   where P,Q are orthogonal matrices. Uses plane rotations with a bulge chasing scheme.
275:   On input, P and Q must be initialized to the identity matrix.
276: */
277: static PetscErrorCode DSArrowBidiag(PetscBLASInt n,PetscReal *d,PetscReal *e,PetscScalar *Q,PetscBLASInt ldq,PetscScalar *P,PetscBLASInt ldp)
278: {
279:   PetscBLASInt i,j,j2,one=1;
280:   PetscReal    c,s,ct,st,off,temp0,temp1,temp2;

282:   PetscFunctionBegin;
283:   if (n<=2) PetscFunctionReturn(PETSC_SUCCESS);

285:   for (j=0;j<n-2;j++) {

287:     /* Eliminate entry e(j) by a rotation in the planes (j,j+1) */
288:     temp0 = e[j+1];
289:     PetscCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp0,&e[j],&c,&s,&e[j+1]));
290:     s = -s;

292:     /* Apply rotation to Q */
293:     j2 = j+2;
294:     PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+j*ldq,&one,Q+(j+1)*ldq,&one,&c,&s));

296:     /* Apply rotation to diagonal elements, eliminate newly introduced entry A(j+1,j) */
297:     temp0 = d[j+1];
298:     temp1 = c*temp0;
299:     temp2 = -s*d[j];
300:     PetscCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp1,&temp2,&ct,&st,&d[j+1]));
301:     st = -st;
302:     e[j] = -c*st*d[j] + s*ct*temp0;
303:     d[j] = c*ct*d[j] + s*st*temp0;

305:     /* Apply rotation to P */
306:     PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,P+j*ldp,&one,P+(j+1)*ldp,&one,&ct,&st));

308:     /* Chase newly introduced off-diagonal entry to the top left corner */
309:     for (i=j-1;i>=0;i--) {

311:       /* Upper bulge */
312:       off   = -st*e[i];
313:       e[i]  = ct*e[i];
314:       temp0 = e[i+1];
315:       PetscCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp0,&off,&c,&s,&e[i+1]));
316:       s = -s;
317:       PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+i*ldq,&one,Q+(i+1)*ldq,&one,&c,&s));

319:       /* Lower bulge */
320:       temp0 = d[i+1];
321:       temp1 = -s*e[i] + c*temp0;
322:       temp2 = c*e[i] + s*temp0;
323:       off   = -s*d[i];
324:       PetscCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp1,&off,&ct,&st,&d[i+1]));
325:       st = -st;
326:       e[i] = -c*st*d[i] + ct*temp2;
327:       d[i] = c*ct*d[i] + st*temp2;
328:       PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,P+i*ldp,&one,P+(i+1)*ldp,&one,&ct,&st));
329:     }
330:   }
331:   PetscFunctionReturn(PETSC_SUCCESS);
332: }

334: /*
335:    Reduce to bidiagonal form by means of DSArrowBidiag.
336: */
337: static PetscErrorCode DSIntermediate_SVD(DS ds)
338: {
339:   DS_SVD        *ctx = (DS_SVD*)ds->data;
340:   PetscInt      i,j;
341:   PetscBLASInt  n1 = 0,n2,m2,lwork,info,l = 0,n = 0,m = 0,nm,ld,off;
342:   PetscScalar   *A,*U,*V,*W,*work,*tauq,*taup;
343:   PetscReal     *d,*e;

345:   PetscFunctionBegin;
346:   PetscCall(PetscBLASIntCast(ds->n,&n));
347:   PetscCall(PetscBLASIntCast(ctx->m,&m));
348:   PetscCall(PetscBLASIntCast(ds->l,&l));
349:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
350:   PetscCall(PetscBLASIntCast(PetscMax(0,ds->k-l+1),&n1)); /* size of leading block, excl. locked */
351:   n2 = n-l;     /* n2 = n1 + size of trailing block */
352:   m2 = m-l;
353:   off = l+l*ld;
354:   nm = PetscMin(n,m);
355:   PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
356:   e = d+ld;
357:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_U],&U));
358:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_V],&V));
359:   PetscCall(PetscArrayzero(U,ld*ld));
360:   for (i=0;i<n;i++) U[i+i*ld] = 1.0;
361:   PetscCall(PetscArrayzero(V,ld*ld));
362:   for (i=0;i<m;i++) V[i+i*ld] = 1.0;

364:   if (ds->compact) {

366:     if (ds->state<DS_STATE_INTERMEDIATE) PetscCall(DSArrowBidiag(n1,d+l,e+l,U+off,ld,V+off,ld));

368:   } else {

370:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
371:     for (i=0;i<l;i++) { d[i] = PetscRealPart(A[i+i*ld]); e[i] = 0.0; }

373:     if (ds->state<DS_STATE_INTERMEDIATE) {
374:       lwork = (m+n)*16;
375:       PetscCall(DSAllocateWork_Private(ds,2*nm+ld*ld+lwork,0,0));
376:       tauq = ds->work;
377:       taup = ds->work+nm;
378:       W    = ds->work+2*nm;
379:       work = ds->work+2*nm+ld*ld;
380:       for (j=0;j<m;j++) PetscCall(PetscArraycpy(W+j*ld,A+j*ld,n));
381:       PetscCallBLAS("LAPACKgebrd",LAPACKgebrd_(&n2,&m2,W+off,&ld,d+l,e+l,tauq,taup,work,&lwork,&info));
382:       SlepcCheckLapackInfo("gebrd",info);
383:       PetscCallBLAS("LAPACKormbr",LAPACKormbr_("Q","L","N",&n2,&n2,&m2,W+off,&ld,tauq,U+off,&ld,work,&lwork,&info));
384:       SlepcCheckLapackInfo("ormbr",info);
385:       PetscCallBLAS("LAPACKormbr",LAPACKormbr_("P","R","N",&m2,&m2,&n2,W+off,&ld,taup,V+off,&ld,work,&lwork,&info));
386:       SlepcCheckLapackInfo("ormbr",info);
387:     } else {
388:       /* copy bidiagonal to d,e */
389:       for (i=l;i<nm;i++)   d[i] = PetscRealPart(A[i+i*ld]);
390:       for (i=l;i<nm-1;i++) e[i] = PetscRealPart(A[i+(i+1)*ld]);
391:     }
392:     PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
393:   }
394:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_U],&U));
395:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_V],&V));
396:   PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
397:   PetscFunctionReturn(PETSC_SUCCESS);
398: }

400: static PetscErrorCode DSSolve_SVD_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
401: {
402:   DS_SVD         *ctx = (DS_SVD*)ds->data;
403:   PetscInt       i,j,neig=PetscMin(ds->n,ctx->m);
404:   PetscBLASInt   n1,m1,info,l = 0,n = 0,m = 0,nm,ld,off,zero=0;
405:   PetscScalar    *A,*U,*V,*Vt;
406:   PetscReal      *d,*e;

408:   PetscFunctionBegin;
409:   PetscCheck(ctx->m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
410:   PetscCall(PetscBLASIntCast(ds->n,&n));
411:   PetscCall(PetscBLASIntCast(ctx->m,&m));
412:   PetscCall(PetscBLASIntCast(ds->l,&l));
413:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
414:   n1 = n-l;     /* n1 = size of leading block, excl. locked + size of trailing block */
415:   m1 = m-l;
416:   nm = PetscMin(n1,m1);
417:   off = l+l*ld;
418:   PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
419:   e = d+ld;

421:   /* Reduce to bidiagonal form */
422:   PetscCall(DSIntermediate_SVD(ds));

424:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_U],&U));
425:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_V],&V));

427:   /* solve bidiagonal SVD problem */
428:   for (i=0;i<l;i++) wr[i] = d[i];
429:   PetscCall(DSAllocateWork_Private(ds,ld*ld,4*n1,0));
430:   Vt = ds->work;
431:   for (i=l;i<m;i++) {
432:     for (j=l;j<m;j++) {
433:       Vt[i+j*ld] = PetscConj(V[j+i*ld]);  /* Lapack expects transposed VT */
434:     }
435:   }
436:   PetscCallBLAS("LAPACKbdsqr",LAPACKbdsqr_(n>=m?"U":"L",&nm,&m1,&n1,&zero,d+l,e+l,Vt+off,&ld,U+off,&ld,NULL,&ld,ds->rwork,&info));
437:   SlepcCheckLapackInfo("bdsqr",info);
438:   for (i=l;i<m;i++) {
439:     for (j=l;j<m;j++) {
440:       V[i+j*ld] = PetscConj(Vt[j+i*ld]);  /* transpose VT returned by Lapack */
441:     }
442:   }
443:   for (i=l;i<neig;i++) wr[i] = d[i];

445:   /* create diagonal matrix as a result */
446:   if (ds->compact) PetscCall(PetscArrayzero(e,n-1));
447:   else {
448:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
449:     for (i=l;i<m;i++) PetscCall(PetscArrayzero(A+l+i*ld,n-l));
450:     for (i=l;i<neig;i++) A[i+i*ld] = d[i];
451:     PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
452:   }
453:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_U],&U));
454:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_V],&V));
455:   PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));

457:   /* Set wi to zero */
458:   if (wi) for (i=l;i<neig;i++) wi[i] = 0.0;
459:   PetscFunctionReturn(PETSC_SUCCESS);
460: }

462: static PetscErrorCode DSSolve_SVD_DC(DS ds,PetscScalar *wr,PetscScalar *wi)
463: {
464:   DS_SVD         *ctx = (DS_SVD*)ds->data;
465:   PetscInt       i,j,neig=PetscMin(ds->n,ctx->m);
466:   PetscBLASInt   n1,m1,info,l = 0,n = 0,m = 0,ld,off,lwork;
467:   PetscScalar    *A,*U,*V,*W,qwork;
468:   PetscReal      *d,*e,*Ur,*Vr;

470:   PetscFunctionBegin;
471:   PetscCheck(ctx->m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
472:   PetscCall(PetscBLASIntCast(ds->n,&n));
473:   PetscCall(PetscBLASIntCast(ctx->m,&m));
474:   PetscCall(PetscBLASIntCast(ds->l,&l));
475:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
476:   n1 = n-l;     /* n1 = size of leading block, excl. locked + size of trailing block */
477:   m1 = m-l;
478:   off = l+l*ld;
479:   if (ds->compact) PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
480:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
481:   PetscCall(MatDenseGetArrayWrite(ds->omat[DS_MAT_U],&U));
482:   PetscCall(MatDenseGetArrayWrite(ds->omat[DS_MAT_V],&V));
483:   PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
484:   e = d+ld;
485:   PetscCall(PetscArrayzero(U,ld*ld));
486:   for (i=0;i<l;i++) U[i+i*ld] = 1.0;
487:   PetscCall(PetscArrayzero(V,ld*ld));
488:   for (i=0;i<l;i++) V[i+i*ld] = 1.0;

490:   if (ds->state>DS_STATE_RAW) {
491:     /* solve bidiagonal SVD problem */
492:     for (i=0;i<l;i++) wr[i] = d[i];
493: #if defined(PETSC_USE_COMPLEX)
494:     PetscCall(DSAllocateWork_Private(ds,0,3*n1*n1+4*n1+2*ld*ld,8*n1));
495:     Ur = ds->rwork+3*n1*n1+4*n1;
496:     Vr = ds->rwork+3*n1*n1+4*n1+ld*ld;
497: #else
498:     PetscCall(DSAllocateWork_Private(ds,0,3*n1*n1+4*n1+ld*ld,8*n1));
499:     Ur = U;
500:     Vr = ds->rwork+3*n1*n1+4*n1;
501: #endif
502:     PetscCallBLAS("LAPACKbdsdc",LAPACKbdsdc_("U","I",&n1,d+l,e+l,Ur+off,&ld,Vr+off,&ld,NULL,NULL,ds->rwork,ds->iwork,&info));
503:     SlepcCheckLapackInfo("bdsdc",info);
504:     for (i=l;i<n;i++) {
505:       for (j=l;j<n;j++) {
506: #if defined(PETSC_USE_COMPLEX)
507:         U[i+j*ld] = Ur[i+j*ld];
508: #endif
509:         V[i+j*ld] = PetscConj(Vr[j+i*ld]);  /* transpose VT returned by Lapack */
510:       }
511:     }
512:   } else {
513:     /* solve general rectangular SVD problem */
514:     PetscCall(DSAllocateMat_Private(ds,DS_MAT_W));
515:     PetscCall(MatDenseGetArrayWrite(ds->omat[DS_MAT_W],&W));
516:     if (ds->compact) PetscCall(DSSwitchFormat_SVD(ds));
517:     for (i=0;i<l;i++) wr[i] = d[i];
518:     PetscCall(DSAllocateWork_Private(ds,0,0,8*neig));
519:     lwork = -1;
520: #if defined(PETSC_USE_COMPLEX)
521:     PetscCall(DSAllocateWork_Private(ds,0,5*neig*neig+7*neig,0));
522:     PetscCallBLAS("LAPACKgesdd",LAPACKgesdd_("A",&n1,&m1,A+off,&ld,d+l,U+off,&ld,W+off,&ld,&qwork,&lwork,ds->rwork,ds->iwork,&info));
523: #else
524:     PetscCallBLAS("LAPACKgesdd",LAPACKgesdd_("A",&n1,&m1,A+off,&ld,d+l,U+off,&ld,W+off,&ld,&qwork,&lwork,ds->iwork,&info));
525: #endif
526:     SlepcCheckLapackInfo("gesdd",info);
527:     PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(qwork),&lwork));
528:     PetscCall(DSAllocateWork_Private(ds,lwork,0,0));
529: #if defined(PETSC_USE_COMPLEX)
530:     PetscCallBLAS("LAPACKgesdd",LAPACKgesdd_("A",&n1,&m1,A+off,&ld,d+l,U+off,&ld,W+off,&ld,ds->work,&lwork,ds->rwork,ds->iwork,&info));
531: #else
532:     PetscCallBLAS("LAPACKgesdd",LAPACKgesdd_("A",&n1,&m1,A+off,&ld,d+l,U+off,&ld,W+off,&ld,ds->work,&lwork,ds->iwork,&info));
533: #endif
534:     SlepcCheckLapackInfo("gesdd",info);
535:     for (i=l;i<m;i++) {
536:       for (j=l;j<m;j++) V[i+j*ld] = PetscConj(W[j+i*ld]);  /* transpose VT returned by Lapack */
537:     }
538:     PetscCall(MatDenseRestoreArrayWrite(ds->omat[DS_MAT_W],&W));
539:   }
540:   for (i=l;i<neig;i++) wr[i] = d[i];

542:   /* create diagonal matrix as a result */
543:   if (ds->compact) PetscCall(PetscArrayzero(e,n-1));
544:   else {
545:     for (i=l;i<m;i++) PetscCall(PetscArrayzero(A+l+i*ld,n-l));
546:     for (i=l;i<n;i++) A[i+i*ld] = d[i];
547:   }
548:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
549:   PetscCall(MatDenseRestoreArrayWrite(ds->omat[DS_MAT_U],&U));
550:   PetscCall(MatDenseRestoreArrayWrite(ds->omat[DS_MAT_V],&V));
551:   PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));

553:   /* Set wi to zero */
554:   if (wi) for (i=l;i<neig;i++) wi[i] = 0.0;
555:   PetscFunctionReturn(PETSC_SUCCESS);
556: }

558: #if !defined(PETSC_HAVE_MPIUNI)
559: static PetscErrorCode DSSynchronize_SVD(DS ds,PetscScalar eigr[],PetscScalar eigi[])
560: {
561:   PetscInt       ld=ds->ld,l=ds->l,k=0,kr=0;
562:   PetscMPIInt    n,rank,off=0,size,ldn,ld3;
563:   PetscScalar    *A,*U,*V;
564:   PetscReal      *T;

566:   PetscFunctionBegin;
567:   if (ds->compact) kr = 3*ld;
568:   else k = (ds->n-l)*ld;
569:   if (ds->state>DS_STATE_RAW) k += 2*(ds->n-l)*ld;
570:   if (eigr) k += ds->n-l;
571:   PetscCall(DSAllocateWork_Private(ds,k+kr,0,0));
572:   PetscCall(PetscMPIIntCast(k*sizeof(PetscScalar)+kr*sizeof(PetscReal),&size));
573:   PetscCall(PetscMPIIntCast(ds->n-l,&n));
574:   PetscCall(PetscMPIIntCast(ld*(ds->n-l),&ldn));
575:   PetscCall(PetscMPIIntCast(3*ld,&ld3));
576:   if (ds->compact) PetscCall(DSGetArrayReal(ds,DS_MAT_T,&T));
577:   else PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
578:   if (ds->state>DS_STATE_RAW) {
579:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_U],&U));
580:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_V],&V));
581:   }
582:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank));
583:   if (!rank) {
584:     if (ds->compact) PetscCallMPI(MPI_Pack(T,ld3,MPIU_REAL,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
585:     else PetscCallMPI(MPI_Pack(A+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
586:     if (ds->state>DS_STATE_RAW) {
587:       PetscCallMPI(MPI_Pack(U+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
588:       PetscCallMPI(MPI_Pack(V+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
589:     }
590:     if (eigr) PetscCallMPI(MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
591:   }
592:   PetscCallMPI(MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds)));
593:   if (rank) {
594:     if (ds->compact) PetscCallMPI(MPI_Unpack(ds->work,size,&off,T,ld3,MPIU_REAL,PetscObjectComm((PetscObject)ds)));
595:     else PetscCallMPI(MPI_Unpack(ds->work,size,&off,A+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
596:     if (ds->state>DS_STATE_RAW) {
597:       PetscCallMPI(MPI_Unpack(ds->work,size,&off,U+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
598:       PetscCallMPI(MPI_Unpack(ds->work,size,&off,V+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
599:     }
600:     if (eigr) PetscCallMPI(MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
601:   }
602:   if (ds->compact) PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&T));
603:   else PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
604:   if (ds->state>DS_STATE_RAW) {
605:     PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_U],&U));
606:     PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_V],&V));
607:   }
608:   PetscFunctionReturn(PETSC_SUCCESS);
609: }
610: #endif

612: static PetscErrorCode DSMatGetSize_SVD(DS ds,DSMatType t,PetscInt *rows,PetscInt *cols)
613: {
614:   DS_SVD *ctx = (DS_SVD*)ds->data;

616:   PetscFunctionBegin;
617:   PetscCheck(ctx->m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
618:   switch (t) {
619:     case DS_MAT_A:
620:       *rows = ds->n;
621:       *cols = ds->extrarow? ctx->m+1: ctx->m;
622:       break;
623:     case DS_MAT_T:
624:       *rows = ds->n;
625:       *cols = PetscDefined(USE_COMPLEX)? 2: 3;
626:       break;
627:     case DS_MAT_U:
628:       *rows = ds->state==DS_STATE_TRUNCATED? ds->t: ds->n;
629:       *cols = ds->n;
630:       break;
631:     case DS_MAT_V:
632:       *rows = ds->state==DS_STATE_TRUNCATED? ctx->t: ctx->m;
633:       *cols = ctx->m;
634:       break;
635:     default:
636:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid t parameter");
637:   }
638:   PetscFunctionReturn(PETSC_SUCCESS);
639: }

641: static PetscErrorCode DSSVDSetDimensions_SVD(DS ds,PetscInt m)
642: {
643:   DS_SVD *ctx = (DS_SVD*)ds->data;

645:   PetscFunctionBegin;
646:   DSCheckAlloc(ds,1);
647:   if (m==PETSC_DECIDE || m==PETSC_DEFAULT) {
648:     ctx->m = ds->ld;
649:   } else {
650:     PetscCheck(m>0 && m<=ds->ld,PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Illegal value of m. Must be between 1 and ld");
651:     ctx->m = m;
652:   }
653:   PetscFunctionReturn(PETSC_SUCCESS);
654: }

656: /*@
657:    DSSVDSetDimensions - Sets the number of columns for a `DSSVD`.

659:    Logically Collective

661:    Input Parameters:
662: +  ds - the direct solver context
663: -  m  - the number of columns

665:    Notes:
666:    This call is complementary to `DSSetDimensions()`, to provide a dimension
667:    that is specific to this `DS` type.

669:    Level: intermediate

671: .seealso: [](sec:ds), `DSSVD`, `DSSVDGetDimensions()`, `DSSetDimensions()`
672: @*/
673: PetscErrorCode DSSVDSetDimensions(DS ds,PetscInt m)
674: {
675:   PetscFunctionBegin;
678:   PetscTryMethod(ds,"DSSVDSetDimensions_C",(DS,PetscInt),(ds,m));
679:   PetscFunctionReturn(PETSC_SUCCESS);
680: }

682: static PetscErrorCode DSSVDGetDimensions_SVD(DS ds,PetscInt *m)
683: {
684:   DS_SVD *ctx = (DS_SVD*)ds->data;

686:   PetscFunctionBegin;
687:   *m = ctx->m;
688:   PetscFunctionReturn(PETSC_SUCCESS);
689: }

691: /*@
692:    DSSVDGetDimensions - Returns the number of columns for a `DSSVD`.

694:    Not Collective

696:    Input Parameter:
697: .  ds - the direct solver context

699:    Output Parameter:
700: .  m - the number of columns

702:    Level: intermediate

704: .seealso: [](sec:ds), `DSSVD`, `DSSVDSetDimensions()`
705: @*/
706: PetscErrorCode DSSVDGetDimensions(DS ds,PetscInt *m)
707: {
708:   PetscFunctionBegin;
710:   PetscAssertPointer(m,2);
711:   PetscUseMethod(ds,"DSSVDGetDimensions_C",(DS,PetscInt*),(ds,m));
712:   PetscFunctionReturn(PETSC_SUCCESS);
713: }

715: static PetscErrorCode DSDestroy_SVD(DS ds)
716: {
717:   PetscFunctionBegin;
718:   PetscCall(PetscFree(ds->data));
719:   PetscCall(PetscObjectComposeFunction((PetscObject)ds,"DSSVDSetDimensions_C",NULL));
720:   PetscCall(PetscObjectComposeFunction((PetscObject)ds,"DSSVDGetDimensions_C",NULL));
721:   PetscFunctionReturn(PETSC_SUCCESS);
722: }

724: static PetscErrorCode DSSetCompact_SVD(DS ds,PetscBool comp)
725: {
726:   PetscFunctionBegin;
727:   if (!comp) PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
728:   PetscFunctionReturn(PETSC_SUCCESS);
729: }

731: static PetscErrorCode DSReallocate_SVD(DS ds,PetscInt ld)
732: {
733:   PetscInt i,*perm=ds->perm;

735:   PetscFunctionBegin;
736:   for (i=0;i<DS_NUM_MAT;i++) {
737:     if (!ds->compact && i==DS_MAT_A) continue;
738:     if (i!=DS_MAT_U && i!=DS_MAT_V && i!=DS_MAT_T) PetscCall(MatDestroy(&ds->omat[i]));
739:   }

741:   if (!ds->compact) PetscCall(DSReallocateMat_Private(ds,DS_MAT_A,ld));
742:   PetscCall(DSReallocateMat_Private(ds,DS_MAT_U,ld));
743:   PetscCall(DSReallocateMat_Private(ds,DS_MAT_V,ld));
744:   PetscCall(DSReallocateMat_Private(ds,DS_MAT_T,ld));

746:   PetscCall(PetscMalloc1(ld,&ds->perm));
747:   PetscCall(PetscArraycpy(ds->perm,perm,ds->ld));
748:   PetscCall(PetscFree(perm));
749:   PetscFunctionReturn(PETSC_SUCCESS);
750: }

752: /*MC
753:    DSSVD - Dense Singular Value Decomposition.

755:    Notes:
756:    The problem is expressed as $A = U\Sigma V^*$, where $A$ is rectangular in
757:    general, with $n$ rows and $m$ columns. $\Sigma$ is a diagonal matrix whose diagonal
758:    elements are the arguments of `DSSolve()`. After solve, $A$ is overwritten
759:    with $\Sigma$.

761:    The orthogonal (or unitary) matrices of left and right singular vectors, $U$
762:    and $V$, have size $n$ and $m$, respectively. The number of columns $m$ must
763:    be specified via `DSSVDSetDimensions()`.

765:    If the `DS` object is in the intermediate state, $A$ is assumed to be in upper
766:    bidiagonal form (possibly with an arrow) and is stored in compact format
767:    on matrix $T$. Otherwise, no particular structure is assumed. The compact
768:    storage is implemented for the square case only, $m=n$. The extra row should
769:    be interpreted in this case as an extra column.

771:    Used DS matrices:
772: +  `DS_MAT_A` - problem matrix (used only if `compact=PETSC_FALSE`)
773: .  `DS_MAT_T` - upper bidiagonal matrix
774: .  `DS_MAT_U` - left singular vectors
775: -  `DS_MAT_V` - right singular vectors

777:    Implemented methods:
778: +  0 - Implicit zero-shift QR for bidiagonals (`_bdsqr`)
779: -  1 - Divide and Conquer (`_bdsdc` or `_gesdd`)

781:    Level: beginner

783: .seealso: [](sec:ds), `DSCreate()`, `DSSetType()`, `DSType`, `DSSVDSetDimensions()`, `DSSetCompact()`
784: M*/
785: SLEPC_EXTERN PetscErrorCode DSCreate_SVD(DS ds)
786: {
787:   DS_SVD         *ctx;

789:   PetscFunctionBegin;
790:   PetscCall(PetscNew(&ctx));
791:   ds->data = (void*)ctx;

793:   ds->ops->allocate      = DSAllocate_SVD;
794:   ds->ops->view          = DSView_SVD;
795:   ds->ops->vectors       = DSVectors_SVD;
796:   ds->ops->solve[0]      = DSSolve_SVD_QR;
797:   ds->ops->solve[1]      = DSSolve_SVD_DC;
798:   ds->ops->sort          = DSSort_SVD;
799:   ds->ops->truncate      = DSTruncate_SVD;
800:   ds->ops->update        = DSUpdateExtraRow_SVD;
801:   ds->ops->destroy       = DSDestroy_SVD;
802:   ds->ops->matgetsize    = DSMatGetSize_SVD;
803: #if !defined(PETSC_HAVE_MPIUNI)
804:   ds->ops->synchronize   = DSSynchronize_SVD;
805: #endif
806:   ds->ops->setcompact    = DSSetCompact_SVD;
807:   ds->ops->reallocate    = DSReallocate_SVD;
808:   PetscCall(PetscObjectComposeFunction((PetscObject)ds,"DSSVDSetDimensions_C",DSSVDSetDimensions_SVD));
809:   PetscCall(PetscObjectComposeFunction((PetscObject)ds,"DSSVDGetDimensions_C",DSSVDGetDimensions_SVD));
810:   PetscFunctionReturn(PETSC_SUCCESS);
811: }