00001 /************************************************************************************* 00002 * MechSys - A C++ library to simulate (Continuum) Mechanical Systems * 00003 * Copyright (C) 2005 Dorival de Moraes Pedroso <dorival.pedroso at gmail.com> * 00004 * Copyright (C) 2005 Raul Dario Durand Farfan <raul.durand at gmail.com> * 00005 * * 00006 * This file is part of MechSys. * 00007 * * 00008 * MechSys is free software; you can redistribute it and/or modify it under the * 00009 * terms of the GNU General Public License as published by the Free Software * 00010 * Foundation; either version 2 of the License, or (at your option) any later * 00011 * version. * 00012 * * 00013 * MechSys is distributed in the hope that it will be useful, but WITHOUT ANY * 00014 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A * 00015 * PARTICULAR PURPOSE. See the GNU General Public License for more details. * 00016 * * 00017 * You should have received a copy of the GNU General Public License along with * 00018 * MechSys; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, * 00019 * Fifth Floor, Boston, MA 02110-1301, USA * 00020 *************************************************************************************/ 00021 00022 #ifndef MECHSYS_SCLAPK_H 00023 #define MECHSYS_SCLAPK_H 00024 00025 // MechSys 00026 #include "linalg/matrix.h" 00027 #include "util/util.h" 00028 00029 extern "C" // {{{ 00030 { 00031 void Cblacs_pinfo (int* mypnum, int* nprocs); 00032 void Cblacs_get (int context, int request, int* value); 00033 int Cblacs_gridinit (int* context, char * order, int np_row, int np_col); 00034 void Cblacs_gridinfo (int context, int* np_row, int* np_col, int* my_row, int* my_col); 00035 void Cblacs_gridexit (int context); 00036 void Cblacs_exit (int error_code); 00037 void Cblacs_barrier (int context, char *scope); 00038 void Cigebs2d (int context, char *scope, char *top, int m, int n, int *A, int lda); 00039 void Cigebr2d 
(int context, char *scope, char *top, int m, int n, int *A, int lda, int rsrc, int csrc); 00040 void Cdgebs2d (int context, char *scope, char *top, int m, int n, double *A, int lda); 00041 void Cdgebr2d (int context, char *scope, char *top, int m, int n, double *A, int lda, int rsrc, int csrc); 00042 void Cpdgemr2d (int M, int N, 00043 double *A, int IA, int JA, int *ADESC, 00044 double *B, int IB, int JB, int *BDESC, 00045 int CTXT); 00046 int numroc_ (int *n, int *nb, int *iproc, int *isrcproc, int *nprocs); 00047 void descinit_ (int *desc, int *m, int *n, int *mb, int *nb, int *irsrc, int *icsrc, int *ictxt, int *lld, int *info); 00048 int descset_ (int *desc, int *m, int *n, int *mb, int *nb, int *irsrc, int *icsrc, int *ictxt, int *lld); 00049 double pdlamch_ (int *ictxt , char *cmach); 00050 double pdlange_ (char *norm, int *m, int *n, double *A, int *ia, int *ja, int *desca, double *work); 00051 void pdlacpy_ (char *uplo, int *m, int *n, double *a, int *ia, int *ja, int *desca, double *b, int *ib, int *jb, int *descb); 00052 void pdgesv_ (int *n, int *nrhs, double *A, int *ia, int *ja, int *desca, int* ipiv, double *B, int *ib, int *jb, int *descb, int *info); 00053 void pdgemm_ (char *TRANSA, char *TRANSB, int * M, int * N, int * K, double * ALPHA, 00054 double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB, 00055 double * BETA, double * C, int * IC, int * JC, int * DESCC ); 00056 int indxg2p_ (int *indxglob, int *nb, int *iproc, int *isrcproc, int *nprocs); 00057 int indxl2g_ (int *indxloc , int *nb, int *iproc, int *isrcproc, int *nprocs); 00058 void pdgeadd_ (char *TRANS, int * M, int * N, 00059 double * ALPHA, 00060 double * A, int * IA, int * JA, int * DESCA, 00061 double * BETA, 00062 double * C, int * IC, int * JC, int * DESCC); 00063 } // }}} 00064 00065 inline int IndxL2G(int idxloc, int nb, int iproc, int isrcproc, int nprocs) 00066 { // {{{ 00067 00068 // (FORTRAN) indxl2g_ function uses indexes which starts at 
1 (one) 00069 int fortran_idxloc = idxloc + 1; 00070 return indxl2g_(&fortran_idxloc, &nb, &iproc, &isrcproc, &nprocs) - 1; 00071 00072 } // }}} 00073 00074 inline void PDLAFill(LinAlg::Matrix<double> const & AA, double *A, int *DescA, int iRread, int iCread, double *Work) 00075 { // {{{ 00076 00077 /* 00078 This function was based in a function inside file pdlaread.f 00079 That function (PDLAREAD) has the following caption: 00080 00081 -- ScaLAPACK auxiliary routine (version 2.0) -- 00082 University of Tennessee, Knoxville, Oak Ridge National Laboratory, 00083 and University of California, Berkeley. 00084 August 12, 2001 00085 00086 Purpose 00087 ======= 00088 00089 PDLAREAD reads from a file named FILNAM a matrix and distribute 00090 it to the process grid. 00091 00092 Only the process of coordinates {IRREAD, ICREAD} read the file. 00093 00094 WORK must be of size >= MB_ = DESCA( MB_ ). 00095 00096 Further Details 00097 =============== 00098 00099 Contributed by Song Jin, University of Tennessee, 1996. 
00100 */ 00101 // Parameters 00102 //int block_cyclic_2d = 1; 00103 //int dtype_ = 1; 00104 int ctxt_ = 2; 00105 //int m_ = 3; 00106 //int n_ = 4; 00107 int mb_ = 5; 00108 //int nb_ = 6; 00109 //int rsrc_ = 7; 00110 //int csrc_ = 8; 00111 //int lld_ = 9; 00112 int one = 1; 00113 00114 // Local scalars 00115 bool isioprocessor = false; 00116 int ictxt = DescA[ctxt_-1]; 00117 int lwork = DescA[mb_-1]; 00118 int mycol = 0; 00119 int myrow = 0; 00120 int npcol = 0; 00121 int nprow = 0; 00122 00123 // Check if this is the IO processor 00124 Cblacs_gridinfo (ictxt, &nprow, &npcol, &myrow, &mycol); 00125 isioprocessor = ((myrow==iRread)&&(mycol==iCread)); 00126 00127 // Get number of rows and columns 00128 int iwork[2]; 00129 if (isioprocessor) 00130 { 00131 iwork[0] = AA.Rows(); 00132 iwork[1] = AA.Cols(); 00133 //igebs2d_(&ictxt, "All", " ", 2, 1, &iwork, 2); 00134 Cigebs2d(ictxt, "All", " ", 2, 1, iwork, 2); 00135 } 00136 else 00137 { 00138 //igebr2d_(&ictxt, "All", " ", 2, 1, &iwork, 2, &iRread, &iCread); 00139 Cigebr2d(ictxt, "All", " ", 2, 1, iwork, 2, iRread, iCread); 00140 } 00141 int m = iwork[0]; 00142 int n = iwork[1]; 00143 00144 // DESCSET initializes a descriptor vector 00145 int descwork[9]; 00146 int mm = Util::Max(1, Util::Min(m, lwork)); 00147 int nn = Util::Max(1, static_cast<int>(lwork/mm)); 00148 int mb = mm; 00149 int nb = nn; 00150 int rsrc = iRread; 00151 int csrc = iCread; 00152 int ldd = Util::Max(1, mm); 00153 descset_(descwork, &mm, &nn, &mb, &nb, &rsrc, &csrc, &ictxt, &ldd); 00154 00155 // Fill matrix 00156 for (int jstart=0; jstart<n; jstart+=nn) 00157 { 00158 int jend = Util::Min(n, jstart+nn); 00159 int jsize = jend - jstart; 00160 for (int istart=0; istart<m; istart+=mm) 00161 { 00162 int iend = Util::Min(m, istart+mm); 00163 int isize = iend - istart; 00164 double alpha = 1.0; 00165 double beta = 0.0; 00166 if (isioprocessor) 00167 { 00168 for (int j=0; j<jsize; j++) 00169 for (int i=0; i<isize; i++) 00170 Work[i+j*ldd] = AA(i,j); 00171 } 
00172 pdgeadd_("N", &isize, &jsize, 00173 &alpha, 00174 Work, &one, &one, descwork, 00175 &beta, 00176 A, &istart, &jstart, DescA); 00177 } 00178 } 00179 00180 // Flag (?) 00181 Work[0] = DescA[mb_]; 00182 00183 } // }}} 00184 00185 #endif // MECHSYS_SCLAPK_H 00186 00187 /* {{{ MANUAL 00188 00189 00190 These notes were 'borrowed' from ScaLAPACK manual: 00191 00192 -- ScaLAPACK routine (version 1.7) -- 00193 University of Tennessee, Knoxville, Oak Ridge National Laboratory, 00194 and University of California, Berkeley. 00195 Jan 30, 2006 00196 00197 00198 ====================================================================== 00199 00200 INTEGER FUNCTION INDXL2G( INDXLOC, NB, IPROC, ISRCPROC, NPROCS ) 00201 00202 Purpose 00203 ======= 00204 00205 INDXL2G computes the global index of a distributed matrix entry 00206 pointed to by the local index INDXLOC of the process indicated by 00207 IPROC. 00208 00209 Arguments 00210 ========= 00211 00212 INDXLOC (global input) INTEGER 00213 The local index of the distributed matrix entry. 00214 00215 NB (global input) INTEGER 00216 Block size, size of the blocks the distributed matrix is 00217 split into. 00218 00219 IPROC (local input) INTEGER 00220 The coordinate of the process whose local array row or 00221 column is to be determined. 00222 00223 ISRCPROC (global input) INTEGER 00224 The coordinate of the process that possesses the first 00225 row/column of the distributed matrix. 00226 00227 NPROCS (global input) INTEGER 00228 The total number processes over which the distributed 00229 matrix is distributed. 00230 00231 ====================================================================== 00232 00233 00234 ====================================================================== 00235 00236 INTEGER FUNCTION NUMROC(N, NB, IPROC, ISRCPROC, NPROCS) 00237 00238 NUMROC computes the NUMber of Rows Or Columns of a distributed 00239 matrix owned by the process indicated by IPROC. 

  Arguments
  =========

  N         (global input) INTEGER
            The number of rows/columns in distributed matrix.

  NB        (global input) INTEGER
            Block size, size of the blocks the distributed matrix is
            split into.

  IPROC     (local input) INTEGER
            The coordinate of the process whose local array row or
            column is to be determined.

  ISRCPROC  (global input) INTEGER
            The coordinate of the process that possesses the first
            row or column of the distributed matrix.

  NPROCS    (global input) INTEGER
            The total number of processes over which the matrix is
            distributed.

======================================================================


======================================================================

  SUBROUTINE
  DESCINIT(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)

  DESCINIT initializes the descriptor vector with the 8 input arguments
  M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD.

  Notes
  =====

  Each global data object is described by an associated description
  vector. This vector stores the information required to establish
  the mapping between an object element and its corresponding process
  and memory location.

  Let A be a generic term for any 2D block cyclicly distributed array.
  Such a global array has an associated description vector DESCA.
  In the following comments, the character _ should be read as
  "of the global array".

  NOTATION        STORED IN      EXPLANATION
  --------------- -------------- --------------------------------------
  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
                                 DTYPE_A = 1.
  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
                                 the BLACS process grid A is distribu-
                                 ted over.
The context itself is glo- 00294 bal, but the handle (the integer 00295 value) may vary. 00296 M_A (global) DESCA( M_ ) The number of rows in the global 00297 array A. 00298 N_A (global) DESCA( N_ ) The number of columns in the global 00299 array A. 00300 MB_A (global) DESCA( MB_ ) The blocking factor used to distribute 00301 the rows of the array. 00302 NB_A (global) DESCA( NB_ ) The blocking factor used to distribute 00303 the columns of the array. 00304 RSRC_A (global) DESCA( RSRC_ ) The process row over which the first 00305 row of the array A is distributed. 00306 CSRC_A (global) DESCA( CSRC_ ) The process column over which the 00307 first column of the array A is 00308 distributed. 00309 LLD_A (local) DESCA( LLD_ ) The leading dimension of the local 00310 array. LLD_A >= MAX(1,LOCr(M_A)). 00311 00312 Let K be the number of rows or columns of a distributed matrix, 00313 and assume that its process grid has dimension p x q. 00314 LOCr( K ) denotes the number of elements of K that a process 00315 would receive if K were distributed over the p processes of its 00316 process column. 00317 Similarly, LOCc( K ) denotes the number of elements of K that a 00318 process would receive if K were distributed over the q processes of 00319 its process row. 00320 The values of LOCr() and LOCc() may be determined via a call to the 00321 ScaLAPACK tool function, NUMROC: 00322 LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), 00323 LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). 00324 An upper bound for these quantities may be computed by: 00325 LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A 00326 LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A 00327 00328 Arguments 00329 ========= 00330 00331 DESC (output) INTEGER array of dimension DLEN_. 00332 The array descriptor of a distributed matrix to be set. 00333 00334 M (global input) INTEGER 00335 The number of rows in the distributed matrix. M >= 0. 
00336 00337 N (global input) INTEGER 00338 The number of columns in the distributed matrix. N >= 0. 00339 00340 MB (global input) INTEGER 00341 The blocking factor used to distribute the rows of the 00342 matrix. MB >= 1. 00343 00344 NB (global input) INTEGER 00345 The blocking factor used to distribute the columns of the 00346 matrix. NB >= 1. 00347 00348 IRSRC (global input) INTEGER 00349 The process row over which the first row of the matrix is 00350 distributed. 0 <= IRSRC < NPROW. 00351 00352 ICSRC (global input) INTEGER 00353 The process column over which the first column of the 00354 matrix is distributed. 0 <= ICSRC < NPCOL. 00355 00356 ICTXT (global input) INTEGER 00357 The BLACS context handle, indicating the global context of 00358 the operation on the matrix. The context itself is global. 00359 00360 LLD (local input) INTEGER 00361 The leading dimension of the local array storing the local 00362 blocks of the distributed matrix. LLD >= MAX(1,LOCr(M)). 00363 00364 INFO (output) INTEGER 00365 = 0: successful exit 00366 < 0: if INFO = -i, the i-th argument had an illegal value 00367 00368 Note 00369 ==== 00370 00371 If the routine can recover from an erroneous input argument, it will 00372 return an acceptable descriptor vector. For example, if LLD = 0 on 00373 input, DESC(LLD_) will contain the smallest leading dimension 00374 required to store the specified M-by-N distributed matrix, INFO 00375 will be set -9 in that case. 
00376 00377 ====================================================================== 00378 00379 00380 ====================================================================== 00381 00382 SUBROUTINE 00383 PDGEADD (TRANS, M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC) 00384 00385 Purpose 00386 ======= 00387 00388 PDGEADD adds a matrix to another 00389 00390 sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) 00391 00392 where 00393 00394 sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of 00395 00396 op( X ) = X or op( X ) = X'. 00397 00398 Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+N-1) if TRANS = 'N', 00399 A(IA:IA+N-1,JA:JA+M-1)' if TRANS = 'T', 00400 A(IA:IA+N-1,JA:JA+M-1)' if TRANS = 'C'. 00401 00402 Alpha and beta are scalars, sub( C ) and op( sub( A ) ) are m by n 00403 submatrices. 00404 00405 Notes 00406 ===== 00407 00408 A description vector is associated with each 2D block-cyclicly dis- 00409 tributed matrix. This vector stores the information required to 00410 establish the mapping between a matrix entry and its corresponding 00411 process and memory location. 00412 00413 In the following comments, the character _ should be read as 00414 "of the distributed matrix". Let A be a generic term for any 2D 00415 block cyclicly distributed matrix. Its description vector is DESC_A: 00416 00417 NOTATION STORED IN EXPLANATION 00418 ---------------- --------------- ------------------------------------ 00419 DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. 00420 CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating 00421 the NPROW x NPCOL BLACS process grid 00422 A is distributed over. The context 00423 itself is global, but the handle 00424 (the integer value) may vary. 00425 M_A (global) DESCA[ M_ ] The number of rows in the distribu- 00426 ted matrix A, M_A >= 0. 00427 N_A (global) DESCA[ N_ ] The number of columns in the distri- 00428 buted matrix A, N_A >= 0. 
00429 IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left 00430 block of the matrix A, IMB_A > 0. 00431 INB_A (global) DESCA[ INB_ ] The number of columns of the upper 00432 left block of the matrix A, 00433 INB_A > 0. 00434 MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- 00435 bute the last M_A-IMB_A rows of A, 00436 MB_A > 0. 00437 NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- 00438 bute the last N_A-INB_A columns of 00439 A, NB_A > 0. 00440 RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first 00441 row of the matrix A is distributed, 00442 NPROW > RSRC_A >= 0. 00443 CSRC_A (global) DESCA[ CSRC_ ] The process column over which the 00444 first column of A is distributed. 00445 NPCOL > CSRC_A >= 0. 00446 LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local 00447 array storing the local blocks of 00448 the distributed matrix A, 00449 IF( Lc( 1, N_A ) > 0 ) 00450 LLD_A >= MAX( 1, Lr( 1, M_A ) ) 00451 ELSE 00452 LLD_A >= 1. 00453 00454 Let K be the number of rows of a matrix A starting at the global in- 00455 dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows 00456 that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would 00457 receive if these K rows were distributed over NPROW processes. If K 00458 is the number of columns of a matrix A starting at the global index 00459 JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- 00460 lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if 00461 these K columns were distributed over NPCOL processes. 
00462 00463 The values of Lr() and Lc() may be determined via a call to the func- 00464 tion PB_Cnumroc: 00465 Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) 00466 Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) 00467 00468 Arguments 00469 ========= 00470 00471 TRANS (global input) CHARACTER*1 00472 On entry, TRANS specifies the form of op( sub( A ) ) to be 00473 used in the matrix addition as follows: 00474 00475 TRANS = 'N' or 'n' op( sub( A ) ) = sub( A ), 00476 00477 TRANS = 'T' or 't' op( sub( A ) ) = sub( A )', 00478 00479 TRANS = 'C' or 'c' op( sub( A ) ) = sub( A )'. 00480 00481 M (global input) INTEGER 00482 On entry, M specifies the number of rows of the submatrix 00483 sub( C ) and the number of columns of the submatrix sub( A ). 00484 M must be at least zero. 00485 00486 N (global input) INTEGER 00487 On entry, N specifies the number of columns of the submatrix 00488 sub( C ) and the number of rows of the submatrix sub( A ). N 00489 must be at least zero. 00490 00491 ALPHA (global input) DOUBLE PRECISION 00492 On entry, ALPHA specifies the scalar alpha. When ALPHA is 00493 supplied as zero then the local entries of the array A 00494 corresponding to the entries of the submatrix sub( A ) need 00495 not be set on input. 00496 00497 A (local input) DOUBLE PRECISION array 00498 On entry, A is an array of dimension (LLD_A, Ka), where Ka is 00499 at least Lc( 1, JA+M-1 ). Before entry, this array contains 00500 the local entries of the matrix A. 00501 00502 IA (global input) INTEGER 00503 On entry, IA specifies A's global row index, which points to 00504 the beginning of the submatrix sub( A ). 00505 00506 JA (global input) INTEGER 00507 On entry, JA specifies A's global column index, which points 00508 to the beginning of the submatrix sub( A ). 00509 00510 DESCA (global and local input) INTEGER array 00511 On entry, DESCA is an integer array of dimension DLEN_. This 00512 is the array descriptor for the matrix A. 
00513 00514 BETA (global input) DOUBLE PRECISION 00515 On entry, BETA specifies the scalar beta. When BETA is 00516 supplied as zero then the local entries of the array C 00517 corresponding to the entries of the submatrix sub( C ) need 00518 not be set on input. 00519 00520 C (local input/local output) DOUBLE PRECISION array 00521 On entry, C is an array of dimension (LLD_C, Kc), where Kc is 00522 at least Lc( 1, JC+N-1 ). Before entry, this array contains 00523 the local entries of the matrix C. 00524 On exit, the entries of this array corresponding to the local 00525 entries of the submatrix sub( C ) are overwritten by the 00526 local entries of the m by n updated submatrix. 00527 00528 IC (global input) INTEGER 00529 On entry, IC specifies C's global row index, which points to 00530 the beginning of the submatrix sub( C ). 00531 00532 JC (global input) INTEGER 00533 On entry, JC specifies C's global column index, which points 00534 to the beginning of the submatrix sub( C ). 00535 00536 DESCC (global and local input) INTEGER array 00537 On entry, DESCC is an integer array of dimension DLEN_. This 00538 is the array descriptor for the matrix C. 00539 00540 -- Written on April 1, 1998 by 00541 Antoine Petitet, University of Tennessee, Knoxville 37996, USA. 00542 00543 ====================================================================== 00544 00545 00546 ====================================================================== 00547 00548 SUBROUTINE PDGESV( N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, DESCB, INFO ) 00549 00550 Purpose 00551 ======= 00552 00553 PDGESV computes the solution to a real system of linear equations 00554 00555 sub( A ) * X = sub( B ), 00556 00557 where sub( A ) = A(IA:IA+N-1,JA:JA+N-1) is an N-by-N distributed 00558 matrix and X and sub( B ) = B(IB:IB+N-1,JB:JB+NRHS-1) are N-by-NRHS 00559 distributed matrices. 
00560 00561 The LU decomposition with partial pivoting and row interchanges is 00562 used to factor sub( A ) as sub( A ) = P * L * U, where P is a permu- 00563 tation matrix, L is unit lower triangular, and U is upper triangular. 00564 L and U are stored in sub( A ). The factored form of sub( A ) is then 00565 used to solve the system of equations sub( A ) * X = sub( B ). 00566 00567 Notes 00568 ===== 00569 00570 Each global data object is described by an associated description 00571 vector. This vector stores the information required to establish 00572 the mapping between an object element and its corresponding process 00573 and memory location. 00574 00575 Let A be a generic term for any 2D block cyclicly distributed array. 00576 Such a global array has an associated description vector DESCA. 00577 In the following comments, the character _ should be read as 00578 "of the global array". 00579 00580 NOTATION STORED IN EXPLANATION 00581 --------------- -------------- -------------------------------------- 00582 DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, 00583 DTYPE_A = 1. 00584 CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating 00585 the BLACS process grid A is distribu- 00586 ted over. The context itself is glo- 00587 bal, but the handle (the integer 00588 value) may vary. 00589 M_A (global) DESCA( M_ ) The number of rows in the global 00590 array A. 00591 N_A (global) DESCA( N_ ) The number of columns in the global 00592 array A. 00593 MB_A (global) DESCA( MB_ ) The blocking factor used to distribute 00594 the rows of the array. 00595 NB_A (global) DESCA( NB_ ) The blocking factor used to distribute 00596 the columns of the array. 00597 RSRC_A (global) DESCA( RSRC_ ) The process row over which the first 00598 row of the array A is distributed. 00599 CSRC_A (global) DESCA( CSRC_ ) The process column over which the 00600 first column of the array A is 00601 distributed. 
00602 LLD_A (local) DESCA( LLD_ ) The leading dimension of the local 00603 array. LLD_A >= MAX(1,LOCr(M_A)). 00604 00605 Let K be the number of rows or columns of a distributed matrix, 00606 and assume that its process grid has dimension p x q. 00607 LOCr( K ) denotes the number of elements of K that a process 00608 would receive if K were distributed over the p processes of its 00609 process column. 00610 Similarly, LOCc( K ) denotes the number of elements of K that a 00611 process would receive if K were distributed over the q processes of 00612 its process row. 00613 The values of LOCr() and LOCc() may be determined via a call to the 00614 ScaLAPACK tool function, NUMROC: 00615 LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), 00616 LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). 00617 An upper bound for these quantities may be computed by: 00618 LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A 00619 LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A 00620 00621 This routine requires square block decomposition ( MB_A = NB_A ). 00622 00623 Arguments 00624 ========= 00625 00626 N (global input) INTEGER 00627 The number of rows and columns to be operated on, i.e. the 00628 order of the distributed submatrix sub( A ). N >= 0. 00629 00630 NRHS (global input) INTEGER 00631 The number of right hand sides, i.e., the number of columns 00632 of the distributed submatrix sub( B ). NRHS >= 0. 00633 00634 A (local input/local output) DOUBLE PRECISION pointer into the 00635 local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). 00636 On entry, the local pieces of the N-by-N distributed matrix 00637 sub( A ) to be factored. On exit, this array contains the 00638 local pieces of the factors L and U from the factorization 00639 sub( A ) = P*L*U; the unit diagonal elements of L are not 00640 stored. 00641 00642 IA (global input) INTEGER 00643 The row index in the global array A indicating the first 00644 row of sub( A ). 
00645 00646 JA (global input) INTEGER 00647 The column index in the global array A indicating the 00648 first column of sub( A ). 00649 00650 DESCA (global and local input) INTEGER array of dimension DLEN_. 00651 The array descriptor for the distributed matrix A. 00652 00653 IPIV (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A ) 00654 This array contains the pivoting information. 00655 IPIV(i) -> The global row local row i was swapped with. 00656 This array is tied to the distributed matrix A. 00657 00658 B (local input/local output) DOUBLE PRECISION pointer into the 00659 local memory to an array of dimension 00660 (LLD_B,LOCc(JB+NRHS-1)). On entry, the right hand side 00661 distributed matrix sub( B ). On exit, if INFO = 0, sub( B ) 00662 is overwritten by the solution distributed matrix X. 00663 00664 IB (global input) INTEGER 00665 The row index in the global array B indicating the first 00666 row of sub( B ). 00667 00668 JB (global input) INTEGER 00669 The column index in the global array B indicating the 00670 first column of sub( B ). 00671 00672 DESCB (global and local input) INTEGER array of dimension DLEN_. 00673 The array descriptor for the distributed matrix B. 00674 00675 INFO (global output) INTEGER 00676 = 0: successful exit 00677 < 0: If the i-th argument is an array and the j-entry had 00678 an illegal value, then INFO = -(i*100+j), if the i-th 00679 argument is a scalar and had an illegal value, then 00680 INFO = -i. 00681 > 0: If INFO = K, U(IA+K-1,JA+K-1) is exactly zero. 00682 The factorization has been completed, but the factor U 00683 is exactly singular, so the solution could not be 00684 computed. 

=====================================================================


=====================================================================

  SUBROUTINE PDGEMR2D( M, N,
                       A, IA, JA, ADESC,
                       B, IB, JB, BDESC,
                       CTXT)
  Purpose
  =======

  PDGEMR2D copies a submatrix of A onto a submatrix of B.
  A and B can have different distributions: they can be on different
  processor grids, they can have different blocksizes, and the beginning
  of the area to be copied can be at different places on A and B.

  The parameters can be confusing when the grids of A and B are
  partially or completely disjoint. If a processor calls this
  routine but is not in the A context or not in the B context, then
  ADESC[CTXT] or BDESC[CTXT] must be equal to -1, to ensure that the
  routine recognises this situation.
  To summarize the rule:
  - If a processor is in A context, all parameters related to A must be valid.
  - If a processor is in B context, all parameters related to B must be valid.
  - ADESC[CTXT] and BDESC[CTXT] must be either valid contexts or equal to -1.
  - M and N must be valid for everyone.
  - other parameters are not examined.

  Notes
  =====

  A description vector is associated with each 2D block-cyclicly dis-
  tributed matrix. This vector stores the information required to
  establish the mapping between a matrix entry and its corresponding
  process and memory location.

  In the following comments, the character _ should be read as
  "of the distributed matrix". Let A be a generic term for any 2D
  block cyclicly distributed matrix. Its description vector is DESC_A:

  NOTATION        STORED IN      EXPLANATION
  --------------- -------------- --------------------------------------
  DT_A   (global) DESCA( DT_ )   The descriptor type.
  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
                                 the BLACS process grid A is distribu-
                                 ted over. The context itself is glo-
                                 bal, but the handle (the integer
                                 value) may vary.
  M_A    (global) DESCA( M_ )    The number of rows in the distributed
                                 matrix A.
  N_A    (global) DESCA( N_ )    The number of columns in the distri-
                                 buted matrix A.
  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
                                 the rows of A.
  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
                                 the columns of A.
  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
                                 row of the matrix A is distributed.
  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
                                 first column of A is distributed.
  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
                                 array storing the local blocks of the
                                 distributed matrix A.
                                 LLD_A >= MAX(1,LOCp(M_A)).

  Important notice
  ================
  The parameters of the routine have changed in April 1996.
  There is a new last argument. It must be a context encompassing
  all processors involved in the initial and final distribution.

  Be aware that all processors included in this
  context must call the redistribution routine.

  Parameters
  ==========


  M        (input) INTEGER.
           On entry, M specifies the number of rows of the
           submatrix to be copied. M must be at least zero.
           Unchanged on exit.

  N        (input) INTEGER.
           On entry, N specifies the number of columns of the
           submatrix to be copied. N must be at least zero.
           Unchanged on exit.

  A        (input) DOUBLE PRECISION
           On entry, the source matrix.
           Unchanged on exit.

  IA,JA    (input) INTEGER
           On entry, the coordinates of the beginning of the submatrix
           of A to copy.
           1 <= IA <= M_A - M + 1, 1 <= JA <= N_A - N + 1,
           Unchanged on exit.

  ADESC    (input) A description vector (see Notes above)
           If the current processor is not part of the context of A
           then ADESC[CTXT] must be equal to -1.


  B        (output) DOUBLE PRECISION
           On entry, the destination matrix.
           The portion corresponding to the defined submatrix is updated.

  IB,JB    (input) INTEGER
           On entry, the coordinates of the beginning of the submatrix
           of B that will be updated.
           1 <= IB <= M_B - M + 1, 1 <= JB <= N_B - N + 1,
           Unchanged on exit.

  BDESC    (input) B description vector (see Notes above)
           For processors not part of the context of B
           BDESC[CTXT] must be equal to -1.

  CTXT     (input) a context encompassing at least all processors included
           in either A context or B context

  Memory requirement :
  ====================

  for the processors belonging to grid 0, one buffer of size block 0
  and for the processors belonging to grid 1, also one buffer of size
  block 1.

=====================================================================

}}} */

// vim:fdm=marker