$extrastylesheet
00001 // The libMesh Finite Element Library. 00002 // Copyright (C) 2002-2014 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner 00003 00004 // This library is free software; you can redistribute it and/or 00005 // modify it under the terms of the GNU Lesser General Public 00006 // License as published by the Free Software Foundation; either 00007 // version 2.1 of the License, or (at your option) any later version. 00008 00009 // This library is distributed in the hope that it will be useful, 00010 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00012 // Lesser General Public License for more details. 00013 00014 // You should have received a copy of the GNU Lesser General Public 00015 // License along with this library; if not, write to the Free Software 00016 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00017 00018 00019 // Local Includes 00020 #include "libmesh/dense_matrix.h" 00021 #include "libmesh/dense_vector.h" 00022 00023 00024 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS) 00025 #include "libmesh/petsc_macro.h" 00026 00027 EXTERN_C_FOR_PETSC_BEGIN 00028 #include <petscblaslapack.h> 00029 EXTERN_C_FOR_PETSC_END 00030 #endif 00031 00032 #if (LIBMESH_HAVE_SLEPC && LIBMESH_USE_REAL_NUMBERS) 00033 #include <slepcblaslapack.h> 00034 #endif 00035 00036 namespace libMesh 00037 { 00038 00039 00040 00041 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS) 00042 00043 template<typename T> 00044 void DenseMatrix<T>::_multiply_blas(const DenseMatrixBase<T>& other, 00045 _BLAS_Multiply_Flag flag) 00046 { 00047 int result_size = 0; 00048 00049 // For each case, determine the size of the final result make sure 00050 // that the inner dimensions match 00051 switch (flag) 00052 { 00053 case LEFT_MULTIPLY: 00054 { 00055 result_size = other.m() * this->n(); 00056 if (other.n() == this->m()) 00057 break; 00058 } 00059 case RIGHT_MULTIPLY: 00060 { 00061 result_size = other.n() * this->m(); 00062 if (other.m() == this->n()) 00063 break; 00064 } 00065 case LEFT_MULTIPLY_TRANSPOSE: 00066 { 00067 result_size = other.n() * this->n(); 00068 if (other.m() == this->m()) 00069 break; 00070 } 00071 case RIGHT_MULTIPLY_TRANSPOSE: 00072 { 00073 result_size = other.m() * this->m(); 00074 if (other.n() == this->n()) 00075 break; 00076 } 00077 default: 00078 libmesh_error_msg("Unknown flag selected or matrices are incompatible for multiplication."); 00079 } 00080 00081 // For this to work, the passed arg. must actually be a DenseMatrix<T> 00082 const DenseMatrix<T>* const_that = cast_ptr< const DenseMatrix<T>* >(&other); 00083 00084 // Also, although 'that' is logically const in this BLAS routine, 00085 // the PETSc BLAS interface does not specify that any of the inputs are 00086 // const. To use it, I must cast away const-ness. 00087 DenseMatrix<T>* that = const_cast< DenseMatrix<T>* > (const_that); 00088 00089 // Initialize A, B pointers for LEFT_MULTIPLY* cases 00090 DenseMatrix<T> 00091 *A = this, 00092 *B = that; 00093 00094 // For RIGHT_MULTIPLY* cases, swap the meaning of A and B. 00095 // Here is a full table of combinations we can pass to BLASgemm, and what the answer is when finished: 00096 // pass A B -> (Fortran) -> A^T B^T -> (C++) -> (A^T B^T)^T -> (identity) -> B A "lt multiply" 00097 // pass B A -> (Fortran) -> B^T A^T -> (C++) -> (B^T A^T)^T -> (identity) -> A B "rt multiply" 00098 // pass A B^T -> (Fortran) -> A^T B -> (C++) -> (A^T B)^T -> (identity) -> B^T A "lt multiply t" 00099 // pass B^T A -> (Fortran) -> B A^T -> (C++) -> (B A^T)^T -> (identity) -> A B^T "rt multiply t" 00100 if (flag==RIGHT_MULTIPLY || flag==RIGHT_MULTIPLY_TRANSPOSE) 00101 std::swap(A,B); 00102 00103 // transa, transb values to pass to blas 00104 char 00105 transa[] = "n", 00106 transb[] = "n"; 00107 00108 // Integer values to pass to BLAS: 00109 // 00110 // M 00111 // In Fortran, the number of rows of op(A), 00112 // In the BLAS documentation, typically known as 'M'. 00113 // 00114 // In C/C++, we set: 00115 // M = n_cols(A) if (transa='n') 00116 // n_rows(A) if (transa='t') 00117 int M = static_cast<int>( A->n() ); 00118 00119 // N 00120 // In Fortran, the number of cols of op(B), and also the number of cols of C. 00121 // In the BLAS documentation, typically known as 'N'. 00122 // 00123 // In C/C++, we set: 00124 // N = n_rows(B) if (transb='n') 00125 // n_cols(B) if (transb='t') 00126 int N = static_cast<int>( B->m() ); 00127 00128 // K 00129 // In Fortran, the number of cols of op(A), and also 00130 // the number of rows of op(B). In the BLAS documentation, 00131 // typically known as 'K'. 00132 // 00133 // In C/C++, we set: 00134 // K = n_rows(A) if (transa='n') 00135 // n_cols(A) if (transa='t') 00136 int K = static_cast<int>( A->m() ); 00137 00138 // LDA (leading dimension of A). In our cases, 00139 // LDA is always the number of columns of A. 00140 int LDA = static_cast<int>( A->n() ); 00141 00142 // LDB (leading dimension of B). In our cases, 00143 // LDB is always the number of columns of B. 00144 int LDB = static_cast<int>( B->n() ); 00145 00146 if (flag == LEFT_MULTIPLY_TRANSPOSE) 00147 { 00148 transb[0] = 't'; 00149 N = static_cast<int>( B->n() ); 00150 } 00151 00152 else if (flag == RIGHT_MULTIPLY_TRANSPOSE) 00153 { 00154 transa[0] = 't'; 00155 std::swap(M,K); 00156 } 00157 00158 // LDC (leading dimension of C). LDC is the 00159 // number of columns in the solution matrix. 00160 int LDC = M; 00161 00162 // Scalar values to pass to BLAS 00163 // 00164 // scalar multiplying the whole product AB 00165 T alpha = 1.; 00166 00167 // scalar multiplying C, which is the original matrix. 00168 T beta = 0.; 00169 00170 // Storage for the result 00171 std::vector<T> result (result_size); 00172 00173 // Finally ready to call the BLAS 00174 BLASgemm_(transa, transb, &M, &N, &K, &alpha, &(A->_val[0]), &LDA, &(B->_val[0]), &LDB, &beta, &result[0], &LDC); 00175 00176 // Update the relevant dimension for this matrix. 00177 switch (flag) 00178 { 00179 case LEFT_MULTIPLY: { this->_m = other.m(); break; } 00180 case RIGHT_MULTIPLY: { this->_n = other.n(); break; } 00181 case LEFT_MULTIPLY_TRANSPOSE: { this->_m = other.n(); break; } 00182 case RIGHT_MULTIPLY_TRANSPOSE: { this->_n = other.m(); break; } 00183 default: 00184 libmesh_error_msg("Unknown flag selected."); 00185 } 00186 00187 // Swap my data vector with the result 00188 this->_val.swap(result); 00189 } 00190 00191 #else 00192 00193 template<typename T> 00194 void DenseMatrix<T>::_multiply_blas(const DenseMatrixBase<T>& , 00195 _BLAS_Multiply_Flag ) 00196 { 00197 libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!"); 00198 } 00199 00200 #endif 00201 00202 00203 00204 00205 00206 00207 00208 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS) 00209 00210 template<typename T> 00211 void DenseMatrix<T>::_lu_decompose_lapack () 00212 { 00213 // If this function was called, there better not be any 00214 // previous decomposition of the matrix. 00215 libmesh_assert_equal_to (this->_decomposition_type, NONE); 00216 00217 // The calling sequence for dgetrf is: 00218 // dgetrf(M, N, A, lda, ipiv, info) 00219 00220 // M (input) int* 00221 // The number of rows of the matrix A. M >= 0. 00222 // In C/C++, pass the number of *cols* of A 00223 int M = this->n(); 00224 00225 // N (input) int* 00226 // The number of columns of the matrix A. N >= 0. 00227 // In C/C++, pass the number of *rows* of A 00228 int N = this->m(); 00229 00230 // A (input/output) double precision array, dimension (LDA,N) 00231 // On entry, the M-by-N matrix to be factored. 00232 // On exit, the factors L and U from the factorization 00233 // A = P*L*U; the unit diagonal elements of L are not stored. 00234 // Here, we pass &(_val[0]). 00235 00236 // LDA (input) int* 00237 // The leading dimension of the array A. LDA >= max(1,M). 00238 int LDA = M; 00239 00240 // ipiv (output) integer array, dimension (min(m,n)) 00241 // The pivot indices; for 1 <= i <= min(m,n), row i of the 00242 // matrix was interchanged with row IPIV(i). 00243 // Here, we pass &(_pivots[0]), a private class member used to store pivots 00244 this->_pivots.resize( std::min(M,N) ); 00245 00246 // info (output) int* 00247 // = 0: successful exit 00248 // < 0: if INFO = -i, the i-th argument had an illegal value 00249 // > 0: if INFO = i, U(i,i) is exactly zero. The factorization 00250 // has been completed, but the factor U is exactly 00251 // singular, and division by zero will occur if it is used 00252 // to solve a system of equations. 00253 int INFO = 0; 00254 00255 // Ready to call the actual factorization routine through PETSc's interface 00256 LAPACKgetrf_(&M, &N, &(this->_val[0]), &LDA, &(_pivots[0]), &INFO); 00257 00258 // Check return value for errors 00259 if (INFO != 0) 00260 libmesh_error_msg("INFO=" << INFO << ", Error during Lapack LU factorization!"); 00261 00262 // Set the flag for LU decomposition 00263 this->_decomposition_type = LU_BLAS_LAPACK; 00264 } 00265 00266 #else 00267 00268 template<typename T> 00269 void DenseMatrix<T>::_lu_decompose_lapack () 00270 { 00271 libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!"); 00272 } 00273 00274 #endif 00275 00276 00277 00278 template<typename T> 00279 void DenseMatrix<T>::_svd_lapack (DenseVector<T>& sigma) 00280 { 00281 // The calling sequence for dgetrf is: 00282 // DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO ) 00283 00284 00285 // JOBU (input) CHARACTER*1 00286 // Specifies options for computing all or part of the matrix U: 00287 // = 'A': all M columns of U are returned in array U: 00288 // = 'S': the first min(m,n) columns of U (the left singular 00289 // vectors) are returned in the array U; 00290 // = 'O': the first min(m,n) columns of U (the left singular 00291 // vectors) are overwritten on the array A; 00292 // = 'N': no columns of U (no left singular vectors) are 00293 // computed. 00294 char JOBU = 'N'; 00295 00296 // JOBVT (input) CHARACTER*1 00297 // Specifies options for computing all or part of the matrix 00298 // V**T: 00299 // = 'A': all N rows of V**T are returned in the array VT; 00300 // = 'S': the first min(m,n) rows of V**T (the right singular 00301 // vectors) are returned in the array VT; 00302 // = 'O': the first min(m,n) rows of V**T (the right singular 00303 // vectors) are overwritten on the array A; 00304 // = 'N': no rows of V**T (no right singular vectors) are 00305 // computed. 00306 char JOBVT = 'N'; 00307 00308 std::vector<T> sigma_val; 00309 std::vector<T> U_val; 00310 std::vector<T> VT_val; 00311 00312 _svd_helper(JOBU, JOBVT, sigma_val, U_val, VT_val); 00313 00314 // Load the singular values into sigma, ignore U_val and VT_val 00315 const unsigned int n_sigma_vals = 00316 cast_int<unsigned int>(sigma_val.size()); 00317 sigma.resize(n_sigma_vals); 00318 for(unsigned int i=0; i<n_sigma_vals; i++) 00319 sigma(i) = sigma_val[i]; 00320 00321 } 00322 00323 template<typename T> 00324 void DenseMatrix<T>::_svd_lapack (DenseVector<T>& sigma, DenseMatrix<T>& U, DenseMatrix<T>& VT) 00325 { 00326 // The calling sequence for dgetrf is: 00327 // DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO ) 00328 00329 00330 // JOBU (input) CHARACTER*1 00331 // Specifies options for computing all or part of the matrix U: 00332 // = 'A': all M columns of U are returned in array U: 00333 // = 'S': the first min(m,n) columns of U (the left singular 00334 // vectors) are returned in the array U; 00335 // = 'O': the first min(m,n) columns of U (the left singular 00336 // vectors) are overwritten on the array A; 00337 // = 'N': no columns of U (no left singular vectors) are 00338 // computed. 00339 char JOBU = 'S'; 00340 00341 // JOBVT (input) CHARACTER*1 00342 // Specifies options for computing all or part of the matrix 00343 // V**T: 00344 // = 'A': all N rows of V**T are returned in the array VT; 00345 // = 'S': the first min(m,n) rows of V**T (the right singular 00346 // vectors) are returned in the array VT; 00347 // = 'O': the first min(m,n) rows of V**T (the right singular 00348 // vectors) are overwritten on the array A; 00349 // = 'N': no rows of V**T (no right singular vectors) are 00350 // computed. 00351 char JOBVT = 'S'; 00352 00353 std::vector<T> sigma_val; 00354 std::vector<T> U_val; 00355 std::vector<T> VT_val; 00356 00357 _svd_helper(JOBU, JOBVT, sigma_val, U_val, VT_val); 00358 00359 // Load the singular values into sigma, ignore U_val and VT_val 00360 const unsigned int n_sigma_vals = 00361 cast_int<unsigned int>(sigma_val.size()); 00362 sigma.resize(n_sigma_vals); 00363 for(unsigned int i=0; i<n_sigma_vals; i++) 00364 sigma(i) = sigma_val[i]; 00365 00366 int M = this->n(); 00367 int N = this->m(); 00368 int min_MN = (M < N) ? M : N; 00369 U.resize(M,min_MN); 00370 for(unsigned int i=0; i<U.m(); i++) 00371 for(unsigned int j=0; j<U.n(); j++) 00372 { 00373 unsigned int index = i + j*U.n(); // Column major storage 00374 U(i,j) = U_val[index]; 00375 } 00376 00377 VT.resize(min_MN,N); 00378 for(unsigned int i=0; i<VT.m(); i++) 00379 for(unsigned int j=0; j<VT.n(); j++) 00380 { 00381 unsigned int index = i + j*U.n(); // Column major storage 00382 VT(i,j) = VT_val[index]; 00383 } 00384 00385 } 00386 00387 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS) 00388 00389 template<typename T> 00390 void DenseMatrix<T>::_svd_helper (char JOBU, 00391 char JOBVT, 00392 std::vector<T>& sigma_val, 00393 std::vector<T>& U_val, 00394 std::vector<T>& VT_val) 00395 { 00396 00397 // M (input) int* 00398 // The number of rows of the matrix A. M >= 0. 00399 // In C/C++, pass the number of *cols* of A 00400 int M = this->n(); 00401 00402 // N (input) int* 00403 // The number of columns of the matrix A. N >= 0. 00404 // In C/C++, pass the number of *rows* of A 00405 int N = this->m(); 00406 00407 int min_MN = (M < N) ? M : N; 00408 int max_MN = (M > N) ? M : N; 00409 00410 // A (input/output) DOUBLE PRECISION array, dimension (LDA,N) 00411 // On entry, the M-by-N matrix A. 00412 // On exit, 00413 // if JOBU = 'O', A is overwritten with the first min(m,n) 00414 // columns of U (the left singular vectors, 00415 // stored columnwise); 00416 // if JOBVT = 'O', A is overwritten with the first min(m,n) 00417 // rows of V**T (the right singular vectors, 00418 // stored rowwise); 00419 // if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A 00420 // are destroyed. 00421 // Here, we pass &(_val[0]). 00422 00423 // LDA (input) int* 00424 // The leading dimension of the array A. LDA >= max(1,M). 00425 int LDA = M; 00426 00427 // S (output) DOUBLE PRECISION array, dimension (min(M,N)) 00428 // The singular values of A, sorted so that S(i) >= S(i+1). 00429 sigma_val.resize( min_MN ); 00430 00431 // LDU (input) INTEGER 00432 // The leading dimension of the array U. LDU >= 1; if 00433 // JOBU = 'S' or 'A', LDU >= M. 00434 int LDU = M; 00435 00436 // U (output) DOUBLE PRECISION array, dimension (LDU,UCOL) 00437 // (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. 00438 // If JOBU = 'A', U contains the M-by-M orthogonal matrix U; 00439 // if JOBU = 'S', U contains the first min(m,n) columns of U 00440 // (the left singular vectors, stored columnwise); 00441 // if JOBU = 'N' or 'O', U is not referenced. 00442 U_val.resize( LDU*M ); 00443 00444 // LDVT (input) INTEGER 00445 // The leading dimension of the array VT. LDVT >= 1; if 00446 // JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). 00447 int LDVT = N; 00448 00449 // VT (output) DOUBLE PRECISION array, dimension (LDVT,N) 00450 // If JOBVT = 'A', VT contains the N-by-N orthogonal matrix 00451 // V**T; 00452 // if JOBVT = 'S', VT contains the first min(m,n) rows of 00453 // V**T (the right singular vectors, stored rowwise); 00454 // if JOBVT = 'N' or 'O', VT is not referenced. 00455 VT_val.resize( LDVT*N ); 00456 00457 // LWORK (input) INTEGER 00458 // The dimension of the array WORK. 00459 // LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)). 00460 // For good performance, LWORK should generally be larger. 00461 // 00462 // If LWORK = -1, then a workspace query is assumed; the routine 00463 // only calculates the optimal size of the WORK array, returns 00464 // this value as the first entry of the WORK array, and no error 00465 // message related to LWORK is issued by XERBLA. 00466 int larger = (3*min_MN+max_MN > 5*min_MN) ? 3*min_MN+max_MN : 5*min_MN; 00467 int LWORK = (larger > 1) ? larger : 1; 00468 00469 00470 // WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) 00471 // On exit, if INFO = 0, WORK(1) returns the optimal LWORK; 00472 // if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged 00473 // superdiagonal elements of an upper bidiagonal matrix B 00474 // whose diagonal is in S (not necessarily sorted). B 00475 // satisfies A = U * B * VT, so it has the same singular values 00476 // as A, and singular vectors related by U and VT. 00477 std::vector<T> WORK( LWORK ); 00478 00479 // INFO (output) INTEGER 00480 // = 0: successful exit. 00481 // < 0: if INFO = -i, the i-th argument had an illegal value. 00482 // > 0: if DBDSQR did not converge, INFO specifies how many 00483 // superdiagonals of an intermediate bidiagonal form B 00484 // did not converge to zero. See the description of WORK 00485 // above for details. 00486 int INFO = 0; 00487 00488 // Ready to call the actual factorization routine through PETSc's interface 00489 LAPACKgesvd_(&JOBU, &JOBVT, &M, &N, &(_val[0]), &LDA, &(sigma_val[0]), &(U_val[0]), 00490 &LDU, &(VT_val[0]), &LDVT, &(WORK[0]), &LWORK, &INFO); 00491 00492 // Check return value for errors 00493 if (INFO != 0) 00494 libmesh_error_msg("INFO=" << INFO << ", Error during Lapack SVD calculation!"); 00495 } 00496 00497 00498 #else 00499 00500 template<typename T> 00501 void DenseMatrix<T>::_svd_helper (char, 00502 char, 00503 std::vector<T>&, 00504 std::vector<T>&, 00505 std::vector<T>&) 00506 { 00507 libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!"); 00508 } 00509 00510 #endif 00511 00512 00513 00514 00515 #if (LIBMESH_HAVE_SLEPC && LIBMESH_USE_REAL_NUMBERS) 00516 00517 template<typename T> 00518 void DenseMatrix<T>::_evd_lapack (DenseVector<T>& lambda_real, DenseVector<T>& lambda_imag) 00519 { 00520 // The calling sequence for dgeev is: 00521 // DGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI, VL, LDVL, VR, 00522 // LDVR, ILO, IHI, SCALE, ABNRM, RCONDE, RCONDV, WORK, LWORK, IWORK, INFO ) 00523 00524 00525 // BALANC (input) CHARACTER*1 00526 // Indicates how the input matrix should be diagonally scaled 00527 // and/or permuted to improve the conditioning of its 00528 // eigenvalues. 00529 // = 'N': Do not diagonally scale or permute; 00530 char BALANC = 'N'; 00531 00532 // JOBVL (input) CHARACTER*1 00533 // = 'N': left eigenvectors of A are not computed; 00534 // = 'V': left eigenvectors of A are computed. 00535 char JOBVL = 'N'; 00536 00537 // JOBVR (input) CHARACTER*1 00538 // = 'N': right eigenvectors of A are not computed; 00539 // = 'V': right eigenvectors of A are computed. 00540 char JOBVR = 'N'; 00541 00542 // SENSE (input) CHARACTER*1 00543 // Determines which reciprocal condition numbers are computed. 00544 // = 'N': None are computed; 00545 // = 'E': Computed for eigenvalues only; 00546 // = 'V': Computed for right eigenvectors only; 00547 // = 'B': Computed for eigenvalues and right eigenvectors. 00548 char SENSE = 'N'; 00549 00550 // N (input) int* 00551 // The number of rows/cols of the matrix A. N >= 0. 00552 libmesh_assert( this->m() == this->n() ); 00553 int N = this->m(); 00554 00555 // A (input/output) DOUBLE PRECISION array, dimension (LDA,N) 00556 // On entry, the N-by-N matrix A. 00557 // On exit, A has been overwritten. 00558 // Here, we pass &(_val[0]). 00559 00560 // LDA (input) int* 00561 // The leading dimension of the array A. LDA >= max(1,N). 00562 int LDA = N; 00563 00564 // WR (output) DOUBLE PRECISION array, dimension (N) 00565 // WI (output) DOUBLE PRECISION array, dimension (N) 00566 // WR and WI contain the real and imaginary parts, 00567 // respectively, of the computed eigenvalues. Complex 00568 // conjugate pairs of eigenvalues appear consecutively 00569 // with the eigenvalue having the positive imaginary part 00570 // first. 00571 lambda_real.resize(N); 00572 lambda_imag.resize(N); 00573 00574 // VL (output) DOUBLE PRECISION array, dimension (LDVL,N) 00575 // If JOBVL = 'V', the left eigenvectors u(j) are stored one 00576 // after another in the columns of VL, in the same order 00577 // as their eigenvalues. 00578 // If JOBVL = 'N', VL is not referenced. 00579 // If the j-th eigenvalue is real, then u(j) = VL(:,j), 00580 // the j-th column of VL. 00581 // If the j-th and (j+1)-st eigenvalues form a complex 00582 // conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and 00583 // u(j+1) = VL(:,j) - i*VL(:,j+1). 00584 // Just set to NULL here. 00585 00586 // LDVL (input) INTEGER 00587 // The leading dimension of the array VL. LDVL >= 1; if 00588 // JOBVL = 'V', LDVL >= N. 00589 int LDVL = 1; 00590 00591 // VR (output) DOUBLE PRECISION array, dimension (LDVR,N) 00592 // If JOBVR = 'V', the right eigenvectors v(j) are stored one 00593 // after another in the columns of VR, in the same order 00594 // as their eigenvalues. 00595 // If JOBVR = 'N', VR is not referenced. 00596 // If the j-th eigenvalue is real, then v(j) = VR(:,j), 00597 // the j-th column of VR. 00598 // If the j-th and (j+1)-st eigenvalues form a complex 00599 // conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and 00600 // v(j+1) = VR(:,j) - i*VR(:,j+1). 00601 // Just set to NULL here. 00602 00603 // LDVR (input) INTEGER 00604 // The leading dimension of the array VR. LDVR >= 1; if 00605 // JOBVR = 'V', LDVR >= N. 00606 int LDVR = 1; 00607 00608 // Outputs (unused) 00609 int ILO = 0; 00610 int IHI = 0; 00611 std::vector<T> SCALE(N); 00612 T ABNRM; 00613 std::vector<T> RCONDE(N); 00614 std::vector<T> RCONDV(N); 00615 00616 // WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) 00617 // On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 00618 // 00619 // LWORK (input) INTEGER 00620 // The dimension of the array WORK. 00621 int LWORK = 3*N; 00622 std::vector<T> WORK( LWORK ); 00623 00624 // IWORK (workspace) INTEGER array, dimension (2*N-2) 00625 // If SENSE = 'N' or 'E', not referenced. 00626 // Just set to NULL 00627 00628 00629 // INFO (output) INTEGER 00630 // = 0: successful exit 00631 // < 0: if INFO = -i, the i-th argument had an illegal value. 00632 // > 0: if INFO = i, the QR algorithm failed to compute all the 00633 // eigenvalues, and no eigenvectors or condition numbers 00634 // have been computed; elements 1:ILO-1 and i+1:N of WR 00635 // and WI contain eigenvalues which have converged. 00636 int INFO = 0; 00637 00638 // Get references to raw data 00639 std::vector<T>& lambda_real_val = lambda_real.get_values(); 00640 std::vector<T>& lambda_imag_val = lambda_imag.get_values(); 00641 00642 // Ready to call the actual factorization routine through SLEPc's interface 00643 LAPACKgeevx_( &BALANC, &JOBVL, &JOBVR, &SENSE, &N, &(_val[0]), &LDA, &lambda_real_val[0], 00644 &lambda_imag_val[0], NULL, &LDVL, NULL, &LDVR, &ILO, &IHI, &SCALE[0], &ABNRM, 00645 &RCONDE[0], &RCONDV[0], &WORK[0], &LWORK, NULL, &INFO ); 00646 00647 // Check return value for errors 00648 if (INFO != 0) 00649 libmesh_error_msg("INFO=" << INFO << ", Error during Lapack eigenvalue calculation!"); 00650 } 00651 00652 #else 00653 00654 template<typename T> 00655 void DenseMatrix<T>::_evd_lapack (DenseVector<T>& , DenseVector<T>& ) 00656 { 00657 libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!"); 00658 } 00659 00660 #endif 00661 00662 00663 00664 00665 00666 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS) 00667 00668 template<typename T> 00669 void DenseMatrix<T>::_lu_back_substitute_lapack (const DenseVector<T>& b, 00670 DenseVector<T>& x) 00671 { 00672 // The calling sequence for getrs is: 00673 // dgetrs(TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO) 00674 00675 // trans (input) char* 00676 // 'n' for no tranpose, 't' for transpose 00677 char TRANS[] = "t"; 00678 00679 // N (input) int* 00680 // The order of the matrix A. N >= 0. 00681 int N = this->m(); 00682 00683 00684 // NRHS (input) int* 00685 // The number of right hand sides, i.e., the number of columns 00686 // of the matrix B. NRHS >= 0. 00687 int NRHS = 1; 00688 00689 // A (input) DOUBLE PRECISION array, dimension (LDA,N) 00690 // The factors L and U from the factorization A = P*L*U 00691 // as computed by dgetrf. 00692 // Here, we pass &(_val[0]) 00693 00694 // LDA (input) int* 00695 // The leading dimension of the array A. LDA >= max(1,N). 00696 int LDA = N; 00697 00698 // ipiv (input) int array, dimension (N) 00699 // The pivot indices from DGETRF; for 1<=i<=N, row i of the 00700 // matrix was interchanged with row IPIV(i). 00701 // Here, we pass &(_pivots[0]) which was computed in _lu_decompose_lapack 00702 00703 // B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) 00704 // On entry, the right hand side matrix B. 00705 // On exit, the solution matrix X. 00706 // Here, we pass a copy of the rhs vector's data array in x, so that the 00707 // passed right-hand side b is unmodified. I don't see a way around this 00708 // copy if we want to maintain an unmodified rhs in LibMesh. 00709 x = b; 00710 std::vector<T>& x_vec = x.get_values(); 00711 00712 // We can avoid the copy if we don't care about overwriting the RHS: just 00713 // pass b to the Lapack routine and then swap with x before exiting 00714 // std::vector<T>& x_vec = b.get_values(); 00715 00716 // LDB (input) int* 00717 // The leading dimension of the array B. LDB >= max(1,N). 00718 int LDB = N; 00719 00720 // INFO (output) int* 00721 // = 0: successful exit 00722 // < 0: if INFO = -i, the i-th argument had an illegal value 00723 int INFO = 0; 00724 00725 // Finally, ready to call the Lapack getrs function 00726 LAPACKgetrs_(TRANS, &N, &NRHS, &(_val[0]), &LDA, &(_pivots[0]), &(x_vec[0]), &LDB, &INFO); 00727 00728 // Check return value for errors 00729 if (INFO != 0) 00730 libmesh_error_msg("INFO=" << INFO << ", Error during Lapack LU solve!"); 00731 00732 // Don't do this if you already made a copy of b above 00733 // Swap b and x. The solution will then be in x, and whatever was originally 00734 // in x, maybe garbage, maybe nothing, will be in b. 00735 // FIXME: Rewrite the LU and Cholesky solves to just take one input, and overwrite 00736 // the input. This *should* make user code simpler, as they don't have to create 00737 // an extra vector just to pass it in to the solve function! 00738 // b.swap(x); 00739 } 00740 00741 #else 00742 00743 template<typename T> 00744 void DenseMatrix<T>::_lu_back_substitute_lapack (const DenseVector<T>& , 00745 DenseVector<T>& ) 00746 { 00747 libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!"); 00748 } 00749 00750 #endif 00751 00752 00753 00754 00755 00756 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS) 00757 00758 template<typename T> 00759 void DenseMatrix<T>::_matvec_blas(T alpha, T beta, 00760 DenseVector<T>& dest, 00761 const DenseVector<T>& arg, 00762 bool trans) const 00763 { 00764 // Ensure that dest and arg sizes are compatible 00765 if (!trans) 00766 { 00767 // dest ~ A * arg 00768 // (mx1) (mxn) * (nx1) 00769 if ((dest.size() != this->m()) || (arg.size() != this->n())) 00770 libmesh_error_msg("Improper input argument sizes!"); 00771 } 00772 00773 else // trans == true 00774 { 00775 // Ensure that dest and arg are proper size 00776 // dest ~ A^T * arg 00777 // (nx1) (nxm) * (mx1) 00778 if ((dest.size() != this->n()) || (arg.size() != this->m())) 00779 libmesh_error_msg("Improper input argument sizes!"); 00780 } 00781 00782 // Calling sequence for dgemv: 00783 // 00784 // dgemv(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) 00785 00786 // TRANS - CHARACTER*1, 't' for transpose, 'n' for non-transpose multiply 00787 // We store everything in row-major order, so pass the transpose flag for 00788 // non-transposed matvecs and the 'n' flag for transposed matvecs 00789 char TRANS[] = "t"; 00790 if (trans) 00791 TRANS[0] = 'n'; 00792 00793 // M - INTEGER. 00794 // On entry, M specifies the number of rows of the matrix A. 00795 // In C/C++, pass the number of *cols* of A 00796 int M = this->n(); 00797 00798 // N - INTEGER. 00799 // On entry, N specifies the number of columns of the matrix A. 00800 // In C/C++, pass the number of *rows* of A 00801 int N = this->m(); 00802 00803 // ALPHA - DOUBLE PRECISION. 00804 // The scalar constant passed to this function 00805 00806 // A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). 00807 // Before entry, the leading m by n part of the array A must 00808 // contain the matrix of coefficients. 00809 // The matrix, *this. Note that _matvec_blas is called from 00810 // a const function, vector_mult(), and so we have made this function const 00811 // as well. Since BLAS knows nothing about const, we have to cast it away 00812 // now. 00813 DenseMatrix<T>& a_ref = const_cast< DenseMatrix<T>& > ( *this ); 00814 std::vector<T>& a = a_ref.get_values(); 00815 00816 // LDA - INTEGER. 00817 // On entry, LDA specifies the first dimension of A as declared 00818 // in the calling (sub) program. LDA must be at least 00819 // max( 1, m ). 00820 int LDA = M; 00821 00822 // X - DOUBLE PRECISION array of DIMENSION at least 00823 // ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' 00824 // and at least 00825 // ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. 00826 // Before entry, the incremented array X must contain the 00827 // vector x. 00828 // Here, we must cast away the const-ness of "arg" since BLAS knows 00829 // nothing about const 00830 DenseVector<T>& x_ref = const_cast< DenseVector<T>& > ( arg ); 00831 std::vector<T>& x = x_ref.get_values(); 00832 00833 // INCX - INTEGER. 00834 // On entry, INCX specifies the increment for the elements of 00835 // X. INCX must not be zero. 00836 int INCX = 1; 00837 00838 // BETA - DOUBLE PRECISION. 00839 // On entry, BETA specifies the scalar beta. When BETA is 00840 // supplied as zero then Y need not be set on input. 00841 // The second scalar constant passed to this function 00842 00843 // Y - DOUBLE PRECISION array of DIMENSION at least 00844 // ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' 00845 // and at least 00846 // ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. 00847 // Before entry with BETA non-zero, the incremented array Y 00848 // must contain the vector y. On exit, Y is overwritten by the 00849 // updated vector y. 00850 // The input vector "dest" 00851 std::vector<T>& y = dest.get_values(); 00852 00853 // INCY - INTEGER. 00854 // On entry, INCY specifies the increment for the elements of 00855 // Y. INCY must not be zero. 00856 int INCY = 1; 00857 00858 // Finally, ready to call the BLAS function 00859 BLASgemv_(TRANS, &M, &N, &alpha, &(a[0]), &LDA, &(x[0]), &INCX, &beta, &(y[0]), &INCY); 00860 } 00861 00862 00863 #else 00864 00865 00866 template<typename T> 00867 void DenseMatrix<T>::_matvec_blas(T , T, 00868 DenseVector<T>& , 00869 const DenseVector<T>&, 00870 bool ) const 00871 { 00872 libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!"); 00873 } 00874 00875 00876 #endif 00877 00878 00879 //-------------------------------------------------------------- 00880 // Explicit instantiations 00881 template void DenseMatrix<Real>::_multiply_blas(const DenseMatrixBase<Real>&, _BLAS_Multiply_Flag); 00882 template void DenseMatrix<Real>::_lu_decompose_lapack(); 00883 template void DenseMatrix<Real>::_lu_back_substitute_lapack(const DenseVector<Real>& , 00884 DenseVector<Real>&); 00885 template void DenseMatrix<Real>::_matvec_blas(Real, Real, 00886 DenseVector<Real>& , 00887 const DenseVector<Real>&, 00888 bool ) const; 00889 template void DenseMatrix<Real>::_svd_lapack(DenseVector<Real>&); 00890 template void DenseMatrix<Real>::_svd_lapack(DenseVector<Real>&, DenseMatrix<Real>&, DenseMatrix<Real>&); 00891 template void DenseMatrix<Real>::_svd_helper (char, char, std::vector<Real>&, 00892 std::vector<Real>&, 00893 std::vector<Real>& ); 00894 template void DenseMatrix<Real>::_evd_lapack(DenseVector<Real>&, DenseVector<Real>&); 00895 00896 #if !(LIBMESH_USE_REAL_NUMBERS) 00897 template void DenseMatrix<Number>::_multiply_blas(const DenseMatrixBase<Number>&, _BLAS_Multiply_Flag); 00898 template void DenseMatrix<Number>::_lu_decompose_lapack(); 00899 template void DenseMatrix<Number>::_lu_back_substitute_lapack(const DenseVector<Number>& , 00900 DenseVector<Number>&); 00901 template void DenseMatrix<Number>::_matvec_blas(Number, Number, 00902 DenseVector<Number>& , 00903 const DenseVector<Number>&, 00904 bool ) const; 00905 template void DenseMatrix<Number>::_svd_lapack(DenseVector<Number>&); 00906 template void DenseMatrix<Number>::_svd_lapack(DenseVector<Number>&, DenseMatrix<Number>&, DenseMatrix<Number>&); 00907 template void DenseMatrix<Number>::_svd_helper (char, char, std::vector<Number>&, 00908 std::vector<Number>&, 00909 std::vector<Number>& ); 00910 template void DenseMatrix<Number>::_evd_lapack(DenseVector<Number>&, DenseVector<Number>&); 00911 00912 #endif 00913 00914 } // namespace libMesh