$extrastylesheet
dense_matrix_blas_lapack.C
Go to the documentation of this file.
00001 // The libMesh Finite Element Library.
00002 // Copyright (C) 2002-2014 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
00003 
00004 // This library is free software; you can redistribute it and/or
00005 // modify it under the terms of the GNU Lesser General Public
00006 // License as published by the Free Software Foundation; either
00007 // version 2.1 of the License, or (at your option) any later version.
00008 
00009 // This library is distributed in the hope that it will be useful,
00010 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00011 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012 // Lesser General Public License for more details.
00013 
00014 // You should have received a copy of the GNU Lesser General Public
00015 // License along with this library; if not, write to the Free Software
00016 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00017 
00018 
00019 // Local Includes
00020 #include "libmesh/dense_matrix.h"
00021 #include "libmesh/dense_vector.h"
00022 
00023 
00024 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
00025 #include "libmesh/petsc_macro.h"
00026 
00027 EXTERN_C_FOR_PETSC_BEGIN
00028 #include <petscblaslapack.h>
00029 EXTERN_C_FOR_PETSC_END
00030 #endif
00031 
00032 #if (LIBMESH_HAVE_SLEPC && LIBMESH_USE_REAL_NUMBERS)
00033 #include <slepcblaslapack.h>
00034 #endif
00035 
00036 namespace libMesh
00037 {
00038 
00039 
00040 
00041 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
00042 
00043 template<typename T>
00044 void DenseMatrix<T>::_multiply_blas(const DenseMatrixBase<T>& other,
00045                                     _BLAS_Multiply_Flag flag)
00046 {
00047   int result_size = 0;
00048 
00049   // For each case, determine the size of the final result make sure
00050   // that the inner dimensions match
00051   switch (flag)
00052     {
00053     case LEFT_MULTIPLY:
00054       {
00055         result_size = other.m() * this->n();
00056         if (other.n() == this->m())
00057           break;
00058       }
00059     case RIGHT_MULTIPLY:
00060       {
00061         result_size = other.n() * this->m();
00062         if (other.m() == this->n())
00063           break;
00064       }
00065     case LEFT_MULTIPLY_TRANSPOSE:
00066       {
00067         result_size = other.n() * this->n();
00068         if (other.m() == this->m())
00069           break;
00070       }
00071     case RIGHT_MULTIPLY_TRANSPOSE:
00072       {
00073         result_size = other.m() * this->m();
00074         if (other.n() == this->n())
00075           break;
00076       }
00077     default:
00078       libmesh_error_msg("Unknown flag selected or matrices are incompatible for multiplication.");
00079     }
00080 
00081   // For this to work, the passed arg. must actually be a DenseMatrix<T>
00082   const DenseMatrix<T>* const_that = cast_ptr< const DenseMatrix<T>* >(&other);
00083 
00084   // Also, although 'that' is logically const in this BLAS routine,
00085   // the PETSc BLAS interface does not specify that any of the inputs are
00086   // const.  To use it, I must cast away const-ness.
00087   DenseMatrix<T>* that = const_cast< DenseMatrix<T>* > (const_that);
00088 
00089   // Initialize A, B pointers for LEFT_MULTIPLY* cases
00090   DenseMatrix<T>
00091     *A = this,
00092     *B = that;
00093 
00094   // For RIGHT_MULTIPLY* cases, swap the meaning of A and B.
00095   // Here is a full table of combinations we can pass to BLASgemm, and what the answer is when finished:
00096   // pass A B   -> (Fortran) -> A^T B^T -> (C++) -> (A^T B^T)^T -> (identity) -> B A   "lt multiply"
00097   // pass B A   -> (Fortran) -> B^T A^T -> (C++) -> (B^T A^T)^T -> (identity) -> A B   "rt multiply"
00098   // pass A B^T -> (Fortran) -> A^T B   -> (C++) -> (A^T B)^T   -> (identity) -> B^T A "lt multiply t"
00099   // pass B^T A -> (Fortran) -> B A^T   -> (C++) -> (B A^T)^T   -> (identity) -> A B^T "rt multiply t"
00100   if (flag==RIGHT_MULTIPLY || flag==RIGHT_MULTIPLY_TRANSPOSE)
00101     std::swap(A,B);
00102 
00103   // transa, transb values to pass to blas
00104   char
00105     transa[] = "n",
00106     transb[] = "n";
00107 
00108   // Integer values to pass to BLAS:
00109   //
00110   // M
00111   // In Fortran, the number of rows of op(A),
00112   // In the BLAS documentation, typically known as 'M'.
00113   //
00114   // In C/C++, we set:
00115   // M = n_cols(A) if (transa='n')
00116   //     n_rows(A) if (transa='t')
00117   int M = static_cast<int>( A->n() );
00118 
00119   // N
00120   // In Fortran, the number of cols of op(B), and also the number of cols of C.
00121   // In the BLAS documentation, typically known as 'N'.
00122   //
00123   // In C/C++, we set:
00124   // N = n_rows(B) if (transb='n')
00125   //     n_cols(B) if (transb='t')
00126   int N = static_cast<int>( B->m() );
00127 
00128   // K
00129   // In Fortran, the number of cols of op(A), and also
00130   // the number of rows of op(B). In the BLAS documentation,
00131   // typically known as 'K'.
00132   //
00133   // In C/C++, we set:
00134   // K = n_rows(A) if (transa='n')
00135   //     n_cols(A) if (transa='t')
00136   int K = static_cast<int>( A->m() );
00137 
00138   // LDA (leading dimension of A). In our cases,
00139   // LDA is always the number of columns of A.
00140   int LDA = static_cast<int>( A->n() );
00141 
00142   // LDB (leading dimension of B).  In our cases,
00143   // LDB is always the number of columns of B.
00144   int LDB = static_cast<int>( B->n() );
00145 
00146   if (flag == LEFT_MULTIPLY_TRANSPOSE)
00147     {
00148       transb[0] = 't';
00149       N = static_cast<int>( B->n() );
00150     }
00151 
00152   else if (flag == RIGHT_MULTIPLY_TRANSPOSE)
00153     {
00154       transa[0] = 't';
00155       std::swap(M,K);
00156     }
00157 
00158   // LDC (leading dimension of C).  LDC is the
00159   // number of columns in the solution matrix.
00160   int LDC = M;
00161 
00162   // Scalar values to pass to BLAS
00163   //
00164   // scalar multiplying the whole product AB
00165   T alpha = 1.;
00166 
00167   // scalar multiplying C, which is the original matrix.
00168   T beta  = 0.;
00169 
00170   // Storage for the result
00171   std::vector<T> result (result_size);
00172 
00173   // Finally ready to call the BLAS
00174   BLASgemm_(transa, transb, &M, &N, &K, &alpha, &(A->_val[0]), &LDA, &(B->_val[0]), &LDB, &beta, &result[0], &LDC);
00175 
00176   // Update the relevant dimension for this matrix.
00177   switch (flag)
00178     {
00179     case LEFT_MULTIPLY:            { this->_m = other.m(); break; }
00180     case RIGHT_MULTIPLY:           { this->_n = other.n(); break; }
00181     case LEFT_MULTIPLY_TRANSPOSE:  { this->_m = other.n(); break; }
00182     case RIGHT_MULTIPLY_TRANSPOSE: { this->_n = other.m(); break; }
00183     default:
00184       libmesh_error_msg("Unknown flag selected.");
00185     }
00186 
00187   // Swap my data vector with the result
00188   this->_val.swap(result);
00189 }
00190 
00191 #else
00192 
00193 template<typename T>
00194 void DenseMatrix<T>::_multiply_blas(const DenseMatrixBase<T>& ,
00195                                     _BLAS_Multiply_Flag )
00196 {
00197   libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
00198 }
00199 
00200 #endif
00201 
00202 
00203 
00204 
00205 
00206 
00207 
00208 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
00209 
00210 template<typename T>
00211 void DenseMatrix<T>::_lu_decompose_lapack ()
00212 {
00213   // If this function was called, there better not be any
00214   // previous decomposition of the matrix.
00215   libmesh_assert_equal_to (this->_decomposition_type, NONE);
00216 
00217   // The calling sequence for dgetrf is:
00218   // dgetrf(M, N, A, lda, ipiv, info)
00219 
00220   //    M       (input) int*
00221   //            The number of rows of the matrix A.  M >= 0.
00222   // In C/C++, pass the number of *cols* of A
00223   int M = this->n();
00224 
00225   //    N       (input) int*
00226   //            The number of columns of the matrix A.  N >= 0.
00227   // In C/C++, pass the number of *rows* of A
00228   int N = this->m();
00229 
00230   //    A (input/output) double precision array, dimension (LDA,N)
00231   //      On entry, the M-by-N matrix to be factored.
00232   //      On exit, the factors L and U from the factorization
00233   //      A = P*L*U; the unit diagonal elements of L are not stored.
00234   // Here, we pass &(_val[0]).
00235 
00236   //    LDA     (input) int*
00237   //            The leading dimension of the array A.  LDA >= max(1,M).
00238   int LDA = M;
00239 
00240   //    ipiv    (output) integer array, dimension (min(m,n))
00241   //            The pivot indices; for 1 <= i <= min(m,n), row i of the
00242   //            matrix was interchanged with row IPIV(i).
00243   // Here, we pass &(_pivots[0]), a private class member used to store pivots
00244   this->_pivots.resize( std::min(M,N) );
00245 
00246   //    info    (output) int*
00247   //            = 0:  successful exit
00248   //            < 0:  if INFO = -i, the i-th argument had an illegal value
00249   //            > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
00250   //                  has been completed, but the factor U is exactly
00251   //                  singular, and division by zero will occur if it is used
00252   //                  to solve a system of equations.
00253   int INFO = 0;
00254 
00255   // Ready to call the actual factorization routine through PETSc's interface
00256   LAPACKgetrf_(&M, &N, &(this->_val[0]), &LDA, &(_pivots[0]), &INFO);
00257 
00258   // Check return value for errors
00259   if (INFO != 0)
00260     libmesh_error_msg("INFO=" << INFO << ", Error during Lapack LU factorization!");
00261 
00262   // Set the flag for LU decomposition
00263   this->_decomposition_type = LU_BLAS_LAPACK;
00264 }
00265 
00266 #else
00267 
00268 template<typename T>
00269 void DenseMatrix<T>::_lu_decompose_lapack ()
00270 {
00271   libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
00272 }
00273 
00274 #endif
00275 
00276 
00277 
00278 template<typename T>
00279 void DenseMatrix<T>::_svd_lapack (DenseVector<T>& sigma)
00280 {
00281   // The calling sequence for dgetrf is:
00282   // DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
00283 
00284 
00285   //  JOBU    (input) CHARACTER*1
00286   //          Specifies options for computing all or part of the matrix U:
00287   //          = 'A':  all M columns of U are returned in array U:
00288   //          = 'S':  the first min(m,n) columns of U (the left singular
00289   //                  vectors) are returned in the array U;
00290   //          = 'O':  the first min(m,n) columns of U (the left singular
00291   //                  vectors) are overwritten on the array A;
00292   //          = 'N':  no columns of U (no left singular vectors) are
00293   //                  computed.
00294   char JOBU = 'N';
00295 
00296   //  JOBVT   (input) CHARACTER*1
00297   //          Specifies options for computing all or part of the matrix
00298   //          V**T:
00299   //          = 'A':  all N rows of V**T are returned in the array VT;
00300   //          = 'S':  the first min(m,n) rows of V**T (the right singular
00301   //                  vectors) are returned in the array VT;
00302   //          = 'O':  the first min(m,n) rows of V**T (the right singular
00303   //                  vectors) are overwritten on the array A;
00304   //          = 'N':  no rows of V**T (no right singular vectors) are
00305   //                  computed.
00306   char JOBVT = 'N';
00307 
00308   std::vector<T> sigma_val;
00309   std::vector<T> U_val;
00310   std::vector<T> VT_val;
00311 
00312   _svd_helper(JOBU, JOBVT, sigma_val, U_val, VT_val);
00313 
00314   // Load the singular values into sigma, ignore U_val and VT_val
00315   const unsigned int n_sigma_vals =
00316     cast_int<unsigned int>(sigma_val.size());
00317   sigma.resize(n_sigma_vals);
00318   for(unsigned int i=0; i<n_sigma_vals; i++)
00319     sigma(i) = sigma_val[i];
00320 
00321 }
00322 
00323 template<typename T>
00324 void DenseMatrix<T>::_svd_lapack (DenseVector<T>& sigma, DenseMatrix<T>& U, DenseMatrix<T>& VT)
00325 {
00326   // The calling sequence for dgetrf is:
00327   // DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
00328 
00329 
00330   //  JOBU    (input) CHARACTER*1
00331   //          Specifies options for computing all or part of the matrix U:
00332   //          = 'A':  all M columns of U are returned in array U:
00333   //          = 'S':  the first min(m,n) columns of U (the left singular
00334   //                  vectors) are returned in the array U;
00335   //          = 'O':  the first min(m,n) columns of U (the left singular
00336   //                  vectors) are overwritten on the array A;
00337   //          = 'N':  no columns of U (no left singular vectors) are
00338   //                  computed.
00339   char JOBU = 'S';
00340 
00341   //  JOBVT   (input) CHARACTER*1
00342   //          Specifies options for computing all or part of the matrix
00343   //          V**T:
00344   //          = 'A':  all N rows of V**T are returned in the array VT;
00345   //          = 'S':  the first min(m,n) rows of V**T (the right singular
00346   //                  vectors) are returned in the array VT;
00347   //          = 'O':  the first min(m,n) rows of V**T (the right singular
00348   //                  vectors) are overwritten on the array A;
00349   //          = 'N':  no rows of V**T (no right singular vectors) are
00350   //                  computed.
00351   char JOBVT = 'S';
00352 
00353   std::vector<T> sigma_val;
00354   std::vector<T> U_val;
00355   std::vector<T> VT_val;
00356 
00357   _svd_helper(JOBU, JOBVT, sigma_val, U_val, VT_val);
00358 
00359   // Load the singular values into sigma, ignore U_val and VT_val
00360   const unsigned int n_sigma_vals =
00361     cast_int<unsigned int>(sigma_val.size());
00362   sigma.resize(n_sigma_vals);
00363   for(unsigned int i=0; i<n_sigma_vals; i++)
00364     sigma(i) = sigma_val[i];
00365 
00366   int M = this->n();
00367   int N = this->m();
00368   int min_MN = (M < N) ? M : N;
00369   U.resize(M,min_MN);
00370   for(unsigned int i=0; i<U.m(); i++)
00371     for(unsigned int j=0; j<U.n(); j++)
00372       {
00373         unsigned int index = i + j*U.n();  // Column major storage
00374         U(i,j) = U_val[index];
00375       }
00376 
00377   VT.resize(min_MN,N);
00378   for(unsigned int i=0; i<VT.m(); i++)
00379     for(unsigned int j=0; j<VT.n(); j++)
00380       {
00381         unsigned int index = i + j*U.n(); // Column major storage
00382         VT(i,j) = VT_val[index];
00383       }
00384 
00385 }
00386 
00387 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
00388 
00389 template<typename T>
00390 void DenseMatrix<T>::_svd_helper (char JOBU,
00391                                   char JOBVT,
00392                                   std::vector<T>& sigma_val,
00393                                   std::vector<T>& U_val,
00394                                   std::vector<T>& VT_val)
00395 {
00396 
00397   //    M       (input) int*
00398   //            The number of rows of the matrix A.  M >= 0.
00399   // In C/C++, pass the number of *cols* of A
00400   int M = this->n();
00401 
00402   //    N       (input) int*
00403   //            The number of columns of the matrix A.  N >= 0.
00404   // In C/C++, pass the number of *rows* of A
00405   int N = this->m();
00406 
00407   int min_MN = (M < N) ? M : N;
00408   int max_MN = (M > N) ? M : N;
00409 
00410   //  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
00411   //          On entry, the M-by-N matrix A.
00412   //          On exit,
00413   //          if JOBU = 'O',  A is overwritten with the first min(m,n)
00414   //                          columns of U (the left singular vectors,
00415   //                          stored columnwise);
00416   //          if JOBVT = 'O', A is overwritten with the first min(m,n)
00417   //                          rows of V**T (the right singular vectors,
00418   //                          stored rowwise);
00419   //          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
00420   //                          are destroyed.
00421   // Here, we pass &(_val[0]).
00422 
00423   //    LDA     (input) int*
00424   //            The leading dimension of the array A.  LDA >= max(1,M).
00425   int LDA = M;
00426 
00427   //  S       (output) DOUBLE PRECISION array, dimension (min(M,N))
00428   //          The singular values of A, sorted so that S(i) >= S(i+1).
00429   sigma_val.resize( min_MN );
00430 
00431   //  LDU     (input) INTEGER
00432   //          The leading dimension of the array U.  LDU >= 1; if
00433   //          JOBU = 'S' or 'A', LDU >= M.
00434   int LDU = M;
00435 
00436   //  U       (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
00437   //          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
00438   //          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
00439   //          if JOBU = 'S', U contains the first min(m,n) columns of U
00440   //          (the left singular vectors, stored columnwise);
00441   //          if JOBU = 'N' or 'O', U is not referenced.
00442   U_val.resize( LDU*M );
00443 
00444   //  LDVT    (input) INTEGER
00445   //          The leading dimension of the array VT.  LDVT >= 1; if
00446   //          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
00447   int LDVT = N;
00448 
00449   //  VT      (output) DOUBLE PRECISION array, dimension (LDVT,N)
00450   //          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
00451   //          V**T;
00452   //          if JOBVT = 'S', VT contains the first min(m,n) rows of
00453   //          V**T (the right singular vectors, stored rowwise);
00454   //          if JOBVT = 'N' or 'O', VT is not referenced.
00455   VT_val.resize( LDVT*N );
00456 
00457   //  LWORK   (input) INTEGER
00458   //          The dimension of the array WORK.
00459   //          LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)).
00460   //          For good performance, LWORK should generally be larger.
00461   //
00462   //          If LWORK = -1, then a workspace query is assumed; the routine
00463   //          only calculates the optimal size of the WORK array, returns
00464   //          this value as the first entry of the WORK array, and no error
00465   //          message related to LWORK is issued by XERBLA.
00466   int larger = (3*min_MN+max_MN > 5*min_MN) ? 3*min_MN+max_MN : 5*min_MN;
00467   int LWORK  = (larger > 1) ? larger : 1;
00468 
00469 
00470   //  WORK    (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
00471   //          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
00472   //          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
00473   //          superdiagonal elements of an upper bidiagonal matrix B
00474   //          whose diagonal is in S (not necessarily sorted). B
00475   //          satisfies A = U * B * VT, so it has the same singular values
00476   //          as A, and singular vectors related by U and VT.
00477   std::vector<T> WORK( LWORK );
00478 
00479   //  INFO    (output) INTEGER
00480   //          = 0:  successful exit.
00481   //          < 0:  if INFO = -i, the i-th argument had an illegal value.
00482   //          > 0:  if DBDSQR did not converge, INFO specifies how many
00483   //                superdiagonals of an intermediate bidiagonal form B
00484   //                did not converge to zero. See the description of WORK
00485   //                above for details.
00486   int INFO = 0;
00487 
00488   // Ready to call the actual factorization routine through PETSc's interface
00489   LAPACKgesvd_(&JOBU, &JOBVT, &M, &N, &(_val[0]), &LDA, &(sigma_val[0]), &(U_val[0]),
00490                &LDU, &(VT_val[0]), &LDVT, &(WORK[0]), &LWORK, &INFO);
00491 
00492   // Check return value for errors
00493   if (INFO != 0)
00494     libmesh_error_msg("INFO=" << INFO << ", Error during Lapack SVD calculation!");
00495 }
00496 
00497 
00498 #else
00499 
00500 template<typename T>
00501 void DenseMatrix<T>::_svd_helper (char,
00502                                   char,
00503                                   std::vector<T>&,
00504                                   std::vector<T>&,
00505                                   std::vector<T>&)
00506 {
00507   libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
00508 }
00509 
00510 #endif
00511 
00512 
00513 
00514 
00515 #if (LIBMESH_HAVE_SLEPC && LIBMESH_USE_REAL_NUMBERS)
00516 
00517 template<typename T>
00518 void DenseMatrix<T>::_evd_lapack (DenseVector<T>& lambda_real, DenseVector<T>& lambda_imag)
00519 {
00520   // The calling sequence for dgeev is:
00521   // DGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI, VL, LDVL, VR,
00522   //         LDVR, ILO, IHI, SCALE, ABNRM, RCONDE, RCONDV, WORK, LWORK, IWORK, INFO )
00523 
00524 
00525   //  BALANC  (input) CHARACTER*1
00526   //          Indicates how the input matrix should be diagonally scaled
00527   //          and/or permuted to improve the conditioning of its
00528   //          eigenvalues.
00529   //          = 'N': Do not diagonally scale or permute;
00530   char BALANC = 'N';
00531 
00532   //  JOBVL   (input) CHARACTER*1
00533   //          = 'N': left eigenvectors of A are not computed;
00534   //          = 'V': left eigenvectors of A are computed.
00535   char JOBVL = 'N';
00536 
00537   //  JOBVR   (input) CHARACTER*1
00538   //          = 'N': right eigenvectors of A are not computed;
00539   //          = 'V': right eigenvectors of A are computed.
00540   char JOBVR = 'N';
00541 
00542   //  SENSE   (input) CHARACTER*1
00543   //          Determines which reciprocal condition numbers are computed.
00544   //          = 'N': None are computed;
00545   //          = 'E': Computed for eigenvalues only;
00546   //          = 'V': Computed for right eigenvectors only;
00547   //          = 'B': Computed for eigenvalues and right eigenvectors.
00548   char SENSE = 'N';
00549 
00550   //    N       (input) int*
00551   //            The number of rows/cols of the matrix A.  N >= 0.
00552   libmesh_assert( this->m() == this->n() );
00553   int N = this->m();
00554 
00555   //  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
00556   //          On entry, the N-by-N matrix A.
00557   //          On exit, A has been overwritten.
00558   // Here, we pass &(_val[0]).
00559 
00560   //    LDA     (input) int*
00561   //            The leading dimension of the array A.  LDA >= max(1,N).
00562   int LDA = N;
00563 
00564   //  WR      (output) DOUBLE PRECISION array, dimension (N)
00565   //  WI      (output) DOUBLE PRECISION array, dimension (N)
00566   //          WR and WI contain the real and imaginary parts,
00567   //          respectively, of the computed eigenvalues.  Complex
00568   //          conjugate pairs of eigenvalues appear consecutively
00569   //          with the eigenvalue having the positive imaginary part
00570   //          first.
00571   lambda_real.resize(N);
00572   lambda_imag.resize(N);
00573 
00574   //  VL      (output) DOUBLE PRECISION array, dimension (LDVL,N)
00575   //          If JOBVL = 'V', the left eigenvectors u(j) are stored one
00576   //          after another in the columns of VL, in the same order
00577   //          as their eigenvalues.
00578   //          If JOBVL = 'N', VL is not referenced.
00579   //          If the j-th eigenvalue is real, then u(j) = VL(:,j),
00580   //          the j-th column of VL.
00581   //          If the j-th and (j+1)-st eigenvalues form a complex
00582   //          conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and
00583   //          u(j+1) = VL(:,j) - i*VL(:,j+1).
00584   // Just set to NULL here.
00585 
00586   //  LDVL    (input) INTEGER
00587   //          The leading dimension of the array VL.  LDVL >= 1; if
00588   //          JOBVL = 'V', LDVL >= N.
00589   int LDVL = 1;
00590 
00591   //  VR      (output) DOUBLE PRECISION array, dimension (LDVR,N)
00592   //          If JOBVR = 'V', the right eigenvectors v(j) are stored one
00593   //          after another in the columns of VR, in the same order
00594   //          as their eigenvalues.
00595   //          If JOBVR = 'N', VR is not referenced.
00596   //          If the j-th eigenvalue is real, then v(j) = VR(:,j),
00597   //          the j-th column of VR.
00598   //          If the j-th and (j+1)-st eigenvalues form a complex
00599   //          conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and
00600   //          v(j+1) = VR(:,j) - i*VR(:,j+1).
00601   // Just set to NULL here.
00602 
00603   //  LDVR    (input) INTEGER
00604   //          The leading dimension of the array VR.  LDVR >= 1; if
00605   //          JOBVR = 'V', LDVR >= N.
00606   int LDVR = 1;
00607 
00608   // Outputs (unused)
00609   int ILO = 0;
00610   int IHI = 0;
00611   std::vector<T> SCALE(N);
00612   T ABNRM;
00613   std::vector<T> RCONDE(N);
00614   std::vector<T> RCONDV(N);
00615 
00616   //  WORK    (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
00617   //          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
00618   //
00619   //  LWORK   (input) INTEGER
00620   //          The dimension of the array WORK.
00621   int LWORK = 3*N;
00622   std::vector<T> WORK( LWORK );
00623 
00624   //  IWORK   (workspace) INTEGER array, dimension (2*N-2)
00625   //          If SENSE = 'N' or 'E', not referenced.
00626   // Just set to NULL
00627 
00628 
00629   //  INFO    (output) INTEGER
00630   //          = 0:  successful exit
00631   //          < 0:  if INFO = -i, the i-th argument had an illegal value.
00632   //          > 0:  if INFO = i, the QR algorithm failed to compute all the
00633   //                eigenvalues, and no eigenvectors or condition numbers
00634   //                have been computed; elements 1:ILO-1 and i+1:N of WR
00635   //                and WI contain eigenvalues which have converged.
00636   int INFO = 0;
00637 
00638   // Get references to raw data
00639   std::vector<T>& lambda_real_val = lambda_real.get_values();
00640   std::vector<T>& lambda_imag_val = lambda_imag.get_values();
00641 
00642   // Ready to call the actual factorization routine through SLEPc's interface
00643   LAPACKgeevx_( &BALANC, &JOBVL, &JOBVR, &SENSE, &N, &(_val[0]), &LDA, &lambda_real_val[0],
00644                 &lambda_imag_val[0], NULL, &LDVL, NULL, &LDVR, &ILO, &IHI, &SCALE[0], &ABNRM,
00645                 &RCONDE[0], &RCONDV[0], &WORK[0], &LWORK, NULL, &INFO );
00646 
00647   // Check return value for errors
00648   if (INFO != 0)
00649     libmesh_error_msg("INFO=" << INFO << ", Error during Lapack eigenvalue calculation!");
00650 }
00651 
00652 #else
00653 
00654 template<typename T>
00655 void DenseMatrix<T>::_evd_lapack (DenseVector<T>& , DenseVector<T>& )
00656 {
00657   libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
00658 }
00659 
00660 #endif
00661 
00662 
00663 
00664 
00665 
00666 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
00667 
00668 template<typename T>
00669 void DenseMatrix<T>::_lu_back_substitute_lapack (const DenseVector<T>& b,
00670                                                  DenseVector<T>& x)
00671 {
00672   // The calling sequence for getrs is:
00673   // dgetrs(TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO)
00674 
00675   //    trans   (input) char*
00676   //            'n' for no tranpose, 't' for transpose
00677   char TRANS[] = "t";
00678 
00679   //    N       (input) int*
00680   //            The order of the matrix A.  N >= 0.
00681   int N = this->m();
00682 
00683 
00684   //    NRHS    (input) int*
00685   //            The number of right hand sides, i.e., the number of columns
00686   //            of the matrix B.  NRHS >= 0.
00687   int NRHS = 1;
00688 
00689   //    A       (input) DOUBLE PRECISION array, dimension (LDA,N)
00690   //            The factors L and U from the factorization A = P*L*U
00691   //            as computed by dgetrf.
00692   // Here, we pass &(_val[0])
00693 
00694   //    LDA     (input) int*
00695   //            The leading dimension of the array A.  LDA >= max(1,N).
00696   int LDA = N;
00697 
00698   //    ipiv    (input) int array, dimension (N)
00699   //            The pivot indices from DGETRF; for 1<=i<=N, row i of the
00700   //            matrix was interchanged with row IPIV(i).
00701   // Here, we pass &(_pivots[0]) which was computed in _lu_decompose_lapack
00702 
00703   //    B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
00704   //            On entry, the right hand side matrix B.
00705   //            On exit, the solution matrix X.
00706   // Here, we pass a copy of the rhs vector's data array in x, so that the
00707   // passed right-hand side b is unmodified.  I don't see a way around this
00708   // copy if we want to maintain an unmodified rhs in LibMesh.
00709   x = b;
00710   std::vector<T>& x_vec = x.get_values();
00711 
00712   // We can avoid the copy if we don't care about overwriting the RHS: just
00713   // pass b to the Lapack routine and then swap with x before exiting
00714   // std::vector<T>& x_vec = b.get_values();
00715 
00716   //    LDB     (input) int*
00717   //            The leading dimension of the array B.  LDB >= max(1,N).
00718   int LDB = N;
00719 
00720   //    INFO    (output) int*
00721   //            = 0:  successful exit
00722   //            < 0:  if INFO = -i, the i-th argument had an illegal value
00723   int INFO = 0;
00724 
00725   // Finally, ready to call the Lapack getrs function
00726   LAPACKgetrs_(TRANS, &N, &NRHS, &(_val[0]), &LDA, &(_pivots[0]), &(x_vec[0]), &LDB, &INFO);
00727 
00728   // Check return value for errors
00729   if (INFO != 0)
00730     libmesh_error_msg("INFO=" << INFO << ", Error during Lapack LU solve!");
00731 
00732   // Don't do this if you already made a copy of b above
00733   // Swap b and x.  The solution will then be in x, and whatever was originally
00734   // in x, maybe garbage, maybe nothing, will be in b.
00735   // FIXME: Rewrite the LU and Cholesky solves to just take one input, and overwrite
00736   // the input.  This *should* make user code simpler, as they don't have to create
00737   // an extra vector just to pass it in to the solve function!
00738   // b.swap(x);
00739 }
00740 
00741 #else
00742 
00743 template<typename T>
00744 void DenseMatrix<T>::_lu_back_substitute_lapack (const DenseVector<T>& ,
00745                                                  DenseVector<T>& )
00746 {
00747   libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
00748 }
00749 
00750 #endif
00751 
00752 
00753 
00754 
00755 
00756 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
00757 
00758 template<typename T>
00759 void DenseMatrix<T>::_matvec_blas(T alpha, T beta,
00760                                   DenseVector<T>& dest,
00761                                   const DenseVector<T>& arg,
00762                                   bool trans) const
00763 {
00764   // Ensure that dest and arg sizes are compatible
00765   if (!trans)
00766     {
00767       // dest  ~ A     * arg
00768       // (mx1)   (mxn) * (nx1)
00769       if ((dest.size() != this->m()) || (arg.size() != this->n()))
00770         libmesh_error_msg("Improper input argument sizes!");
00771     }
00772 
00773   else // trans == true
00774     {
00775       // Ensure that dest and arg are proper size
00776       // dest  ~ A^T   * arg
00777       // (nx1)   (nxm) * (mx1)
00778       if ((dest.size() != this->n()) || (arg.size() != this->m()))
00779         libmesh_error_msg("Improper input argument sizes!");
00780     }
00781 
00782   // Calling sequence for dgemv:
00783   //
00784   // dgemv(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
00785 
00786   //   TRANS  - CHARACTER*1, 't' for transpose, 'n' for non-transpose multiply
00787   // We store everything in row-major order, so pass the transpose flag for
00788   // non-transposed matvecs and the 'n' flag for transposed matvecs
00789   char TRANS[] = "t";
00790   if (trans)
00791     TRANS[0] = 'n';
00792 
00793   //   M      - INTEGER.
00794   //            On entry, M specifies the number of rows of the matrix A.
00795   // In C/C++, pass the number of *cols* of A
00796   int M = this->n();
00797 
00798   //   N      - INTEGER.
00799   //            On entry, N specifies the number of columns of the matrix A.
00800   // In C/C++, pass the number of *rows* of A
00801   int N = this->m();
00802 
00803   //   ALPHA  - DOUBLE PRECISION.
00804   // The scalar constant passed to this function
00805 
00806   //   A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
00807   //            Before entry, the leading m by n part of the array A must
00808   //            contain the matrix of coefficients.
00809   // The matrix, *this.  Note that _matvec_blas is called from
00810   // a const function, vector_mult(), and so we have made this function const
00811   // as well.  Since BLAS knows nothing about const, we have to cast it away
00812   // now.
00813   DenseMatrix<T>& a_ref = const_cast< DenseMatrix<T>& > ( *this );
00814   std::vector<T>& a = a_ref.get_values();
00815 
00816   //   LDA    - INTEGER.
00817   //            On entry, LDA specifies the first dimension of A as declared
00818   //            in the calling (sub) program. LDA must be at least
00819   //            max( 1, m ).
00820   int LDA = M;
00821 
00822   //   X      - DOUBLE PRECISION array of DIMENSION at least
00823   //            ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
00824   //            and at least
00825   //            ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
00826   //            Before entry, the incremented array X must contain the
00827   //            vector x.
00828   // Here, we must cast away the const-ness of "arg" since BLAS knows
00829   // nothing about const
00830   DenseVector<T>& x_ref = const_cast< DenseVector<T>& > ( arg );
00831   std::vector<T>& x = x_ref.get_values();
00832 
00833   //   INCX   - INTEGER.
00834   //            On entry, INCX specifies the increment for the elements of
00835   //            X. INCX must not be zero.
00836   int INCX = 1;
00837 
00838   //   BETA   - DOUBLE PRECISION.
00839   //            On entry, BETA specifies the scalar beta. When BETA is
00840   //            supplied as zero then Y need not be set on input.
00841   // The second scalar constant passed to this function
00842 
00843   //   Y      - DOUBLE PRECISION array of DIMENSION at least
00844   //            ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
00845   //            and at least
00846   //            ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
00847   //            Before entry with BETA non-zero, the incremented array Y
00848   //            must contain the vector y. On exit, Y is overwritten by the
00849   //            updated vector y.
00850   // The input vector "dest"
00851   std::vector<T>& y = dest.get_values();
00852 
00853   //   INCY   - INTEGER.
00854   //            On entry, INCY specifies the increment for the elements of
00855   //            Y. INCY must not be zero.
00856   int INCY = 1;
00857 
00858   // Finally, ready to call the BLAS function
00859   BLASgemv_(TRANS, &M, &N, &alpha, &(a[0]), &LDA, &(x[0]), &INCX, &beta, &(y[0]), &INCY);
00860 }
00861 
00862 
00863 #else
00864 
00865 
00866 template<typename T>
00867 void DenseMatrix<T>::_matvec_blas(T , T,
00868                                   DenseVector<T>& ,
00869                                   const DenseVector<T>&,
00870                                   bool ) const
00871 {
00872   libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
00873 }
00874 
00875 
00876 #endif
00877 
00878 
00879 //--------------------------------------------------------------
00880 // Explicit instantiations
00881 template void DenseMatrix<Real>::_multiply_blas(const DenseMatrixBase<Real>&, _BLAS_Multiply_Flag);
00882 template void DenseMatrix<Real>::_lu_decompose_lapack();
00883 template void DenseMatrix<Real>::_lu_back_substitute_lapack(const DenseVector<Real>& ,
00884                                                             DenseVector<Real>&);
00885 template void DenseMatrix<Real>::_matvec_blas(Real, Real,
00886                                               DenseVector<Real>& ,
00887                                               const DenseVector<Real>&,
00888                                               bool ) const;
00889 template void DenseMatrix<Real>::_svd_lapack(DenseVector<Real>&);
00890 template void DenseMatrix<Real>::_svd_lapack(DenseVector<Real>&, DenseMatrix<Real>&, DenseMatrix<Real>&);
00891 template void DenseMatrix<Real>::_svd_helper (char, char, std::vector<Real>&,
00892                                               std::vector<Real>&,
00893                                               std::vector<Real>& );
00894 template void DenseMatrix<Real>::_evd_lapack(DenseVector<Real>&, DenseVector<Real>&);
00895 
00896 #if !(LIBMESH_USE_REAL_NUMBERS)
00897 template void DenseMatrix<Number>::_multiply_blas(const DenseMatrixBase<Number>&, _BLAS_Multiply_Flag);
00898 template void DenseMatrix<Number>::_lu_decompose_lapack();
00899 template void DenseMatrix<Number>::_lu_back_substitute_lapack(const DenseVector<Number>& ,
00900                                                               DenseVector<Number>&);
00901 template void DenseMatrix<Number>::_matvec_blas(Number, Number,
00902                                                 DenseVector<Number>& ,
00903                                                 const DenseVector<Number>&,
00904                                                 bool ) const;
00905 template void DenseMatrix<Number>::_svd_lapack(DenseVector<Number>&);
00906 template void DenseMatrix<Number>::_svd_lapack(DenseVector<Number>&, DenseMatrix<Number>&, DenseMatrix<Number>&);
00907 template void DenseMatrix<Number>::_svd_helper (char, char, std::vector<Number>&,
00908                                                 std::vector<Number>&,
00909                                                 std::vector<Number>& );
00910 template void DenseMatrix<Number>::_evd_lapack(DenseVector<Number>&, DenseVector<Number>&);
00911 
00912 #endif
00913 
00914 } // namespace libMesh