|
libflame
revision_anchor
|
Functions | |
| FLA_Error | FLA_Bsvd_ext_opt_var1 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj H, FLA_Svd_type jobu, FLA_Obj U, FLA_Svd_type jobv, FLA_Obj V, FLA_Bool apply_Uh2C, FLA_Obj C, dim_t b_alg) |
| FLA_Error | FLA_Bsvd_ext_ops_var1 (int m_d, int m_U, int m_V, int m_C, int n_C, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, float *buff_C, int rs_C, int cs_C, int b_alg) |
| FLA_Error | FLA_Bsvd_ext_opd_var1 (int m_d, int m_U, int m_V, int m_C, int n_C, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, double *buff_C, int rs_C, int cs_C, int b_alg) |
| FLA_Error | FLA_Bsvd_ext_opc_var1 (int m_d, int m_U, int m_V, int m_C, int n_C, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, scomplex *buff_C, int rs_C, int cs_C, int b_alg) |
| FLA_Error | FLA_Bsvd_ext_opz_var1 (int m_d, int m_U, int m_V, int m_C, int n_C, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, dcomplex *buff_C, int rs_C, int cs_C, int b_alg) |
| FLA_Error FLA_Bsvd_ext_opc_var1 | ( | int | m_d, |
| int | m_U, | ||
| int | m_V, | ||
| int | m_C, | ||
| int | n_C, | ||
| int | n_GH, | ||
| int | n_iter_max, | ||
| float * | buff_d, | ||
| int | inc_d, | ||
| float * | buff_e, | ||
| int | inc_e, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_H, | ||
| int | rs_H, | ||
| int | cs_H, | ||
| scomplex * | buff_U, | ||
| int | rs_U, | ||
| int | cs_U, | ||
| scomplex * | buff_V, | ||
| int | rs_V, | ||
| int | cs_V, | ||
| scomplex * | buff_C, | ||
| int | rs_C, | ||
| int | cs_C, | ||
| int | b_alg | ||
| ) |
References bl1_c1(), bl1_csetm(), bl1_csscalv(), bl1_s0(), bl1_sm1(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_blc_var3(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Bsvd_find_submatrix_ops(), FLA_Bsvd_iteracc_v_ops_var1(), and FLA_Mach_params_ops().
Referenced by FLA_Bsvd_ext_opt_var1().
{
scomplex one = bl1_c1();
float rzero = bl1_s0();
int maxitr = 6;
float eps;
float tolmul;
float tol;
float thresh;
scomplex* G;
scomplex* H;
float* d1;
float* e1;
int r_val;
int done;
int m_GH_sweep_max;
int ij_begin;
int ijTL, ijBR;
int m_A11;
int n_iter_perf;
int n_UV_apply;
int total_deflations;
int n_deflations;
int n_iter_prev;
int n_iter_perf_sweep_max;
// Compute some convergence constants.
eps = FLA_Mach_params_ops( FLA_MACH_EPS );
tolmul = max( 10.0F, min( 100.0F, powf( eps, -0.125F ) ) );
FLA_Bsvd_compute_tol_thresh_ops( m_d,
tolmul,
maxitr,
buff_d, inc_d,
buff_e, inc_e,
&tol,
&thresh );
// Initialize our completion flag.
done = FALSE;
// Initialize a counter that holds the maximum number of rows of G
// and H that we would need to initialize for the next sweep.
m_GH_sweep_max = m_d - 1;
// Initialize a counter for the total number of iterations performed.
n_iter_prev = 0;
// Iterate until the matrix has completely deflated.
for ( total_deflations = 0; done != TRUE; )
{
// Initialize G and H to contain only identity rotations.
bl1_csetm( m_GH_sweep_max,
n_GH,
&one,
buff_G, rs_G, cs_G );
bl1_csetm( m_GH_sweep_max,
n_GH,
&one,
buff_H, rs_H, cs_H );
// Keep track of the maximum number of iterations performed in the
// current sweep. This is used when applying the sweep's Givens
// rotations.
n_iter_perf_sweep_max = 0;
// Perform a sweep: Move through the matrix and perform a bidiagonal
// SVD on each non-zero submatrix that is encountered. During the
// first time through, ijTL will be 0 and ijBR will be m_d - 1.
for ( ij_begin = 0; ij_begin < m_d; )
{
// Search for the first submatrix along the diagonal that is
// bounded by zeroes (or endpoints of the matrix). If no
// submatrix is found (ie: if the entire superdiagonal is zero
// then FLA_FAILURE is returned. This function also inspects
// superdiagonal elements for proximity to zero. If a given
// element is close enough to zero, then it is deemed
// converged and manually set to zero.
r_val = FLA_Bsvd_find_submatrix_ops( m_d,
ij_begin,
buff_d, inc_d,
buff_e, inc_e,
&ijTL,
&ijBR );
// Verify that a submatrix was found. If one was not found,
// then we are done with the current sweep. Furthermore, if
// a submatrix was not found AND we began our search at the
// beginning of the matrix (ie: ij_begin == 0), then the
// matrix has completely deflated and so we are done with
// Francis step iteration.
if ( r_val == FLA_FAILURE )
{
if ( ij_begin == 0 )
{
done = TRUE;
}
// Break out of the current sweep so we can apply the last
// remaining Givens rotations.
break;
}
// If we got this far, then:
// (a) ijTL refers to the index of the first non-zero
// superdiagonal along the diagonal, and
// (b) ijBR refers to either:
// - the first zero element that occurs after ijTL, or
// - the the last diagonal element.
// Note that ijTL and ijBR also correspond to the first and
// last diagonal elements of the submatrix of interest. Thus,
// we may compute the dimension of this submatrix as:
m_A11 = ijBR - ijTL + 1;
// Adjust ij_begin, which gets us ready for the next submatrix
// search in the current sweep.
ij_begin = ijBR + 1;
// Index to the submatrices upon which we will operate.
d1 = buff_d + ijTL * inc_d;
e1 = buff_e + ijTL * inc_e;
G = buff_G + ijTL * rs_G;
H = buff_H + ijTL * rs_H;
// Search for a batch of singular values, recursing on deflated
// subproblems whenever a split occurs. Iteration continues as
// long as
// (a) there is still matrix left to operate on, and
// (b) the number of iterations performed in this batch is
// less than n_G.
// If/when either of the two above conditions fails to hold,
// the function returns.
n_deflations = FLA_Bsvd_iteracc_v_ops_var1( m_A11,
n_GH,
ijTL,
tol,
thresh,
d1, inc_d,
e1, inc_e,
G, rs_G, cs_G,
H, rs_H, cs_H,
&n_iter_perf );
// Record the number of deflations that were observed.
total_deflations += n_deflations;
// Update the maximum number of iterations performed in the
// current sweep.
n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
// Store the most recent value of ijBR in m_G_sweep_max.
// When the sweep is done, this value will contain the minimum
// number of rows of G and H we can apply and safely include all
// non-identity rotations that were computed during the
// singular value searches.
m_GH_sweep_max = ijBR;
// Make sure we haven't exceeded our maximum iteration count.
if ( n_iter_prev >= n_iter_max * m_d )
{
FLA_Abort();
//return FLA_FAILURE;
}
}
// The sweep is complete. Now we must apply the Givens rotations
// that were accumulated during the sweep.
// Recall that the number of columns of U and V to which we apply
// rotations is one more than the number of rotations.
n_UV_apply = m_GH_sweep_max + 1;
// Apply the Givens rotations. Note that we only apply k sets of
// rotations, where k = n_iter_perf_sweep_max. Also note that we only
// apply to n_UV_apply columns of U and V since this is the most we
// need to touch given the most recent value stored to m_GH_sweep_max.
if ( buff_U != NULL ) // ( rs_U == 0 && cs_U == 0 ) // if U is requested
FLA_Apply_G_rf_blc_var3( n_iter_perf_sweep_max,
m_U,
n_UV_apply,
buff_G, rs_G, cs_G,
buff_U, rs_U, cs_U,
b_alg );
if ( buff_V != NULL ) // ( rs_V == 0 && cs_V == 0 ) // if V is requested
FLA_Apply_G_rf_blc_var3( n_iter_perf_sweep_max,
m_V,
n_UV_apply,
buff_H, rs_H, cs_H,
buff_V, rs_V, cs_V,
b_alg );
// lf with non-flipped cs/rs m/n should be used;
// but the same operation can be achieved by using rf
// ( U^H C )^H = C^H U where U is G_0 G_1 G_2 .... G_{k-1}
if ( buff_C != NULL ) // ( rs_C == 0 && cs_C == 0 ) // if C is requested, flip
{
FLA_Apply_G_rf_blc_var3( n_iter_perf_sweep_max,
n_C,
m_C,
buff_G, rs_G, cs_G,
buff_C, cs_C, rs_C,
b_alg );
}
// Increment the total number of iterations previously performed.
n_iter_prev += n_iter_perf_sweep_max;
}
// Make all the singular values positive.
{
int i;
float minus_one = bl1_sm1();
for ( i = 0; i < m_d; ++i )
{
if ( buff_d[ (i )*inc_d ] < rzero )
{
buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
// Scale the right singular vectors.
if ( buff_V != NULL )
bl1_csscalv( BLIS1_NO_CONJUGATE,
m_V,
&minus_one,
buff_V + (i )*cs_V, rs_V );
}
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Bsvd_ext_opd_var1 | ( | int | m_d, |
| int | m_U, | ||
| int | m_V, | ||
| int | m_C, | ||
| int | n_C, | ||
| int | n_GH, | ||
| int | n_iter_max, | ||
| double * | buff_d, | ||
| int | inc_d, | ||
| double * | buff_e, | ||
| int | inc_e, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_H, | ||
| int | rs_H, | ||
| int | cs_H, | ||
| double * | buff_U, | ||
| int | rs_U, | ||
| int | cs_U, | ||
| double * | buff_V, | ||
| int | rs_V, | ||
| int | cs_V, | ||
| double * | buff_C, | ||
| int | rs_C, | ||
| int | cs_C, | ||
| int | b_alg | ||
| ) |
References bl1_d0(), bl1_dm1(), bl1_dscalv(), bl1_z1(), bl1_zsetm(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_bld_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), and FLA_Mach_params_opd().
Referenced by FLA_Bsvd_ext_opt_var1().
{
dcomplex one = bl1_z1();
double rzero = bl1_d0();
int maxitr = 6;
double eps;
double tolmul;
double tol;
double thresh;
dcomplex* G;
dcomplex* H;
double* d1;
double* e1;
int r_val;
int done;
int m_GH_sweep_max;
int ij_begin;
int ijTL, ijBR;
int m_A11;
int n_iter_perf;
int n_UV_apply;
int total_deflations;
int n_deflations;
int n_iter_prev;
int n_iter_perf_sweep_max;
// Compute some convergence constants.
eps = FLA_Mach_params_opd( FLA_MACH_EPS );
tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
FLA_Bsvd_compute_tol_thresh_opd( m_d,
tolmul,
maxitr,
buff_d, inc_d,
buff_e, inc_e,
&tol,
&thresh );
// Initialize our completion flag.
done = FALSE;
// Initialize a counter that holds the maximum number of rows of G
// and H that we would need to initialize for the next sweep.
m_GH_sweep_max = m_d - 1;
// Initialize a counter for the total number of iterations performed.
n_iter_prev = 0;
// Iterate until the matrix has completely deflated.
for ( total_deflations = 0; done != TRUE; )
{
// Initialize G and H to contain only identity rotations.
bl1_zsetm( m_GH_sweep_max,
n_GH,
&one,
buff_G, rs_G, cs_G );
bl1_zsetm( m_GH_sweep_max,
n_GH,
&one,
buff_H, rs_H, cs_H );
// Keep track of the maximum number of iterations performed in the
// current sweep. This is used when applying the sweep's Givens
// rotations.
n_iter_perf_sweep_max = 0;
// Perform a sweep: Move through the matrix and perform a bidiagonal
// SVD on each non-zero submatrix that is encountered. During the
// first time through, ijTL will be 0 and ijBR will be m_d - 1.
for ( ij_begin = 0; ij_begin < m_d; )
{
// Search for the first submatrix along the diagonal that is
// bounded by zeroes (or endpoints of the matrix). If no
// submatrix is found (ie: if the entire superdiagonal is zero
// then FLA_FAILURE is returned. This function also inspects
// superdiagonal elements for proximity to zero. If a given
// element is close enough to zero, then it is deemed
// converged and manually set to zero.
r_val = FLA_Bsvd_find_submatrix_opd( m_d,
ij_begin,
buff_d, inc_d,
buff_e, inc_e,
&ijTL,
&ijBR );
// Verify that a submatrix was found. If one was not found,
// then we are done with the current sweep. Furthermore, if
// a submatrix was not found AND we began our search at the
// beginning of the matrix (ie: ij_begin == 0), then the
// matrix has completely deflated and so we are done with
// Francis step iteration.
if ( r_val == FLA_FAILURE )
{
if ( ij_begin == 0 )
{
done = TRUE;
}
// Break out of the current sweep so we can apply the last
// remaining Givens rotations.
break;
}
// If we got this far, then:
// (a) ijTL refers to the index of the first non-zero
// superdiagonal along the diagonal, and
// (b) ijBR refers to either:
// - the first zero element that occurs after ijTL, or
// - the the last diagonal element.
// Note that ijTL and ijBR also correspond to the first and
// last diagonal elements of the submatrix of interest. Thus,
// we may compute the dimension of this submatrix as:
m_A11 = ijBR - ijTL + 1;
// Adjust ij_begin, which gets us ready for the next submatrix
// search in the current sweep.
ij_begin = ijBR + 1;
// Index to the submatrices upon which we will operate.
d1 = buff_d + ijTL * inc_d;
e1 = buff_e + ijTL * inc_e;
G = buff_G + ijTL * rs_G;
H = buff_H + ijTL * rs_H;
// Search for a batch of singular values, recursing on deflated
// subproblems whenever a split occurs. Iteration continues as
// long as
// (a) there is still matrix left to operate on, and
// (b) the number of iterations performed in this batch is
// less than n_G.
// If/when either of the two above conditions fails to hold,
// the function returns.
n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
n_GH,
ijTL,
tol,
thresh,
d1, inc_d,
e1, inc_e,
G, rs_G, cs_G,
H, rs_H, cs_H,
&n_iter_perf );
// Record the number of deflations that were observed.
total_deflations += n_deflations;
// Update the maximum number of iterations performed in the
// current sweep.
n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
// Store the most recent value of ijBR in m_G_sweep_max.
// When the sweep is done, this value will contain the minimum
// number of rows of G and H we can apply and safely include all
// non-identity rotations that were computed during the
// singular value searches.
m_GH_sweep_max = ijBR;
// Make sure we haven't exceeded our maximum iteration count.
if ( n_iter_prev >= n_iter_max * m_d )
{
FLA_Abort();
//return FLA_FAILURE;
}
}
// The sweep is complete. Now we must apply the Givens rotations
// that were accumulated during the sweep.
// Recall that the number of columns of U and V to which we apply
// rotations is one more than the number of rotations.
n_UV_apply = m_GH_sweep_max + 1;
// Apply the Givens rotations. Note that we only apply k sets of
// rotations, where k = n_iter_perf_sweep_max. Also note that we only
// apply to n_UV_apply columns of U and V since this is the most we
// need to touch given the most recent value stored to m_GH_sweep_max.
if ( buff_U != NULL ) // ( rs_U == 0 && cs_U == 0 ) // if U is requested
FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max,
m_U,
n_UV_apply,
buff_G, rs_G, cs_G,
buff_U, rs_U, cs_U,
b_alg );
if ( buff_V != NULL ) // ( rs_V == 0 && cs_V == 0 ) // if V is requested
FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max,
m_V,
n_UV_apply,
buff_H, rs_H, cs_H,
buff_V, rs_V, cs_V,
b_alg );
// lf with non-flipped cs/rs m/n should be used;
// but the same operation can be achieved by using rf
// ( U^H C )^H = C^H U where U is G_0 G_1 G_2 .... G_{k-1}
if ( buff_C != NULL ) // ( rs_C == 0 && cs_C == 0 ) // if C is requested, flip
{
FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max,
n_C,
m_C,
buff_G, rs_G, cs_G,
buff_C, cs_C, rs_C,
b_alg );
}
// Increment the total number of iterations previously performed.
n_iter_prev += n_iter_perf_sweep_max;
}
// Make all the singular values positive.
{
int i;
double minus_one = bl1_dm1();
for ( i = 0; i < m_d; ++i )
{
if ( buff_d[ (i )*inc_d ] < rzero )
{
buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
// Scale the right singular vectors.
if ( buff_V != NULL )
bl1_dscalv( BLIS1_NO_CONJUGATE,
m_V,
&minus_one,
buff_V + (i )*cs_V, rs_V );
}
}
}
return n_iter_prev;
}
| FLA_Error FLA_Bsvd_ext_ops_var1 | ( | int | m_d, |
| int | m_U, | ||
| int | m_V, | ||
| int | m_C, | ||
| int | n_C, | ||
| int | n_GH, | ||
| int | n_iter_max, | ||
| float * | buff_d, | ||
| int | inc_d, | ||
| float * | buff_e, | ||
| int | inc_e, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_H, | ||
| int | rs_H, | ||
| int | cs_H, | ||
| float * | buff_U, | ||
| int | rs_U, | ||
| int | cs_U, | ||
| float * | buff_V, | ||
| int | rs_V, | ||
| int | cs_V, | ||
| float * | buff_C, | ||
| int | rs_C, | ||
| int | cs_C, | ||
| int | b_alg | ||
| ) |
References bl1_c1(), bl1_csetm(), bl1_s0(), bl1_sm1(), bl1_sscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_G_rf_bls_var3(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Bsvd_find_submatrix_ops(), FLA_Bsvd_iteracc_v_ops_var1(), and FLA_Mach_params_ops().
Referenced by FLA_Bsvd_ext_opt_var1().
{
scomplex one = bl1_c1();
float rzero = bl1_s0();
int maxitr = 6;
float eps;
float tolmul;
float tol;
float thresh;
scomplex* G;
scomplex* H;
float* d1;
float* e1;
int r_val;
int done;
int m_GH_sweep_max;
int ij_begin;
int ijTL, ijBR;
int m_A11;
int n_iter_perf;
int n_UV_apply;
int total_deflations;
int n_deflations;
int n_iter_prev;
int n_iter_perf_sweep_max;
// Compute some convergence constants.
eps = FLA_Mach_params_ops( FLA_MACH_EPS );
tolmul = max( 10.0F, min( 100.0F, powf( eps, -0.125F ) ) );
FLA_Bsvd_compute_tol_thresh_ops( m_d,
tolmul,
maxitr,
buff_d, inc_d,
buff_e, inc_e,
&tol,
&thresh );
// Initialize our completion flag.
done = FALSE;
// Initialize a counter that holds the maximum number of rows of G
// and H that we would need to initialize for the next sweep.
m_GH_sweep_max = m_d - 1;
// Initialize a counter for the total number of iterations performed.
n_iter_prev = 0;
// Iterate until the matrix has completely deflated.
for ( total_deflations = 0; done != TRUE; )
{
// Initialize G and H to contain only identity rotations.
bl1_csetm( m_GH_sweep_max,
n_GH,
&one,
buff_G, rs_G, cs_G );
bl1_csetm( m_GH_sweep_max,
n_GH,
&one,
buff_H, rs_H, cs_H );
// Keep track of the maximum number of iterations performed in the
// current sweep. This is used when applying the sweep's Givens
// rotations.
n_iter_perf_sweep_max = 0;
// Perform a sweep: Move through the matrix and perform a bidiagonal
// SVD on each non-zero submatrix that is encountered. During the
// first time through, ijTL will be 0 and ijBR will be m_d - 1.
for ( ij_begin = 0; ij_begin < m_d; )
{
// Search for the first submatrix along the diagonal that is
// bounded by zeroes (or endpoints of the matrix). If no
// submatrix is found (ie: if the entire superdiagonal is zero
// then FLA_FAILURE is returned. This function also inspects
// superdiagonal elements for proximity to zero. If a given
// element is close enough to zero, then it is deemed
// converged and manually set to zero.
r_val = FLA_Bsvd_find_submatrix_ops( m_d,
ij_begin,
buff_d, inc_d,
buff_e, inc_e,
&ijTL,
&ijBR );
// Verify that a submatrix was found. If one was not found,
// then we are done with the current sweep. Furthermore, if
// a submatrix was not found AND we began our search at the
// beginning of the matrix (ie: ij_begin == 0), then the
// matrix has completely deflated and so we are done with
// Francis step iteration.
if ( r_val == FLA_FAILURE )
{
if ( ij_begin == 0 )
{
done = TRUE;
}
// Break out of the current sweep so we can apply the last
// remaining Givens rotations.
break;
}
// If we got this far, then:
// (a) ijTL refers to the index of the first non-zero
// superdiagonal along the diagonal, and
// (b) ijBR refers to either:
// - the first zero element that occurs after ijTL, or
// - the the last diagonal element.
// Note that ijTL and ijBR also correspond to the first and
// last diagonal elements of the submatrix of interest. Thus,
// we may compute the dimension of this submatrix as:
m_A11 = ijBR - ijTL + 1;
// Adjust ij_begin, which gets us ready for the next submatrix
// search in the current sweep.
ij_begin = ijBR + 1;
// Index to the submatrices upon which we will operate.
d1 = buff_d + ijTL * inc_d;
e1 = buff_e + ijTL * inc_e;
G = buff_G + ijTL * rs_G;
H = buff_H + ijTL * rs_H;
// Search for a batch of singular values, recursing on deflated
// subproblems whenever a split occurs. Iteration continues as
// long as
// (a) there is still matrix left to operate on, and
// (b) the number of iterations performed in this batch is
// less than n_G.
// If/when either of the two above conditions fails to hold,
// the function returns.
n_deflations = FLA_Bsvd_iteracc_v_ops_var1( m_A11,
n_GH,
ijTL,
tol,
thresh,
d1, inc_d,
e1, inc_e,
G, rs_G, cs_G,
H, rs_H, cs_H,
&n_iter_perf );
// Record the number of deflations that were observed.
total_deflations += n_deflations;
// Update the maximum number of iterations performed in the
// current sweep.
n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
// Store the most recent value of ijBR in m_G_sweep_max.
// When the sweep is done, this value will contain the minimum
// number of rows of G and H we can apply and safely include all
// non-identity rotations that were computed during the
// singular value searches.
m_GH_sweep_max = ijBR;
// Make sure we haven't exceeded our maximum iteration count.
if ( n_iter_prev >= n_iter_max * m_d )
{
//FLA_Abort();
return n_iter_prev;
}
}
// The sweep is complete. Now we must apply the Givens rotations
// that were accumulated during the sweep.
// Recall that the number of columns of U and V to which we apply
// rotations is one more than the number of rotations.
n_UV_apply = m_GH_sweep_max + 1;
// Apply the Givens rotations. Note that we only apply k sets of
// rotations, where k = n_iter_perf_sweep_max. Also note that we only
// apply to n_UV_apply columns of U and V since this is the most we
// need to touch given the most recent value stored to m_GH_sweep_max.
if ( buff_U != NULL ) // ( rs_U == 0 && cs_U == 0 ) // if U is requested
FLA_Apply_G_rf_bls_var3( n_iter_perf_sweep_max,
m_U,
n_UV_apply,
buff_G, rs_G, cs_G,
buff_U, rs_U, cs_U,
b_alg );
if ( buff_V != NULL ) // ( rs_V == 0 && cs_V == 0 ) // if V is requested
FLA_Apply_G_rf_bls_var3( n_iter_perf_sweep_max,
m_V,
n_UV_apply,
buff_H, rs_H, cs_H,
buff_V, rs_V, cs_V,
b_alg );
// lf with non-flipped cs/rs m/n should be used;
// but the same operation can be achieved by using rf
// ( U^H C )^H = C^H U where U is G_0 G_1 G_2 .... G_{k-1}
if ( buff_C != NULL ) // ( rs_C == 0 && cs_C == 0 ) // if C is requested, flip
{
FLA_Apply_G_rf_bls_var3( n_iter_perf_sweep_max,
n_C,
m_C,
buff_G, rs_G, cs_G,
buff_C, cs_C, rs_C,
b_alg );
}
// Increment the total number of iterations previously performed.
n_iter_prev += n_iter_perf_sweep_max;
}
// Make all the singular values positive.
{
int i;
float minus_one = bl1_sm1();
for ( i = 0; i < m_d; ++i )
{
if ( buff_d[ (i )*inc_d ] < rzero )
{
buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
// Scale the right singular vectors.
if ( buff_V != NULL )
bl1_sscalv( BLIS1_NO_CONJUGATE,
m_V,
&minus_one,
buff_V + (i )*cs_V, rs_V );
}
}
}
return n_iter_prev;
}
| FLA_Error FLA_Bsvd_ext_opt_var1 | ( | dim_t | n_iter_max, |
| FLA_Obj | d, | ||
| FLA_Obj | e, | ||
| FLA_Obj | G, | ||
| FLA_Obj | H, | ||
| FLA_Svd_type | jobu, | ||
| FLA_Obj | U, | ||
| FLA_Svd_type | jobv, | ||
| FLA_Obj | V, | ||
| FLA_Bool | apply_Uh2C, | ||
| FLA_Obj | C, | ||
| dim_t | b_alg | ||
| ) |
References FLA_Bsvd_ext_opc_var1(), FLA_Bsvd_ext_opd_var1(), FLA_Bsvd_ext_ops_var1(), FLA_Bsvd_ext_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_Sort_bsvd_ext_b_opc(), FLA_Sort_bsvd_ext_b_opd(), FLA_Sort_bsvd_ext_b_ops(), and FLA_Sort_bsvd_ext_b_opz().
Referenced by FLA_Bsvd(), FLA_Bsvd_ext(), and FLA_Svd_ext_u_unb_var1().
{
FLA_Error r_val = FLA_SUCCESS;
FLA_Datatype datatype;
int m_U, m_V, m_C, n_C, n_GH;
int m_d, inc_d;
int inc_e;
int rs_G, cs_G;
int rs_H, cs_H;
int rs_U, cs_U;
int rs_V, cs_V;
int rs_C, cs_C;
if ( jobu != FLA_SVD_VECTORS_NONE )
datatype = FLA_Obj_datatype( U );
else if ( jobv != FLA_SVD_VECTORS_NONE )
datatype = FLA_Obj_datatype( V );
else if ( apply_Uh2C == TRUE )
datatype = FLA_Obj_datatype( C );
else
datatype = FLA_Obj_datatype( d );
m_d = FLA_Obj_vector_dim( d );
inc_d = FLA_Obj_vector_inc( d );
inc_e = FLA_Obj_vector_inc( e );
n_GH = FLA_Obj_width( G );
rs_G = FLA_Obj_row_stride( G );
cs_G = FLA_Obj_col_stride( G );
rs_H = FLA_Obj_row_stride( H );
cs_H = FLA_Obj_col_stride( H );
if ( jobu == FLA_SVD_VECTORS_NONE )
{
m_U = 0;
rs_U = 0;
cs_U = 0;
}
else
{
m_U = FLA_Obj_length( U );
rs_U = FLA_Obj_row_stride( U );
cs_U = FLA_Obj_col_stride( U );
}
if ( jobv == FLA_SVD_VECTORS_NONE )
{
m_V = 0;
rs_V = 0;
cs_V = 0;
}
else
{
m_V = FLA_Obj_length( V );
rs_V = FLA_Obj_row_stride( V );
cs_V = FLA_Obj_col_stride( V );
}
if ( apply_Uh2C == FALSE )
{
m_C = 0;
n_C = 0;
rs_C = 0;
cs_C = 0;
}
else
{
m_C = FLA_Obj_length( C );
n_C = FLA_Obj_width( C );
rs_C = FLA_Obj_row_stride( C );
cs_C = FLA_Obj_col_stride( C );
}
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = FLA_FLOAT_PTR( e );
scomplex* buff_G = FLA_COMPLEX_PTR( G );
scomplex* buff_H = FLA_COMPLEX_PTR( H );
float* buff_U = ( jobu == FLA_SVD_VECTORS_NONE ? NULL : FLA_FLOAT_PTR( U ) );
float* buff_V = ( jobv == FLA_SVD_VECTORS_NONE ? NULL : FLA_FLOAT_PTR( V ) );
float* buff_C = ( apply_Uh2C == FALSE ? NULL : FLA_FLOAT_PTR( C ) );
r_val = FLA_Bsvd_ext_ops_var1( m_d,
m_U,
m_V,
m_C,
n_C,
n_GH,
n_iter_max,
buff_d, inc_d,
buff_e, inc_e,
buff_G, rs_G, cs_G,
buff_H, rs_H, cs_H,
buff_U, rs_U, cs_U,
buff_V, rs_V, cs_V,
buff_C, rs_C, cs_C,
b_alg );
FLA_Sort_bsvd_ext_b_ops( m_d, buff_d, inc_d,
m_U, buff_U, rs_U, cs_U,
m_V, buff_V, rs_V, cs_V,
n_C, buff_C, rs_C, cs_C );
break;
}
case FLA_DOUBLE:
{
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = FLA_DOUBLE_PTR( e );
dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
double* buff_U = ( jobu == FLA_SVD_VECTORS_NONE ? NULL : FLA_DOUBLE_PTR( U ) );
double* buff_V = ( jobv == FLA_SVD_VECTORS_NONE ? NULL : FLA_DOUBLE_PTR( V ) );
double* buff_C = ( apply_Uh2C == FALSE ? NULL : FLA_DOUBLE_PTR( C ) );
r_val = FLA_Bsvd_ext_opd_var1( m_d,
m_U,
m_V,
m_C,
n_C,
n_GH,
n_iter_max,
buff_d, inc_d,
buff_e, inc_e,
buff_G, rs_G, cs_G,
buff_H, rs_H, cs_H,
buff_U, rs_U, cs_U,
buff_V, rs_V, cs_V,
buff_C, rs_C, cs_C,
b_alg );
FLA_Sort_bsvd_ext_b_opd( m_d, buff_d, inc_d,
m_U, buff_U, rs_U, cs_U,
m_V, buff_V, rs_V, cs_V,
n_C, buff_C, rs_C, cs_C );
break;
}
case FLA_COMPLEX:
{
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = FLA_FLOAT_PTR( e );
scomplex* buff_G = FLA_COMPLEX_PTR( G );
scomplex* buff_H = FLA_COMPLEX_PTR( H );
scomplex* buff_U = ( jobu == FLA_SVD_VECTORS_NONE ? NULL : FLA_COMPLEX_PTR( U ) );
scomplex* buff_V = ( jobv == FLA_SVD_VECTORS_NONE ? NULL : FLA_COMPLEX_PTR( V ) );
scomplex* buff_C = ( apply_Uh2C == FALSE ? NULL : FLA_COMPLEX_PTR( C ) );
r_val = FLA_Bsvd_ext_opc_var1( m_d,
m_U,
m_V,
m_C,
n_C,
n_GH,
n_iter_max,
buff_d, inc_d,
buff_e, inc_e,
buff_G, rs_G, cs_G,
buff_H, rs_H, cs_H,
buff_U, rs_U, cs_U,
buff_V, rs_V, cs_V,
buff_C, rs_C, cs_C,
b_alg );
FLA_Sort_bsvd_ext_b_opc( m_d, buff_d, inc_d,
m_U, buff_U, rs_U, cs_U,
m_V, buff_V, rs_V, cs_V,
n_C, buff_C, rs_C, cs_C );
break;
}
case FLA_DOUBLE_COMPLEX:
{
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = FLA_DOUBLE_PTR( e );
dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
dcomplex* buff_U = ( jobu == FLA_SVD_VECTORS_NONE ? NULL : FLA_DOUBLE_COMPLEX_PTR( U ) );
dcomplex* buff_V = ( jobv == FLA_SVD_VECTORS_NONE ? NULL : FLA_DOUBLE_COMPLEX_PTR( V ) );
dcomplex* buff_C = ( apply_Uh2C == FALSE ? NULL : FLA_DOUBLE_COMPLEX_PTR( C ) );
r_val = FLA_Bsvd_ext_opz_var1( m_d,
m_U,
m_V,
m_C,
n_C,
n_GH,
n_iter_max,
buff_d, inc_d,
buff_e, inc_e,
buff_G, rs_G, cs_G,
buff_H, rs_H, cs_H,
buff_U, rs_U, cs_U,
buff_V, rs_V, cs_V,
buff_C, rs_C, cs_C,
b_alg );
FLA_Sort_bsvd_ext_b_opz( m_d, buff_d, inc_d,
m_U, buff_U, rs_U, cs_U,
m_V, buff_V, rs_V, cs_V,
n_C, buff_C, rs_C, cs_C );
break;
}
}
return r_val;
}
| FLA_Error FLA_Bsvd_ext_opz_var1 | ( | int | m_d, |
| int | m_U, | ||
| int | m_V, | ||
| int | m_C, | ||
| int | n_C, | ||
| int | n_GH, | ||
| int | n_iter_max, | ||
| double * | buff_d, | ||
| int | inc_d, | ||
| double * | buff_e, | ||
| int | inc_e, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_H, | ||
| int | rs_H, | ||
| int | cs_H, | ||
| dcomplex * | buff_U, | ||
| int | rs_U, | ||
| int | cs_U, | ||
| dcomplex * | buff_V, | ||
| int | rs_V, | ||
| int | cs_V, | ||
| dcomplex * | buff_C, | ||
| int | rs_C, | ||
| int | cs_C, | ||
| int | b_alg | ||
| ) |
References bl1_d0(), bl1_dm1(), bl1_z1(), bl1_zdscalv(), bl1_zsetm(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_blz_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), and FLA_Mach_params_opd().
Referenced by FLA_Bsvd_ext_opt_var1().
{
dcomplex one = bl1_z1();
double rzero = bl1_d0();
int maxitr = 6;
double eps;
double tolmul;
double tol;
double thresh;
dcomplex* G;
dcomplex* H;
double* d1;
double* e1;
int r_val;
int done;
int m_GH_sweep_max;
int ij_begin;
int ijTL, ijBR;
int m_A11;
int n_iter_perf;
int n_UV_apply;
int total_deflations;
int n_deflations;
int n_iter_prev;
int n_iter_perf_sweep_max;
// Compute some convergence constants.
eps = FLA_Mach_params_opd( FLA_MACH_EPS );
tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
FLA_Bsvd_compute_tol_thresh_opd( m_d,
tolmul,
maxitr,
buff_d, inc_d,
buff_e, inc_e,
&tol,
&thresh );
// Initialize our completion flag.
done = FALSE;
// Initialize a counter that holds the maximum number of rows of G
// and H that we would need to initialize for the next sweep.
m_GH_sweep_max = m_d - 1;
// Initialize a counter for the total number of iterations performed.
n_iter_prev = 0;
// Iterate until the matrix has completely deflated.
for ( total_deflations = 0; done != TRUE; )
{
// Initialize G and H to contain only identity rotations.
bl1_zsetm( m_GH_sweep_max,
n_GH,
&one,
buff_G, rs_G, cs_G );
bl1_zsetm( m_GH_sweep_max,
n_GH,
&one,
buff_H, rs_H, cs_H );
// Keep track of the maximum number of iterations performed in the
// current sweep. This is used when applying the sweep's Givens
// rotations.
n_iter_perf_sweep_max = 0;
// Perform a sweep: Move through the matrix and perform a bidiagonal
// SVD on each non-zero submatrix that is encountered. During the
// first time through, ijTL will be 0 and ijBR will be m_d - 1.
for ( ij_begin = 0; ij_begin < m_d; )
{
// Search for the first submatrix along the diagonal that is
// bounded by zeroes (or endpoints of the matrix). If no
// submatrix is found (ie: if the entire superdiagonal is zero
// then FLA_FAILURE is returned. This function also inspects
// superdiagonal elements for proximity to zero. If a given
// element is close enough to zero, then it is deemed
// converged and manually set to zero.
r_val = FLA_Bsvd_find_submatrix_opd( m_d,
ij_begin,
buff_d, inc_d,
buff_e, inc_e,
&ijTL,
&ijBR );
// Verify that a submatrix was found. If one was not found,
// then we are done with the current sweep. Furthermore, if
// a submatrix was not found AND we began our search at the
// beginning of the matrix (ie: ij_begin == 0), then the
// matrix has completely deflated and so we are done with
// Francis step iteration.
if ( r_val == FLA_FAILURE )
{
if ( ij_begin == 0 )
{
done = TRUE;
}
// Break out of the current sweep so we can apply the last
// remaining Givens rotations.
break;
}
// If we got this far, then:
// (a) ijTL refers to the index of the first non-zero
// superdiagonal along the diagonal, and
// (b) ijBR refers to either:
// - the first zero element that occurs after ijTL, or
// - the the last diagonal element.
// Note that ijTL and ijBR also correspond to the first and
// last diagonal elements of the submatrix of interest. Thus,
// we may compute the dimension of this submatrix as:
m_A11 = ijBR - ijTL + 1;
// Adjust ij_begin, which gets us ready for the next submatrix
// search in the current sweep.
ij_begin = ijBR + 1;
// Index to the submatrices upon which we will operate.
d1 = buff_d + ijTL * inc_d;
e1 = buff_e + ijTL * inc_e;
G = buff_G + ijTL * rs_G;
H = buff_H + ijTL * rs_H;
// Search for a batch of singular values, recursing on deflated
// subproblems whenever a split occurs. Iteration continues as
// long as
// (a) there is still matrix left to operate on, and
// (b) the number of iterations performed in this batch is
// less than n_G.
// If/when either of the two above conditions fails to hold,
// the function returns.
n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
n_GH,
ijTL,
tol,
thresh,
d1, inc_d,
e1, inc_e,
G, rs_G, cs_G,
H, rs_H, cs_H,
&n_iter_perf );
// Record the number of deflations that were observed.
total_deflations += n_deflations;
// Update the maximum number of iterations performed in the
// current sweep.
n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
// Store the most recent value of ijBR in m_G_sweep_max.
// When the sweep is done, this value will contain the minimum
// number of rows of G and H we can apply and safely include all
// non-identity rotations that were computed during the
// singular value searches.
m_GH_sweep_max = ijBR;
// Make sure we haven't exceeded our maximum iteration count.
if ( n_iter_prev >= n_iter_max * m_d )
{
FLA_Abort();
//return FLA_FAILURE;
}
}
// The sweep is complete. Now we must apply the Givens rotations
// that were accumulated during the sweep.
// Recall that the number of columns of U and V to which we apply
// rotations is one more than the number of rotations.
n_UV_apply = m_GH_sweep_max + 1;
// Apply the Givens rotations. Note that we only apply k sets of
// rotations, where k = n_iter_perf_sweep_max. Also note that we only
// apply to n_UV_apply columns of U and V since this is the most we
// need to touch given the most recent value stored to m_GH_sweep_max.
if ( buff_U != NULL ) // ( rs_U == 0 && cs_U == 0 ) // if U is requested
FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max,
m_U,
n_UV_apply,
buff_G, rs_G, cs_G,
buff_U, rs_U, cs_U,
b_alg );
if ( buff_V != NULL ) // ( rs_V == 0 && cs_V == 0 ) // if V is requested
FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max,
m_V,
n_UV_apply,
buff_H, rs_H, cs_H,
buff_V, rs_V, cs_V,
b_alg );
// lf with non-flipped cs/rs m/n should be used;
// but the same operation can be achieved by using rf
// ( U^H C )^H = C^H U where U is G_0 G_1 G_2 .... G_{k-1}
if ( buff_C != NULL ) // ( rs_C == 0 && cs_C == 0 ) // if C is requested, flip
{
FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max,
n_C,
m_C,
buff_G, rs_G, cs_G,
buff_C, cs_C, rs_C,
b_alg );
}
// Increment the total number of iterations previously performed.
n_iter_prev += n_iter_perf_sweep_max;
}
// Make all the singular values positive.
{
int i;
double minus_one = bl1_dm1();
for ( i = 0; i < m_d; ++i )
{
if ( buff_d[ (i )*inc_d ] < rzero )
{
buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
// Scale the right singular vectors.
if ( buff_V != NULL )
bl1_zdscalv( BLIS1_NO_CONJUGATE,
m_V,
&minus_one,
buff_V + (i )*cs_V, rs_V );
}
}
}
return n_iter_prev;
}
1.7.6.1