655 lines
		
	
	
		
			24 KiB
		
	
	
	
		
			C++
		
	
	
		
		
			
		
	
	
			655 lines
		
	
	
		
			24 KiB
		
	
	
	
		
			C++
		
	
	
|  | // =============================================================================
 | ||
|  | // === spqr_front ==============================================================
 | ||
|  | // =============================================================================
 | ||
|  | 
 | ||
|  | /* Given an m-by-n frontal matrix, use Householder reflections to reduce it
 | ||
|  |    to upper trapezoidal form.  Columns 0:npiv-1 are checked against tol. | ||
|  | 
 | ||
|  |         0    # x x x x x x | ||
|  |         1    # x x x x x x | ||
|  |         2    # x x x x x x | ||
|  |         3    # x x x x x x | ||
|  |         4    - # x x x x x   <- Stair [0] = 4 | ||
|  |         5      # x x x x x | ||
|  |         6      # x x x x x | ||
|  |         7      # x x x x x | ||
|  |         8      - # x x x x   <- Stair [1] = 8 | ||
|  |         9        # x x x x | ||
|  |        10        # x x x x | ||
|  |        11        # x x x x | ||
|  |        12        - # x x x   <- Stair [2] = 12 | ||
|  |        13          # x x x | ||
|  |        14          - # x x   <- Stair [3] = 14 | ||
|  |        15            - # x   <- Stair [4] = 15 | ||
|  |        16              - #   <- Stair [5] = 16 | ||
|  |                          -   <- Stair [6] = 17 | ||
|  | 
 | ||
|  |     Suppose npiv = 3, and no columns fall below tol: | ||
|  | 
 | ||
|  |         0    R r r r r r r | ||
|  |         1    h R r r r r r | ||
|  |         2    h h R r r r r | ||
|  |         3    h h h C c c c | ||
|  |         4    - h h h C c c   <- Stair [0] = 4 | ||
|  |         5      h h h h C c | ||
|  |         6      h h h h h C | ||
|  |         7      h h h h h h | ||
|  |         8      - h h h h h   <- Stair [1] = 8 | ||
|  |         9        h h h h h | ||
|  |        10        h h h h h | ||
|  |        11        h h h h h | ||
|  |        12        - h h h h   <- Stair [2] = 12 | ||
|  |        13          h h h h | ||
|  |        14          - h h h   <- Stair [3] = 14 | ||
|  |        15            - h h   <- Stair [4] = 15 | ||
|  |        16              - h   <- Stair [5] = 16 | ||
|  |                          -   <- Stair [6] = 17 | ||
|  | 
 | ||
|  |     where r is an entry in the R block, c is an entry in the C (contribution) | ||
|  |     block, and h is an entry in the H block (Householder vectors).  Diagonal | ||
|  |     entries are capitalized. | ||
|  | 
 | ||
|  |     Suppose the 2nd column has a norm <= tol; the result is a "squeezed" R: | ||
|  | 
 | ||
|  |         0    R r r r r r r   <- Stair [1] = 0 to denote a dead pivot column | ||
|  |         1    h - R r r r r | ||
|  |         2    h - h C c c c | ||
|  |         3    h - h h C c c | ||
|  |         4    - - h h h C c   <- Stair [0] = 4 | ||
|  |         5      - h h h h C | ||
|  |         6      - h h h h h | ||
|  |         7      - h h h h h | ||
|  |         8      - h h h h h | ||
|  |         9        h h h h h | ||
|  |        10        h h h h h | ||
|  |        11        h h h h h | ||
|  |        12        - h h h h   <- Stair [2] = 12 | ||
|  |        13          h h h h | ||
|  |        14          - h h h   <- Stair [3] = 14 | ||
|  |        15            - h h   <- Stair [4] = 15 | ||
|  |        16              - h   <- Stair [5] = 16 | ||
|  |                          -   <- Stair [6] = 17 | ||
|  | 
 | ||
|  |     where "diagonal" entries are capitalized.  The 2nd H vector is missing | ||
|  |     (it is H2 = I, identity).  The 2nd column of R is not deleted.  The | ||
|  |     contribution block is always triangular, but the first npiv columns of | ||
|  |     the R can become "staggered".  Columns npiv to n-1 in the R block are | ||
|  |     always the same length. | ||
|  | 
 | ||
|  |     If columns are found "dead", the staircase may be updated.  Stair[k] is | ||
|  |     set to zero if k is dead.  Also, Stair[k] is increased, if necessary, to | ||
|  |     ensure that R and C reside within the staircase.  For example: | ||
|  | 
 | ||
|  |         0 0 0 | ||
|  |         0 0 x | ||
|  | 
 | ||
|  |     with npiv = 2 has a Stair = [ 0 1 2 ] on output, to reflect the C block: | ||
|  | 
 | ||
|  |         - C c | ||
|  |         - - C | ||
|  | 
 | ||
|  |     A tol of zero means that any nonzero norm (however small) is accepted; | ||
|  |     only exact zero columns are flagged as dead.  A negative tol means that | ||
|  |     the norms are ignored; a column is never flagged dead.  The default tol | ||
|  |     is set elsewhere as 20 * (m+1) * eps * max column 2-norm of A. | ||
|  | 
 | ||
|  |     LAPACK's dlarf* routines are used to construct and apply the Householder | ||
|  |     reflections.  The panel size (block size) is provided as an input | ||
|  |     parameter, which defines the number of Householder vectors in a panel. | ||
|  |     However, when the front is small (or when the remaining part | ||
|  |     of a front is small) the block size is increased to include the entire | ||
|  |     front.  "Small" is defined, below, as fronts with fewer than 5000 entries. | ||
|  | 
 | ||
|  |     NOTE: this function does not check its inputs.  If the caller runs out of | ||
|  |     memory and passes NULL pointers, this function will segfault. | ||
|  | */ | ||
|  | 
 | ||
|  | #include "spqr_subset.hpp"
 | ||
|  | #include "iostream"
 | ||
|  | 
 | ||
|  | #define SMALL 5000
 | ||
|  | #define MINCHUNK 4
 | ||
|  | #define MINCHUNK_RATIO 4
 | ||
|  | 
 | ||
|  | // =============================================================================
 | ||
|  | // === spqr_private_house ======================================================
 | ||
|  | // =============================================================================
 | ||
|  | 
 | ||
|  | // Construct a Householder reflection H = I - tau * v * v' such that H*x is
 | ||
|  | //  reduced to zero except for the first element.  Returns X [0] = the first
 | ||
|  | //  entry of H*x, and X [1:n-1] = the Householder vector V [1:n-1], where
 | ||
|  | //  V [0] = 1.  If X [1:n-1] is zero, then the H=I (tau = 0) is returned,
 | ||
|  | //  and V [1:n-1] is all zero.  In MATLAB (1-based indexing), ignoring the
 | ||
|  | //  rescaling done in dlarfg/zlarfg, this function computes the following:
 | ||
|  | 
 | ||
|  | /*
 | ||
|  |     function [x, tau] = house (x) | ||
|  |     n = length (x) ; | ||
|  |     beta = norm (x) ; | ||
|  |     if (x (1) > 0) | ||
|  |         beta = -beta ; | ||
|  |     end | ||
|  |     tau = (beta - x (1)) / beta ; | ||
|  |     x (2:n) = x (2:n) / (x (1) - beta) ; | ||
|  |     x (1) = beta ; | ||
|  | */ | ||
|  | 
 | ||
|  | //  Note that for the complex case, the reflection must be applied as H'*x,
 | ||
|  | //  which requires that tau be conjugated in spqr_private_apply1.
 | ||
|  | //
 | ||
|  | //  This function performs about 3*n+2 flops
 | ||
|  | 
 | ||
|  | inline double spqr_private_larfg (Int n, double *X, cholmod_common *cc) | ||
|  | { | ||
|  |     double tau = 0 ; | ||
|  |     BLAS_INT N = n, one = 1 ; | ||
|  |     if (CHECK_BLAS_INT && !EQ (N,n)) | ||
|  |     { | ||
|  |         cc->blas_ok = FALSE ; | ||
|  |     } | ||
|  |     if (!CHECK_BLAS_INT || cc->blas_ok) | ||
|  |     { | ||
|  |         LAPACK_DLARFG (&N, X, X + 1, &one, &tau) ; | ||
|  |     } | ||
|  |     return (tau) ; | ||
|  | } | ||
|  | 
 | ||
|  | 
 | ||
|  | inline Complex spqr_private_larfg (Int n, Complex *X, cholmod_common *cc) | ||
|  | { | ||
|  |     Complex tau = 0 ; | ||
|  |     BLAS_INT N = n, one = 1 ; | ||
|  |     if (CHECK_BLAS_INT && !EQ (N,n)) | ||
|  |     { | ||
|  |         cc->blas_ok = FALSE ; | ||
|  |     } | ||
|  |     if (!CHECK_BLAS_INT || cc->blas_ok) | ||
|  |     { | ||
|  |         LAPACK_ZLARFG (&N, X, X + 1, &one, &tau) ; | ||
|  |     } | ||
|  |     return (tau) ; | ||
|  | } | ||
|  | 
 | ||
|  | 
 | ||
|  | template <typename Entry> Entry spqr_private_house  // returns tau
 | ||
|  | ( | ||
|  |     // inputs, not modified
 | ||
|  |     Int n, | ||
|  | 
 | ||
|  |     // input/output
 | ||
|  |     Entry *X,           // size n
 | ||
|  | 
 | ||
|  |     cholmod_common *cc | ||
|  | ) | ||
|  | { | ||
|  |     return (spqr_private_larfg (n, X, cc)) ; | ||
|  | } | ||
|  | 
 | ||
|  | 
 | ||
|  | // =============================================================================
 | ||
|  | // === spqr_private_apply1 =====================================================
 | ||
|  | // =============================================================================
 | ||
|  | 
 | ||
|  | // Apply a single Householder reflection; C = C - tau * v * v' * C.  The
 | ||
|  | // algorithm used by dlarf is:
 | ||
|  | 
 | ||
|  | /*
 | ||
|  |     function C = apply1 (C, v, tau) | ||
|  |     w = C'*v ; | ||
|  |     C = C - tau*v*w' ; | ||
|  | */ | ||
|  | 
 | ||
|  | //  For the complex case, we need to apply H', which requires that tau be
 | ||
|  | //  conjugated.
 | ||
|  | //
 | ||
|  | //  If applied to a single column, this function performs 2*n-1 flops to
 | ||
|  | //  compute w, and 2*n+1 to apply it to C, for a total of 4*n flops.
 | ||
|  | 
 | ||
|  | inline void spqr_private_larf (Int m, Int n, double *V, double tau, | ||
|  |     double *C, Int ldc, double *W, cholmod_common *cc) | ||
|  | { | ||
|  |     BLAS_INT M = m, N = n, LDC = ldc, one = 1 ; | ||
|  |     char left = 'L' ; | ||
|  |     if (CHECK_BLAS_INT && !(EQ (M,m) && EQ (N,n) && EQ (LDC,ldc))) | ||
|  |     { | ||
|  |         cc->blas_ok = FALSE ; | ||
|  |          | ||
|  |     } | ||
|  |     if (!CHECK_BLAS_INT || cc->blas_ok) | ||
|  |     { | ||
|  |         LAPACK_DLARF (&left, &M, &N, V, &one, &tau, C, &LDC, W) ; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | inline void spqr_private_larf (Int m, Int n, Complex *V, Complex tau, | ||
|  |     Complex *C, Int ldc, Complex *W, cholmod_common *cc) | ||
|  | { | ||
|  |     BLAS_INT M = m, N = n, LDC = ldc, one = 1 ; | ||
|  |     char left = 'L' ; | ||
|  |     Complex conj_tau = spqr_conj (tau) ; | ||
|  |     if (CHECK_BLAS_INT && !(EQ (M,m) && EQ (N,n) && EQ (LDC,ldc))) | ||
|  |     { | ||
|  |         cc->blas_ok = FALSE ; | ||
|  |     } | ||
|  |     if (!CHECK_BLAS_INT || cc->blas_ok) | ||
|  |     { | ||
|  |         LAPACK_ZLARF (&left, &M, &N, V, &one, &conj_tau, C, &LDC, W) ; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | 
 | ||
|  | template <typename Entry> void spqr_private_apply1 | ||
|  | ( | ||
|  |     // inputs, not modified
 | ||
|  |     Int m,              // C is m-by-n
 | ||
|  |     Int n, | ||
|  |     Int ldc,            // leading dimension of C
 | ||
|  |     Entry *V,           // size m, Householder vector V
 | ||
|  |     Entry tau,          // Householder coefficient
 | ||
|  | 
 | ||
|  |     // input/output
 | ||
|  |     Entry *C,           // size m-by-n
 | ||
|  | 
 | ||
|  |     // workspace, not defined on input or output
 | ||
|  |     Entry *W,           // size n
 | ||
|  | 
 | ||
|  |     cholmod_common *cc | ||
|  | ) | ||
|  | { | ||
|  |     Entry vsave ; | ||
|  |     if (m <= 0 || n <= 0) | ||
|  |     { | ||
|  |         return ;        // nothing to do
 | ||
|  |     } | ||
|  |     vsave = V [0] ;     // temporarily restore unit diagonal of V
 | ||
|  |     V [0] = 1 ; | ||
|  |     spqr_private_larf (m, n, V, tau, C, ldc, W, cc) ; | ||
|  |     V [0] = vsave ;     // restore V [0]
 | ||
|  | } | ||
|  | 
 | ||
|  | 
 | ||
|  | // =============================================================================
 | ||
|  | // === spqr_front ==============================================================
 | ||
|  | // =============================================================================
 | ||
|  | 
 | ||
|  | // Factorize a front F into a sequence of Householder vectors H, and an upper
 | ||
|  | // trapezoidal matrix R.  R may be a squeezed upper trapezoidal matrix if any
 | ||
|  | // of the leading npiv columns are linearly dependent.  Returns the row index
 | ||
|  | // rank that indicates the first entry in C, which is F (rank,npiv), or 0
 | ||
|  | // on error.
 | ||
|  | 
 | ||
|  | template <typename Entry> Int spqr_front | ||
|  | ( | ||
|  |     // input, not modified
 | ||
|  |     Int m,              // F is m-by-n with leading dimension m
 | ||
|  |     Int n, | ||
|  |     Int npiv,           // number of pivot columns
 | ||
|  |     double tol,         // a column is flagged as dead if its norm is <= tol
 | ||
|  |     Int ntol,           // apply tol only to first ntol pivot columns
 | ||
|  |     Int fchunk,         // block size for compact WY Householder reflections,
 | ||
|  |                         // treated as 1 if fchunk <= 1
 | ||
|  | 
 | ||
|  |     // input/output
 | ||
|  |     Entry *F,           // frontal matrix F of size m-by-n
 | ||
|  |     Int *Stair,         // size n, entries F (Stair[k]:m-1, k) are all zero,
 | ||
|  |                         // for each k = 0:n-1, and remain zero on output.
 | ||
|  |     char *Rdead,        // size npiv; all zero on input.  If k is dead,
 | ||
|  |                         // Rdead [k] is set to 1
 | ||
|  | 
 | ||
|  |     // output, not defined on input
 | ||
|  |     Entry *Tau,         // size n, Householder coefficients
 | ||
|  | 
 | ||
|  |     // workspace, undefined on input and output
 | ||
|  |     Entry *W,           // size b*n, where b = min (fchunk,n,m)
 | ||
|  | 
 | ||
|  |     // input/output
 | ||
|  |     double *wscale, | ||
|  |     double *wssq, | ||
|  | 
 | ||
|  |     cholmod_common *cc  // for cc->hypotenuse function
 | ||
|  | ) | ||
|  | { | ||
|  |     Entry tau ; | ||
|  |     double wk ; | ||
|  |     Entry *V ; | ||
|  |     Int k, t, g, g1, nv, k1, k2, i, t0, vzeros, mleft, nleft, vsize, minchunk, | ||
|  |         rank ; | ||
|  | 
 | ||
|  |     // NOTE: inputs are not checked for NULL (except if debugging enabled)
 | ||
|  | 
 | ||
|  |     ASSERT (F != NULL) ; | ||
|  |     ASSERT (Stair != NULL) ; | ||
|  |     ASSERT (Rdead != NULL) ; | ||
|  |     ASSERT (Tau != NULL) ; | ||
|  |     ASSERT (W != NULL) ; | ||
|  |     ASSERT (m >= 0 && n >= 0) ; | ||
|  | 
 | ||
|  |     npiv = MAX (0, npiv) ;  // npiv must be between 0 and n
 | ||
|  |     npiv = MIN (n, npiv) ; | ||
|  | 
 | ||
|  |     g1 = 0 ;                // row index of first queued-up Householder
 | ||
|  |     k1 = 0 ;                // pending Householders are in F (g1:t, k1:k2-1)
 | ||
|  |     k2 = 0 ; | ||
|  |     V = F ;                 // Householder vectors start here
 | ||
|  |     g = 0 ;                 // number of good Householders found
 | ||
|  |     nv = 0 ;                // number of Householder reflections queued up
 | ||
|  |     vzeros = 0 ;            // number of explicit zeros in queued-up H's
 | ||
|  |     t = 0 ;                 // staircase of current column
 | ||
|  |     fchunk = MAX (fchunk, 1) ; | ||
|  |     minchunk = MAX (MINCHUNK, fchunk/MINCHUNK_RATIO) ; | ||
|  |     rank = MIN (m,npiv) ;  // F (rank,npiv) is the first entry in C.  rank
 | ||
|  |                            // is also the number of rows in the R block,
 | ||
|  |                            // and the number of good pivot columns found.
 | ||
|  | 
 | ||
|  |     ntol = MIN (ntol, npiv) ;   // Note ntol can be negative, which means to
 | ||
|  |                                 // not use tol at all.
 | ||
|  | 
 | ||
|  |     PR (("Front %ld by %ld with %ld pivots\n", m, n, npiv)) ; | ||
|  |     for (k = 0 ; k < n ; k++) | ||
|  |     { | ||
|  | 
 | ||
|  |         // ---------------------------------------------------------------------
 | ||
|  |         // reduce the kth column of F to eliminate all but "diagonal" F (g,k)
 | ||
|  |         // ---------------------------------------------------------------------
 | ||
|  | 
 | ||
|  |         // get the staircase for the kth column, and operate on the "diagonal"
 | ||
|  |         // F (g,k); eliminate F (g+1:t-1, k) to zero
 | ||
|  |         t0 = t ;            // t0 = staircase of column k-1
 | ||
|  |         t = Stair [k] ;     // t = staircase of this column k
 | ||
|  | 
 | ||
|  |         PR (("k %ld g %ld m %ld n %ld npiv %ld\n", k, g, m, n, npiv)) ; | ||
|  |         if (g >= m) | ||
|  |         { | ||
|  |             // F (g,k) is outside the matrix, so we're done.  If this happens
 | ||
|  |             // when k < npiv, then there is no contribution block.
 | ||
|  |             PR (("hit the wall, npiv: %ld k %ld rank %ld\n", npiv, k, rank)) ; | ||
|  |             for ( ; k < npiv ; k++) | ||
|  |             { | ||
|  |                 Rdead [k] = 1 ; | ||
|  |                 Stair [k] = 0 ;         // remaining pivot columns all dead
 | ||
|  |                 Tau [k] = 0 ; | ||
|  |             } | ||
|  |             for ( ; k < n ; k++) | ||
|  |             { | ||
|  |                 Stair [k] = m ;         // non-pivotal columns
 | ||
|  |                 Tau [k] = 0 ; | ||
|  |             } | ||
|  |             ASSERT (nv == 0) ;          // there can be no pending updates
 | ||
|  |             return (rank) ; | ||
|  |         } | ||
|  | 
 | ||
|  |         // if t < g+1, then this column is all zero; fix staircase so that R is
 | ||
|  |         // always inside the staircase.
 | ||
|  |         t = MAX (g+1,t) ; | ||
|  |         Stair [k] = t ; | ||
|  | 
 | ||
|  |         // ---------------------------------------------------------------------
 | ||
|  |         // If t just grew a lot since the last t, apply H now to all of F
 | ||
|  |         // ---------------------------------------------------------------------
 | ||
|  | 
 | ||
|  |         // vzeros is the number of zero entries in V after including the next
 | ||
|  |         // Householder vector.  If it would exceed 50% of the size of V,
 | ||
|  |         // apply the pending Householder reflections now, but only if
 | ||
|  |         // enough vectors have accumulated.
 | ||
|  | 
 | ||
|  |         vzeros += nv * (t - t0) ; | ||
|  |         if (nv >= minchunk) | ||
|  |         { | ||
|  |             vsize = (nv*(nv+1))/2 + nv*(t-g1-nv) ; | ||
|  |             if (vzeros > MAX (16, vsize/2)) | ||
|  |             { | ||
|  |                 // apply pending block of Householder reflections
 | ||
|  |                 PR (("(1) apply k1 %ld k2 %ld\n", k1, k2)) ; | ||
|  |                 spqr_larftb ( | ||
|  |                     0,                          // method 0: Left, Transpose
 | ||
|  |                     t0-g1, n-k2, nv, m, m, | ||
|  |                     V,                          // F (g1:t-1, k1:k1+nv-1)
 | ||
|  |                     &Tau [k1],                  // Tau (k1:k-1)
 | ||
|  |                     &F [INDEX (g1,k2,m)],       // F (g1:t-1, k2:n-1)
 | ||
|  |                     W, cc) ;                    // size nv*nv + nv*(n-k2)
 | ||
|  |                 nv = 0 ;        // clear queued-up Householder reflections
 | ||
|  |                 vzeros = 0 ; | ||
|  |             } | ||
|  |         } | ||
|  | 
 | ||
|  |         // ---------------------------------------------------------------------
 | ||
|  |         // find a Householder reflection that reduces column k
 | ||
|  |         // ---------------------------------------------------------------------
 | ||
|  | 
 | ||
|  |         tau = spqr_private_house (t-g, &F [INDEX (g,k,m)], cc) ; | ||
|  | 
 | ||
|  |         // ---------------------------------------------------------------------
 | ||
|  |         // check to see if the kth column is OK
 | ||
|  |         // ---------------------------------------------------------------------
 | ||
|  | 
 | ||
|  |         if (k < ntol && (wk = spqr_abs (F [INDEX (g,k,m)], cc)) <= tol) | ||
|  |         { | ||
|  |             // -----------------------------------------------------------------
 | ||
|  |             // norm (F (g:t-1, k)) is too tiny; the kth pivot column is dead
 | ||
|  |             // -----------------------------------------------------------------
 | ||
|  | 
 | ||
|  |             // keep track of the 2-norm of w, the dead column 2-norms
 | ||
|  |             if (wk != 0) | ||
|  |             { | ||
|  |                 // see also LAPACK's dnrm2 function
 | ||
|  |                 if ((*wscale) == 0) | ||
|  |                 { | ||
|  |                     // this is the nonzero first entry in w
 | ||
|  |                     (*wssq) = 1 ; | ||
|  |                 } | ||
|  |                 if ((*wscale) < wk) | ||
|  |                 { | ||
|  |                     double rr = (*wscale) / wk ; | ||
|  |                     (*wssq) = 1 + (*wssq) * rr * rr ; | ||
|  |                     (*wscale) = wk ; | ||
|  |                 } | ||
|  |                 else | ||
|  |                 { | ||
|  |                     double rr = wk / (*wscale) ; | ||
|  |                     (*wssq) += rr * rr ; | ||
|  |                 } | ||
|  |             } | ||
|  | 
 | ||
|  |             // zero out F (g:m-1,k) and flag it as dead
 | ||
|  |             for (i = g ; i < m ; i++) | ||
|  |             { | ||
|  |                 // This is not strictly necessary.  On output, entries outside
 | ||
|  |                 // the staircase are ignored.
 | ||
|  |                 F [INDEX (i,k,m)] = 0 ; | ||
|  |             } | ||
|  |             Stair [k] = 0 ; | ||
|  |             Tau [k] = 0 ; | ||
|  |             Rdead [k] = 1 ; | ||
|  | 
 | ||
|  |             if (nv > 0) | ||
|  |             { | ||
|  |                 // apply pending block of Householder reflections
 | ||
|  |                 PR (("(2) apply k1 %ld k2 %ld\n", k1, k2)) ; | ||
|  |                 spqr_larftb ( | ||
|  |                     0,                          // method 0: Left, Transpose
 | ||
|  |                     t0-g1, n-k2, nv, m, m, | ||
|  |                     V,                          // F (g1:t-1, k1:k1+nv-1)
 | ||
|  |                     &Tau [k1],                  // Tau (k1:k-1)
 | ||
|  |                     &F [INDEX (g1,k2,m)],       // F (g1:t-1, k2:n-1)
 | ||
|  |                     W, cc) ;                    // size nv*nv + nv*(n-k2)
 | ||
|  |                 nv = 0 ;        // clear queued-up Householder reflections
 | ||
|  |                 vzeros = 0 ; | ||
|  |             } | ||
|  | 
 | ||
|  |         } | ||
|  |         else | ||
|  |         { | ||
|  |             // -----------------------------------------------------------------
 | ||
|  |             // one more good pivot column found
 | ||
|  |             // -----------------------------------------------------------------
 | ||
|  | 
 | ||
|  |             Tau [k] = tau ;             // save the Householder coefficient
 | ||
|  |             if (nv == 0) | ||
|  |             { | ||
|  |                 // start the queue of pending Householder updates, and define
 | ||
|  |                 // the current panel as k1:k2-1
 | ||
|  |                 g1 = g ;                        // first row of V
 | ||
|  |                 k1 = k ;                        // first column of V
 | ||
|  |                 k2 = MIN (n, k+fchunk) ;        // k2-1 is last col in panel
 | ||
|  |                 V = &F [INDEX (g1,k1,m)] ;      // pending V starts here
 | ||
|  | 
 | ||
|  |                 // check for switch to unblocked code
 | ||
|  |                 mleft = m-g1 ;                  // number of rows left
 | ||
|  |                 nleft = n-k1 ;                  // number of columns left
 | ||
|  |                 if (mleft * (nleft-(fchunk+4)) < SMALL || mleft <= fchunk/2 | ||
|  |                     || fchunk <= 1) | ||
|  |                 { | ||
|  |                     // remaining matrix is small; switch to unblocked code by
 | ||
|  |                     // including the rest of the matrix in the panel.  Always
 | ||
|  |                     // use unblocked code if fchunk <= 1.
 | ||
|  |                     k2 = n ; | ||
|  |                 } | ||
|  |             } | ||
|  |             nv++ ;  // one more pending update; V is F (g1:t-1, k1:k1+nv-1)
 | ||
|  | 
 | ||
|  |             // -----------------------------------------------------------------
 | ||
|  |             // keep track of "pure" flops, for performance testing only
 | ||
|  |             // -----------------------------------------------------------------
 | ||
|  | 
 | ||
|  |             // The Householder vector is of length t-g, including the unit
 | ||
|  |             // diagonal, and takes 3*(t-g) flops to compute.  It will be
 | ||
|  |             // applied as a block, but compute the "pure" flops by assuming
 | ||
|  |             // that this single Householder vector is computed and then applied
 | ||
|  |             // just by itself to the rest of the frontal matrix (columns
 | ||
|  |             // k+1:n-1, or n-k-1 columns).  Applying the Householder reflection
 | ||
|  |             // to just one column takes 4*(t-g) flops.  This computation only
 | ||
|  |             // works if TBB is disabled, merely because it uses a global
 | ||
|  |             // variable to keep track of the flop count.  If TBB is used, this
 | ||
|  |             // computation may result in a race condition; it is disabled in
 | ||
|  |             // that case.
 | ||
|  | 
 | ||
|  |             FLOP_COUNT ((t-g) * (3 + 4 * (n-k-1))) ; | ||
|  | 
 | ||
|  |             // -----------------------------------------------------------------
 | ||
|  |             // apply the kth Householder reflection to the current panel
 | ||
|  |             // -----------------------------------------------------------------
 | ||
|  | 
 | ||
|  |             // F (g:t-1, k+1:k2-1) -= v * tau * v' * F (g:t-1, k+1:k2-1), where
 | ||
|  |             // v is stored in F (g:t-1,k).  This applies just one reflection
 | ||
|  |             // to the current panel.
 | ||
|  |             PR (("apply 1: k %ld\n", k)) ; | ||
|  |             spqr_private_apply1 (t-g, k2-k-1, m, &F [INDEX (g,k,m)], tau, | ||
|  |                 &F [INDEX (g,k+1,m)], W, cc) ; | ||
|  | 
 | ||
|  |             g++ ;   // one more pivot found
 | ||
|  | 
 | ||
|  |             // -----------------------------------------------------------------
 | ||
|  |             // apply the Householder reflections if end of panel reached
 | ||
|  |             // -----------------------------------------------------------------
 | ||
|  | 
 | ||
|  |             if (k == k2-1 || g == m)            // or if last pivot is found
 | ||
|  |             { | ||
|  |                 // apply pending block of Householder reflections
 | ||
|  |                 PR (("(3) apply k1 %ld k2 %ld\n", k1, k2)) ; | ||
|  |                 spqr_larftb ( | ||
|  |                     0,                          // method 0: Left, Transpose
 | ||
|  |                     t-g1, n-k2, nv, m, m, | ||
|  |                     V,                          // F (g1:t-1, k1:k1+nv-1)
 | ||
|  |                     &Tau [k1],                  // Tau (k1:k2-1)
 | ||
|  |                     &F [INDEX (g1,k2,m)],       // F (g1:t-1, k2:n-1)
 | ||
|  |                     W, cc) ;                    // size nv*nv + nv*(n-k2)
 | ||
|  |                 nv = 0 ;        // clear queued-up Householder reflections
 | ||
|  |                 vzeros = 0 ; | ||
|  |             } | ||
|  |         } | ||
|  | 
 | ||
|  |         // ---------------------------------------------------------------------
 | ||
|  |         // determine the rank of the pivot columns
 | ||
|  |         // ---------------------------------------------------------------------
 | ||
|  | 
 | ||
|  |         if (k == npiv-1) | ||
|  |         { | ||
|  |             // the rank is the number of good columns found in the first
 | ||
|  |             // npiv columns.  It is also the number of rows in the R block.
 | ||
|  |             // F (rank,npiv) is first entry in the C block.
 | ||
|  |             rank = g ; | ||
|  |             PR (("rank of Front pivcols: %ld\n", rank)) ; | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     if (CHECK_BLAS_INT && !cc->blas_ok) | ||
|  |     { | ||
|  |     	  // This cannot occur if the BLAS_INT and the Int are the same integer.
 | ||
|  |         // In that case, CHECK_BLAS_INT is FALSE at compile-time, and the
 | ||
|  |         // compiler will then remove this as dead code.
 | ||
|  |         ERROR (CHOLMOD_INVALID, "problem too large for the BLAS") ; | ||
|  |         return (0) ; | ||
|  |     } | ||
|  | 
 | ||
|  |     return (rank) ; | ||
|  | } | ||
|  | 
 | ||
|  | 
 | ||
|  | // =============================================================================
 | ||
|  | 
 | ||
|  | template Int spqr_front <double> | ||
|  | ( | ||
|  |     // input, not modified
 | ||
|  |     Int m,              // F is m-by-n with leading dimension m
 | ||
|  |     Int n, | ||
|  |     Int npiv,           // number of pivot columns
 | ||
|  |     double tol,         // a column is flagged as dead if its norm is <= tol
 | ||
|  |     Int ntol,           // apply tol only to first ntol pivot columns
 | ||
|  |     Int fchunk,         // block size for compact WY Householder reflections,
 | ||
|  |                         // treated as 1 if fchunk <= 1 (in which case the
 | ||
|  |                         // unblocked code is used).
 | ||
|  | 
 | ||
|  |     // input/output
 | ||
|  |     double *F,          // frontal matrix F of size m-by-n
 | ||
|  |     Int *Stair,         // size n, entries F (Stair[k]:m-1, k) are all zero,
 | ||
|  |                         // and remain zero on output.
 | ||
|  |     char *Rdead,        // size npiv; all zero on input.  If k is dead,
 | ||
|  |                         // Rdead [k] is set to 1
 | ||
|  | 
 | ||
|  |     // output, not defined on input
 | ||
|  |     double *Tau,        // size n, Householder coefficients
 | ||
|  | 
 | ||
|  |     // workspace, undefined on input and output
 | ||
|  |     double *W,          // size b*n, where b = min (fchunk,n,m)
 | ||
|  | 
 | ||
|  |     // input/output
 | ||
|  |     double *wscale, | ||
|  |     double *wssq, | ||
|  | 
 | ||
|  |     cholmod_common *cc  // for cc->hypotenuse function
 | ||
|  | ) ; | ||
|  | 
 | ||
|  | // =============================================================================
 | ||
|  | 
 | ||
|  | template Int spqr_front <Complex> | ||
|  | ( | ||
|  |     // input, not modified
 | ||
|  |     Int m,              // F is m-by-n with leading dimension m
 | ||
|  |     Int n, | ||
|  |     Int npiv,           // number of pivot columns
 | ||
|  |     double tol,         // a column is flagged as dead if its norm is <= tol
 | ||
|  |     Int ntol,           // apply tol only to first ntol pivot columns
 | ||
|  |     Int fchunk,         // block size for compact WY Householder reflections,
 | ||
|  |                         // treated as 1 if fchunk <= 1 (in which case the
 | ||
|  |                         // unblocked code is used). 
 | ||
|  | 
 | ||
|  |     // input/output
 | ||
|  |     Complex *F,         // frontal matrix F of size m-by-n
 | ||
|  |     Int *Stair,         // size n, entries F (Stair[k]:m-1, k) are all zero,
 | ||
|  |                         // and remain zero on output.
 | ||
|  |     char *Rdead,        // size npiv; all zero on input.  If k is dead,
 | ||
|  |                         // Rdead [k] is set to 1
 | ||
|  | 
 | ||
|  |     // output, not defined on input
 | ||
|  |     Complex *Tau,       // size n, Householder coefficients
 | ||
|  | 
 | ||
|  |     // workspace, undefined on input and output
 | ||
|  |     Complex *W,         // size b*n, where b = min (fchunk,n,m)
 | ||
|  | 
 | ||
|  |     // input/output
 | ||
|  |     double *wscale, | ||
|  |     double *wssq, | ||
|  | 
 | ||
|  |     cholmod_common *cc  // for cc->hypotenuse function
 | ||
|  | ) ; |