From 063aa1411813319a939d49c4428e7a2e36db7813 Mon Sep 17 00:00:00 2001 From: Alex Cunningham Date: Thu, 21 Jan 2010 00:59:33 +0000 Subject: [PATCH] Reimplemented matrix::householder_update using GSL and BLAS, you'll need to edit Makefile.am to enable the new version -- see email --- configure.ac | 20 ++++++++++++++ cpp/Makefile.am | 20 ++++++++++++++ cpp/Matrix.cpp | 54 ++++++++++++++++++++++++------------- cpp/timeMatrix.cpp | 67 +++++++++++++++++++--------------------------- 4 files changed, 102 insertions(+), 59 deletions(-) diff --git a/configure.ac b/configure.ac index 6c7007327..0b44fa9e9 100644 --- a/configure.ac +++ b/configure.ac @@ -23,6 +23,26 @@ AC_ARG_ENABLE([debug], *) AC_MSG_ERROR([bad value ${enableval} for --enable-debug]) ;; esac],[debug=false]) AM_CONDITIONAL([DEBUG], [test x$debug = xtrue]) + +# enable using GSL for linalg +AC_ARG_ENABLE([gsl], + [ --enable-gsl Enable the GSL library], + [case "${enableval}" in + yes) gsl=true ;; + no) gsl=false ;; + *) AC_MSG_ERROR([bad value ${enableval} for --enable-gsl]) ;; + esac],[gsl=false]) + AM_CONDITIONAL([GSL], [test x$gsl = xtrue]) + +# enable using ATLAS for BLAS +AC_ARG_ENABLE([atlas], + [ --enable-atlas Enable ATLAS optimized BLAS], + [case "${enableval}" in + yes) atlas=true ;; + no) atlas=false ;; + *) AC_MSG_ERROR([bad value ${enableval} for --enable-atlas]) ;; + esac],[atlas=false]) + AM_CONDITIONAL([ATLAS], [test x$atlas = xtrue]) # Checks for programs. AC_PROG_CXX diff --git a/cpp/Makefile.am b/cpp/Makefile.am index d968eb1bf..a772ea3e4 100644 --- a/cpp/Makefile.am +++ b/cpp/Makefile.am @@ -266,6 +266,26 @@ include_HEADERS = $(headers) AM_CXXFLAGS += -I.. AM_LDFLAGS = -L../CppUnitLite -lCppUnitLite $(BOOST_LDFLAGS) $(boost_serialization) #-L. -lgtsam +# TO ENABLE GSL AND ATLAS, uncomment this part! +# Direct call for gsl and atlas +# AM_LDFLAGS += -lgsl -lcblas -latlas +# libgtsam_la_LDFLAGS += -lgsl -lcblas -latlas +# AM_CXXFLAGS += -DGSL +# libgtsam_la_CPPFLAGS += -DGSL + +# GSL using ATLAS +# if GSL +# AM_CXXFLAGS += -DGSL +# libgtsam_la_CPPFLAGS += -DGSL +# if ATLAS +# AM_LDFLAGS += -lgsl -lcblas -latlas +# libgtsam_la_LDFLAGS += -lgsl -lcblas -latlas +# else +# AM_LDFLAGS += -lgsl -lgslcblas +# libgtsam_la_LDFLAGS += -lgsl -lgslcblas +# endif +# endif + TESTS = $(check_PROGRAMS) CXXLINK = $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ $(CXXLD) -g $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ diff --git a/cpp/Matrix.cpp b/cpp/Matrix.cpp index d2358d0f3..2ec6d9d32 100644 --- a/cpp/Matrix.cpp +++ b/cpp/Matrix.cpp @@ -9,6 +9,11 @@ #include #include +#ifdef GSL +#include // needed for gsl blas +#include +#endif + #include #include #include @@ -19,6 +24,7 @@ #include "Vector.h" #include "svdcmp.h" + using namespace std; namespace ublas = boost::numeric::ublas; @@ -276,6 +282,14 @@ void householder_update(Matrix &A, int j, double beta, const Vector& vjm) { const size_t m = A.size1(), n = A.size2(); +#ifdef GSL + // use GSL version + gsl_vector_const_view v = gsl_vector_const_view_array(vjm.data().begin(), m-j); + gsl_matrix_view Ag = gsl_matrix_view_array(A.data().begin(), m, n); + gsl_matrix_view Ag_view = gsl_matrix_submatrix (&(Ag.matrix), j, 0, m-j, n); + gsl_linalg_householder_hm (beta, &(v.vector), &(Ag_view.matrix)); + +#else // elegant but slow: A -= Matrix(outer_prod(v,beta*trans(A)*v)); // faster code below @@ -299,6 +313,7 @@ void householder_update(Matrix &A, int j, double beta, const Vector& vjm) { // A(r,c) -= vjm(r-j) * wjn(c-j); (*a) -= v[s] * wc; } +#endif } /* ************************************************************************* */ @@ -379,29 +394,30 @@ weighted_eliminate(Matrix& A, Vector& b, const Vector& sigmas) { /* ************************************************************************* */ void householder_(Matrix &A, size_t k) { - const size_t m = A.size1(), n = A.size2(), kprime = min(k,min(m,n)); - - // loop over the kprime first columns - for(size_t j=0; j < kprime; j++){ - // below, the indices r,c always refer to original A + const size_t m = A.size1(), n = A.size2(), kprime = min(k,min(m,n)); - // copy column from matrix to xjm, i.e. x(j:m) = A(j:m,j) - Vector xjm(m-j); - for(size_t r = j ; r < m; r++) - xjm(r-j) = A(r,j); - - // calculate the Householder vector - double beta; Vector vjm; - boost::tie(beta,vjm) = house(xjm); + // Original version + // loop over the kprime first columns + for(size_t j=0; j < kprime; j++){ + // below, the indices r,c always refer to original A - // do outer product update A = (I-beta vv')*A = A - v*(beta*A'*v)' = A - v*w' - householder_update(A, j, beta, vjm) ; + // copy column from matrix to xjm, i.e. x(j:m) = A(j:m,j) + Vector xjm(m-j); + for(size_t r = j ; r < m; r++) + xjm(r-j) = A(r,j); - // the Householder vector is copied in the zeroed out part - for( size_t r = j+1 ; r < m ; r++ ) - A(r,j) = vjm(r-j); + // calculate the Householder vector + double beta; Vector vjm; + boost::tie(beta,vjm) = house(xjm); - } // column j + // do outer product update A = (I-beta vv')*A = A - v*(beta*A'*v)' = A - v*w' + householder_update(A, j, beta, vjm) ; + + // the Householder vector is copied in the zeroed out part + for( size_t r = j+1 ; r < m ; r++ ) + A(r,j) = vjm(r-j); + + } // column j } /* ************************************************************************* */ diff --git a/cpp/timeMatrix.cpp b/cpp/timeMatrix.cpp index 23ea119e9..7ac4f3216 100644 --- a/cpp/timeMatrix.cpp +++ b/cpp/timeMatrix.cpp @@ -156,6 +156,8 @@ double timeColumn(size_t reps) { * Alex's machine * Baseline (no householder, just matrix copy) : 0.05 sec * Initial : 8.20 sec + * All in one function : 7.89 sec + * Replace householder update with GSL, ATLAS : 0.92 sec */ double timeHouseholder(size_t reps) { // create a matrix @@ -178,48 +180,33 @@ double timeHouseholder(size_t reps) { return elapsed; } -//double data[] = {-5, 0, 5, 0, 0, 0, -1, -// 00, -5, 0, 5, 0, 0, 1.5, -// 10, 0, 0, 0,-10, 0, 2, -// 00, 10, 0, 0, 0,-10, -1}; -// -//// check in-place householder, with v vectors below diagonal -//double data1[] = { -// 11.1803, 0, -2.2361, 0, -8.9443, 0, 2.236, -// 0, 11.1803, 0, -2.2361, 0, -8.9443, -1.565, -// -0.618034, 0, 4.4721, 0, -4.4721, 0, 0, -// 0, -0.618034, 0, 4.4721, 0, -4.4721, 0.894 }; -//Matrix expected1 = Matrix_(4,7, data1); -//Matrix A1 = Matrix_(4, 7, data); -//householder_(A1,3); - int main(int argc, char ** argv) { - // Time collect() - cout << "Starting Matrix::collect() Timing" << endl; - //size_t p = 100000; size_t m = 10; size_t n = 12; size_t reps = 50; - size_t p = 10; size_t m = 10; size_t n = 12; size_t reps = 10000000; - double collect_time1 = timeCollect(p, m, n, false, reps); - double collect_time2 = timeCollect(p, m, n, true, reps); - cout << "Average Elapsed time for collect (no pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time1 << endl; - cout << "Average Elapsed time for collect (pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time2 << endl; - - // Time vector_scale_column - cout << "Starting Matrix::vector_scale(column) Timing" << endl; - size_t m1 = 400; size_t n1 = 480; size_t reps1 = 1000; - double vsColumn_time = timeVScaleColumn(m1, n1, reps1); - cout << "Elapsed time for vector_scale(column) [(" << m1 << ", " << n1 << ") matrix] : " << vsColumn_time << endl; - - // Time vector_scale_row - cout << "Starting Matrix::vector_scale(row) Timing" << endl; - double vsRow_time = timeVScaleRow(m1, n1, reps1); - cout << "Elapsed time for vector_scale(row) [(" << m1 << ", " << n1 << ") matrix] : " << vsRow_time << endl; - - // Time column_() NOTE: using the ublas version - cout << "Starting column_() Timing" << endl; - size_t reps2 = 200000; - double column_time = timeColumn(reps2); - cout << "Time: " << column_time << " sec" << endl; +// // Time collect() +// cout << "Starting Matrix::collect() Timing" << endl; +// //size_t p = 100000; size_t m = 10; size_t n = 12; size_t reps = 50; +// size_t p = 10; size_t m = 10; size_t n = 12; size_t reps = 10000000; +// double collect_time1 = timeCollect(p, m, n, false, reps); +// double collect_time2 = timeCollect(p, m, n, true, reps); +// cout << "Average Elapsed time for collect (no pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time1 << endl; +// cout << "Average Elapsed time for collect (pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time2 << endl; +// +// // Time vector_scale_column +// cout << "Starting Matrix::vector_scale(column) Timing" << endl; +// size_t m1 = 400; size_t n1 = 480; size_t reps1 = 1000; +// double vsColumn_time = timeVScaleColumn(m1, n1, reps1); +// cout << "Elapsed time for vector_scale(column) [(" << m1 << ", " << n1 << ") matrix] : " << vsColumn_time << endl; +// +// // Time vector_scale_row +// cout << "Starting Matrix::vector_scale(row) Timing" << endl; +// double vsRow_time = timeVScaleRow(m1, n1, reps1); +// cout << "Elapsed time for vector_scale(row) [(" << m1 << ", " << n1 << ") matrix] : " << vsRow_time << endl; +// +// // Time column_() NOTE: using the ublas version +// cout << "Starting column_() Timing" << endl; +// size_t reps2 = 200000; +// double column_time = timeColumn(reps2); +// cout << "Time: " << column_time << " sec" << endl; // Time householder_ function cout << "Starting householder_() Timing" << endl;