Reimplemented matrix::householder_update using GSL and BLAS, you'll need to edit Makefile.am to enable the new version -- see email

release/4.3a0
Alex Cunningham 2010-01-21 00:59:33 +00:00
parent 188561d925
commit 063aa14118
4 changed files with 102 additions and 59 deletions

View File

@ -23,6 +23,26 @@ AC_ARG_ENABLE([debug],
*) AC_MSG_ERROR([bad value ${enableval} for --enable-debug]) ;; *) AC_MSG_ERROR([bad value ${enableval} for --enable-debug]) ;;
esac],[debug=false]) esac],[debug=false])
AM_CONDITIONAL([DEBUG], [test x$debug = xtrue]) AM_CONDITIONAL([DEBUG], [test x$debug = xtrue])
# enable using GSL for linalg
AC_ARG_ENABLE([gsl],
[ --enable-gsl Enable the GSL library],
[case "${enableval}" in
yes) gsl=true ;;
no) gsl=false ;;
*) AC_MSG_ERROR([bad value ${enableval} for --enable-gsl]) ;;
esac],[gsl=false])
AM_CONDITIONAL([GSL], [test x$gsl = xtrue])
# enable using ATLAS for BLAS
AC_ARG_ENABLE([atlas],
[ --enable-atlas Enable ATLAS optimized BLAS],
[case "${enableval}" in
yes) atlas=true ;;
no) atlas=false ;;
*) AC_MSG_ERROR([bad value ${enableval} for --enable-atlas]) ;;
esac],[atlas=false])
AM_CONDITIONAL([ATLAS], [test x$atlas = xtrue])
# Checks for programs. # Checks for programs.
AC_PROG_CXX AC_PROG_CXX

View File

@ -266,6 +266,26 @@ include_HEADERS = $(headers)
AM_CXXFLAGS += -I.. AM_CXXFLAGS += -I..
AM_LDFLAGS = -L../CppUnitLite -lCppUnitLite $(BOOST_LDFLAGS) $(boost_serialization) #-L. -lgtsam AM_LDFLAGS = -L../CppUnitLite -lCppUnitLite $(BOOST_LDFLAGS) $(boost_serialization) #-L. -lgtsam
# TO ENABLE GSL AND ATLAS, uncomment this part!
# Direct call for gsl and atlas
# AM_LDFLAGS += -lgsl -lcblas -latlas
# libgtsam_la_LDFLAGS += -lgsl -lcblas -latlas
# AM_CXXFLAGS += -DGSL
# libgtsam_la_CPPFLAGS += -DGSL
# GSL using ATLAS
# if GSL
# AM_CXXFLAGS += -DGSL
# libgtsam_la_CPPFLAGS += -DGSL
# if ATLAS
# AM_LDFLAGS += -lgsl -lcblas -latlas
# libgtsam_la_LDFLAGS += -lgsl -lcblas -latlas
# else
# AM_LDFLAGS += -lgsl -lgslcblas
# libgtsam_la_LDFLAGS += -lgsl -lgslcblas
# endif
# endif
TESTS = $(check_PROGRAMS) TESTS = $(check_PROGRAMS)
CXXLINK = $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ CXXLINK = $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
$(CXXLD) -g $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ $(CXXLD) -g $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \

View File

@ -9,6 +9,11 @@
#include <iomanip> #include <iomanip>
#include <list> #include <list>
#ifdef GSL
#include <gsl/gsl_blas.h> // needed for gsl blas
#include <gsl/gsl_linalg.h>
#endif
#include <boost/numeric/ublas/matrix_proxy.hpp> #include <boost/numeric/ublas/matrix_proxy.hpp>
#include <boost/foreach.hpp> #include <boost/foreach.hpp>
#include <boost/numeric/ublas/lu.hpp> #include <boost/numeric/ublas/lu.hpp>
@ -19,6 +24,7 @@
#include "Vector.h" #include "Vector.h"
#include "svdcmp.h" #include "svdcmp.h"
using namespace std; using namespace std;
namespace ublas = boost::numeric::ublas; namespace ublas = boost::numeric::ublas;
@ -276,6 +282,14 @@ void householder_update(Matrix &A, int j, double beta, const Vector& vjm) {
const size_t m = A.size1(), n = A.size2(); const size_t m = A.size1(), n = A.size2();
#ifdef GSL
// use GSL version
gsl_vector_const_view v = gsl_vector_const_view_array(vjm.data().begin(), m-j);
gsl_matrix_view Ag = gsl_matrix_view_array(A.data().begin(), m, n);
gsl_matrix_view Ag_view = gsl_matrix_submatrix (&(Ag.matrix), j, 0, m-j, n);
gsl_linalg_householder_hm (beta, &(v.vector), &(Ag_view.matrix));
#else
// elegant but slow: A -= Matrix(outer_prod(v,beta*trans(A)*v)); // elegant but slow: A -= Matrix(outer_prod(v,beta*trans(A)*v));
// faster code below // faster code below
@ -299,6 +313,7 @@ void householder_update(Matrix &A, int j, double beta, const Vector& vjm) {
// A(r,c) -= vjm(r-j) * wjn(c-j); // A(r,c) -= vjm(r-j) * wjn(c-j);
(*a) -= v[s] * wc; (*a) -= v[s] * wc;
} }
#endif
} }
/* ************************************************************************* */ /* ************************************************************************* */
@ -379,29 +394,30 @@ weighted_eliminate(Matrix& A, Vector& b, const Vector& sigmas) {
/* ************************************************************************* */ /* ************************************************************************* */
void householder_(Matrix &A, size_t k) void householder_(Matrix &A, size_t k)
{ {
const size_t m = A.size1(), n = A.size2(), kprime = min(k,min(m,n)); const size_t m = A.size1(), n = A.size2(), kprime = min(k,min(m,n));
// loop over the kprime first columns
for(size_t j=0; j < kprime; j++){
// below, the indices r,c always refer to original A
// copy column from matrix to xjm, i.e. x(j:m) = A(j:m,j) // Original version
Vector xjm(m-j); // loop over the kprime first columns
for(size_t r = j ; r < m; r++) for(size_t j=0; j < kprime; j++){
xjm(r-j) = A(r,j); // below, the indices r,c always refer to original A
// calculate the Householder vector
double beta; Vector vjm;
boost::tie(beta,vjm) = house(xjm);
// do outer product update A = (I-beta vv')*A = A - v*(beta*A'*v)' = A - v*w' // copy column from matrix to xjm, i.e. x(j:m) = A(j:m,j)
householder_update(A, j, beta, vjm) ; Vector xjm(m-j);
for(size_t r = j ; r < m; r++)
xjm(r-j) = A(r,j);
// the Householder vector is copied in the zeroed out part // calculate the Householder vector
for( size_t r = j+1 ; r < m ; r++ ) double beta; Vector vjm;
A(r,j) = vjm(r-j); boost::tie(beta,vjm) = house(xjm);
} // column j // do outer product update A = (I-beta vv')*A = A - v*(beta*A'*v)' = A - v*w'
householder_update(A, j, beta, vjm) ;
// the Householder vector is copied in the zeroed out part
for( size_t r = j+1 ; r < m ; r++ )
A(r,j) = vjm(r-j);
} // column j
} }
/* ************************************************************************* */ /* ************************************************************************* */

View File

@ -156,6 +156,8 @@ double timeColumn(size_t reps) {
* Alex's machine * Alex's machine
* Baseline (no householder, just matrix copy) : 0.05 sec * Baseline (no householder, just matrix copy) : 0.05 sec
* Initial : 8.20 sec * Initial : 8.20 sec
* All in one function : 7.89 sec
* Replace householder update with GSL, ATLAS : 0.92 sec
*/ */
double timeHouseholder(size_t reps) { double timeHouseholder(size_t reps) {
// create a matrix // create a matrix
@ -178,48 +180,33 @@ double timeHouseholder(size_t reps) {
return elapsed; return elapsed;
} }
//double data[] = {-5, 0, 5, 0, 0, 0, -1,
// 00, -5, 0, 5, 0, 0, 1.5,
// 10, 0, 0, 0,-10, 0, 2,
// 00, 10, 0, 0, 0,-10, -1};
//
//// check in-place householder, with v vectors below diagonal
//double data1[] = {
// 11.1803, 0, -2.2361, 0, -8.9443, 0, 2.236,
// 0, 11.1803, 0, -2.2361, 0, -8.9443, -1.565,
// -0.618034, 0, 4.4721, 0, -4.4721, 0, 0,
// 0, -0.618034, 0, 4.4721, 0, -4.4721, 0.894 };
//Matrix expected1 = Matrix_(4,7, data1);
//Matrix A1 = Matrix_(4, 7, data);
//householder_(A1,3);
int main(int argc, char ** argv) { int main(int argc, char ** argv) {
// Time collect() // // Time collect()
cout << "Starting Matrix::collect() Timing" << endl; // cout << "Starting Matrix::collect() Timing" << endl;
//size_t p = 100000; size_t m = 10; size_t n = 12; size_t reps = 50; // //size_t p = 100000; size_t m = 10; size_t n = 12; size_t reps = 50;
size_t p = 10; size_t m = 10; size_t n = 12; size_t reps = 10000000; // size_t p = 10; size_t m = 10; size_t n = 12; size_t reps = 10000000;
double collect_time1 = timeCollect(p, m, n, false, reps); // double collect_time1 = timeCollect(p, m, n, false, reps);
double collect_time2 = timeCollect(p, m, n, true, reps); // double collect_time2 = timeCollect(p, m, n, true, reps);
cout << "Average Elapsed time for collect (no pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time1 << endl; // cout << "Average Elapsed time for collect (no pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time1 << endl;
cout << "Average Elapsed time for collect (pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time2 << endl; // cout << "Average Elapsed time for collect (pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time2 << endl;
//
// Time vector_scale_column // // Time vector_scale_column
cout << "Starting Matrix::vector_scale(column) Timing" << endl; // cout << "Starting Matrix::vector_scale(column) Timing" << endl;
size_t m1 = 400; size_t n1 = 480; size_t reps1 = 1000; // size_t m1 = 400; size_t n1 = 480; size_t reps1 = 1000;
double vsColumn_time = timeVScaleColumn(m1, n1, reps1); // double vsColumn_time = timeVScaleColumn(m1, n1, reps1);
cout << "Elapsed time for vector_scale(column) [(" << m1 << ", " << n1 << ") matrix] : " << vsColumn_time << endl; // cout << "Elapsed time for vector_scale(column) [(" << m1 << ", " << n1 << ") matrix] : " << vsColumn_time << endl;
//
// Time vector_scale_row // // Time vector_scale_row
cout << "Starting Matrix::vector_scale(row) Timing" << endl; // cout << "Starting Matrix::vector_scale(row) Timing" << endl;
double vsRow_time = timeVScaleRow(m1, n1, reps1); // double vsRow_time = timeVScaleRow(m1, n1, reps1);
cout << "Elapsed time for vector_scale(row) [(" << m1 << ", " << n1 << ") matrix] : " << vsRow_time << endl; // cout << "Elapsed time for vector_scale(row) [(" << m1 << ", " << n1 << ") matrix] : " << vsRow_time << endl;
//
// Time column_() NOTE: using the ublas version // // Time column_() NOTE: using the ublas version
cout << "Starting column_() Timing" << endl; // cout << "Starting column_() Timing" << endl;
size_t reps2 = 200000; // size_t reps2 = 200000;
double column_time = timeColumn(reps2); // double column_time = timeColumn(reps2);
cout << "Time: " << column_time << " sec" << endl; // cout << "Time: " << column_time << " sec" << endl;
// Time householder_ function // Time householder_ function
cout << "Starting householder_() Timing" << endl; cout << "Starting householder_() Timing" << endl;