BLAS level 1 style axpy calls in Vector and VectorConfig shave some seconds off PCG

2010-01-30 02:04:37 +00:00 · 2010-01-30 02:04:37 +00:00 · 4913326c22
parent 65bc90bf15
commit 4913326c22
8 changed files with 135 additions and 45 deletions
--- a/cpp/Vector.cpp
+++ b/cpp/Vector.cpp
@ -228,6 +228,24 @@ namespace gtsam {
  	return *(std::max_element(a.begin(), a.end()));
 	}

+  /* ************************************************************************* */
+  double dot(const Vector& a, const Vector& b) {
+  	size_t n = a.size();
+		assert (b.size()==n);
+		double result = 0.0;
+  	for (size_t i = 0; i < n; i++)
+  		result += a[i] * b[i];
+  	return result;
+  }
+
+  /* ************************************************************************* */
+  void axpy(double alpha, const Vector& x, Vector& y) {
+  	size_t n = x.size();
+		assert (y.size()==n);
+  	for (size_t i = 0; i < n; i++)
+  		y[i] += alpha * x[i];
+  }
+
  /* ************************************************************************* */
  Vector operator/(double s, const Vector& v) {
    Vector result(v.size());
--- a/cpp/Vector.h
+++ b/cpp/Vector.h
@ -199,7 +199,12 @@ Vector abs(const Vector& v);
 double max(const Vector &a);

 /** Dot product */
-inline double dot(const Vector &a, const Vector& b) { return sum(emul(a,b)); }
+double dot(const Vector &a, const Vector& b);
+
+/**
+ * BLAS Level 1 axpy: y <- alpha*x + y
+ */
+void axpy(double alpha, const Vector& x, Vector& y);

 /**
 * Divide every element of a Vector into a scalar
--- a/cpp/VectorConfig.cpp
+++ b/cpp/VectorConfig.cpp
@ -191,6 +191,14 @@ double VectorConfig::dot(const VectorConfig& b) const {
 double dot(const VectorConfig& a, const VectorConfig& b) {
 	return a.dot(b);
 }
+
+/* ************************************************************************* */
+void axpy(double alpha, const VectorConfig& x, VectorConfig& y) {
+	VectorConfig::const_iterator xj = x.begin();
+	for (VectorConfig::iterator yj = y.begin(); yj != y.end(); yj++, xj++)
+		axpy(alpha, xj->second, yj->second);
+}
+
 /* ************************************************************************* */

 void print(const VectorConfig& v, const std::string& s){
--- a/cpp/VectorConfig.h
+++ b/cpp/VectorConfig.h
@ -50,6 +50,8 @@ namespace gtsam {

    const_iterator begin() const {return values.begin();}
    const_iterator end()   const {return values.end();}
+    iterator begin() {return values.begin();}
+    iterator end()   {return values.end();}

    /** get a vector in the configuration by name */
    const Vector& get(const Symbol& name) const;
@ -135,6 +137,14 @@ namespace gtsam {
  /** Dot product */
  double dot(const VectorConfig&, const VectorConfig&);

+  /**
+   * BLAS Level 1 axpy: y <- alpha*x + y
+   * UNSAFE !!!! Only works if x and y laid out in exactly same shape
+   * Used in internal loop in iterative for fast conjugate gradients
+   * Consider using other functions if this is not in hotspot
+   */
+  void axpy(double alpha, const VectorConfig& x, VectorConfig& y);
+
  /** dim function (for iterative::CGD) */
  inline double dim(const VectorConfig& x) { return x.dim();}

--- a/cpp/iterative-inl.h
+++ b/cpp/iterative-inl.h
@ -28,7 +28,7 @@ namespace gtsam {
 		// Start with g0 = A'*(A*x0-b), d0 = - g0
 		// i.e., first step is in direction of negative gradient
 		V g = Ab.gradient(x);
-		V d = -g;
+		V d = g; // instead of negating gradient, alpha will be negated
 		double dotg0 = dot(g, g), prev_dotg = dotg0;
 		if (dotg0 < epsilon_abs) return x;
 		double threshold = epsilon * epsilon * dotg0;
@ -45,13 +45,14 @@ namespace gtsam {
 			double alpha = -dot(d, g) / dot(Ad, Ad);

 			// do step in new search direction
-			x = x + alpha * d;
+			x += alpha * d;
 			if (k==maxIterations) break;

 			// update gradient (or re-calculate at reset time)
-			g = (k%reset==0) ? Ab.gradient(x) : g + (Ab ^ Ad) * alpha;
-//			 g = g + alpha * (Ab ^ Ad);
-//			 g = Ab.gradient(x);
+			if (k%reset==0)
+				g = Ab.gradient(x);
+			else
+				axpy(alpha, Ab ^ Ad, g);

 			// check for convergence
 			double dotg = dot(g, g);
@ -61,11 +62,11 @@ namespace gtsam {

 			// calculate new search direction
 			if (steepest)
-				d = -g;
+				d = g;
 			else {
 				double beta = dotg / prev_dotg;
 				prev_dotg = dotg;
-				d = -g + beta * d;
+				d = g + d*beta;
 			}
 		}
 		return x;
--- a/cpp/testVector.cpp
+++ b/cpp/testVector.cpp
@ -200,14 +200,32 @@ TEST( TestVector, weightedPseudoinverse_nan )
 /* ************************************************************************* */
 TEST( TestVector, ediv )
 {
-  Vector a(3); a(0) = 10; a(1) = 20; a(2) = 30;
-  Vector b(3); b(0) =  2; b(1) = 5;  b(2) =  6;
+  Vector a = Vector_(3,10.,20.,30.);
+  Vector b = Vector_(3,2.0,5.0,6.0);
  Vector actual(ediv(a,b));

-  Vector c(3); c(0) =  5; c(1) = 4;  c(2) =  5;
+  Vector c = Vector_(3,5.0,4.0,5.0);
  CHECK(assert_equal(c,actual));
 }

+/* ************************************************************************* */
+TEST( TestVector, dot )
+{
+  Vector a = Vector_(3,10.,20.,30.);
+  Vector b = Vector_(3,2.0,5.0,6.0);
+  DOUBLES_EQUAL(20+100+180,dot(a,b),1e-9);
+}
+
+/* ************************************************************************* */
+TEST( TestVector, axpy )
+{
+  Vector x = Vector_(3,10.,20.,30.);
+  Vector y = Vector_(3,2.0,5.0,6.0);
+  axpy(0.1,x,y);
+  Vector expected = Vector_(3,3.0,7.0,9.0);
+  CHECK(assert_equal(expected,y));
+}
+
 /* ************************************************************************* */
 TEST( TestVector, equals )
 {
--- a/cpp/testVectorConfig.cpp
+++ b/cpp/testVectorConfig.cpp
@ -119,6 +119,19 @@ TEST( VectorConfig, scale) {
 	CHECK(assert_equal(actual, expected));
 }

+/* ************************************************************************* */
+TEST( VectorConfig, axpy) {
+  VectorConfig x,y,expected;
+  x += VectorConfig("x",Vector_(3, 1.0, 1.0, 1.0));
+  x += VectorConfig("y",Vector_(2, -1.0, -1.0));
+  y += VectorConfig("x",Vector_(3, 5.0, 6.0, 7.0));
+  y += VectorConfig("y",Vector_(2, 8.0, 9.0));
+  expected += VectorConfig("x",Vector_(3, 15.0, 16.0, 17.0));
+  expected += VectorConfig("y",Vector_(2, -2.0, -1.0));
+  axpy(10,x,y);
+  CHECK(assert_equal(expected,y));
+}
+
 /* ************************************************************************* */
 TEST( VectorConfig, update_with_large_delta) {
 	// this test ensures that if the update for delta is larger than
--- a/cpp/timeVectorConfig.cpp
+++ b/cpp/timeVectorConfig.cpp
@ -11,47 +11,64 @@
 using namespace std;
 using namespace gtsam;

-/*
- * Results:
- * Frank's machine:
- */
-double timeAssignment(size_t n, size_t m, size_t r) {
-	// assign a large VectorConfig
-	// n =  number of times to loop
-	// m =  number of vectors
-	// r =  rows per vector
+#define TIME(STATEMENT) { boost::timer t; \
+		for (int j = 0; j < n; ++j) STATEMENT; \
+		double time = t.elapsed(); \
+		cout << "Average elapsed time :" << 10e3 * time / n << "ms." << endl; }

-	// Create 2 VectorConfigs
-	VectorConfig a, b;
-	for (int i = 0; i < m; ++i) {
-		Vector v = zero(r);
-		Symbol key('x', i);
-		a.add(key, v);
-		b.add(key, v);
-	}
-
-	// start timing
-	double elapsed;
-	{
-		boost::timer t;
-
-		for (int j = 0; j < n; ++j)
-			a = b;
-
-		elapsed = t.elapsed();
-	}
-
-	return elapsed;
+/* ************************************************************************* */
+void unsafe_assign(VectorConfig& a, const VectorConfig& b) {
+	VectorConfig::const_iterator bp = b.begin();
+	for (VectorConfig::iterator ap = a.begin(); ap != a.end(); ap++, bp++)
+		ap->second = bp->second;
 }

+/* ************************************************************************* */
+void unsafe_add(VectorConfig& a, const VectorConfig& b) {
+	VectorConfig::const_iterator bp = b.begin();
+	for (VectorConfig::iterator ap = a.begin(); ap != a.end(); ap++, bp++)
+		ap->second += bp->second;
+}
+
+/* ************************************************************************* */
 int main(int argc, char ** argv) {

 	// Time assignment operator
 	cout << "Starting operator=() Timing" << endl;
-	size_t n = 1000, m = 10000, r = 3;
-	double time = timeAssignment(n, m, r);
-	cout << "Average Elapsed time for assigning " << m << "*" << r
-			<< " VectorConfigs: " << 10e3 * time / n << "ms." << endl;
+
+	// n =  number of times to loop
+	// m =  number of vectors
+	// r =  rows per vector
+	size_t n = 100, m = 10000, r = 3, alpha = 0.1;
+
+	// Create 2 VectorConfigs
+	VectorConfig x, y;
+	for (int i = 0; i < m; ++i) {
+		Vector v = zero(r);
+		Symbol key('x', i);
+		x.add(key, v);
+		y.add(key, v);
+	}
+
+	cout << "Convenient VectorConfig:" << endl;
+	TIME(x=y);
+	TIME(x+=y);
+	TIME(x+=alpha*y);
+	//TIME(a=a+b);
+
+	cout << "Unsafe VectorConfig:" << endl;
+	TIME(unsafe_assign(x,y));
+	TIME(unsafe_add(x,y));
+	TIME(axpy(alpha,x,y));
+
+	cout << "Compares with Vector:" << endl;
+	Vector v = zero(m * r), w = zero(m * r);
+	TIME(v=w);
+	TIME(v+=w);
+	TIME(v+=alpha*w);
+	TIME(axpy(alpha,v,w));
+	//	TIME(v=v+w);

 	return 0;
 }
+/* ************************************************************************* */