278 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			C++
		
	
	
		
		
			
		
	
	
			278 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			C++
		
	
	
|  | /* ----------------------------------------------------------------------------
 | ||
|  | 
 | ||
|  |  * GTSAM Copyright 2010, Georgia Tech Research Corporation,  | ||
|  |  * Atlanta, Georgia 30332-0415 | ||
|  |  * All Rights Reserved | ||
|  |  * Authors: Frank Dellaert, et al. (see THANKS for the full author list) | ||
|  | 
 | ||
|  |  * See LICENSE for the license information | ||
|  | 
 | ||
|  |  * -------------------------------------------------------------------------- */ | ||
|  | 
 | ||
|  | /*
 | ||
|  |  * @file timeMatrix.cpp | ||
|  |  * @brief Performs timing and profiling for Matrix operations | ||
|  |  * @author Alex Cunningham | ||
|  |  */ | ||
|  | 
 | ||
|  | #include <iostream>
 | ||
|  | #include <boost/timer.hpp>
 | ||
|  | #include <gtsam/base/Matrix.h>
 | ||
|  | 
 | ||
|  | using namespace std; | ||
|  | using namespace gtsam; | ||
|  | 
 | ||
|  | /*
 | ||
|  |  * Results: | ||
|  |  * Alex's Machine: | ||
|  |  * (using p = 100000 m = 10 n = 12 reps = 50) - Average times | ||
|  |  *  - (1st pass of simple changes) no pass: 0.184  sec , pass: 0.181 sec | ||
|  |  *  - (1st rev memcpy)             no pass: 0.181  sec , pass: 0.180 sec | ||
|  |  *  - (1st rev matrix_range)       no pass: 0.186  sec , pass: 0.184 sec | ||
|  |  * (using p = 10 m = 10 n = 12 reps = 10000000) | ||
|  |  *  - (matrix_range version)       no pass: 24.21  sec , pass: 23.97 sec | ||
|  |  *  - (memcpy version)             no pass: 18.96  sec , pass: 18.39 sec | ||
|  |  *  - (original version)           no pass: 23.45  sec , pass: 22.80 sec | ||
|  |  *  - rev 2100                     no pass: 18.45  sec , pass: 18.35 sec | ||
|  |  */ | ||
|  | double timeCollect(size_t p, size_t m, size_t n, bool passDims, size_t reps) { | ||
|  |   // create a large number of matrices
 | ||
|  |   // p =  number of matrices
 | ||
|  |   // m =  rows per matrix
 | ||
|  |   // n =  columns per matrix
 | ||
|  |   // reps = number of repetitions
 | ||
|  | 
 | ||
|  |   // fill the matrices with identities
 | ||
|  |   vector<const Matrix *> matrices; | ||
|  |   for (size_t i=0; i<p;++i) { | ||
|  |     Matrix * M = new Matrix; | ||
|  |     (*M) = eye(m,n); | ||
|  |     matrices.push_back(M); | ||
|  |   } | ||
|  | 
 | ||
|  |   // start timing
 | ||
|  |   Matrix result; | ||
|  |   double elapsed; | ||
|  |   { | ||
|  |     boost::timer t; | ||
|  | 
 | ||
|  |     if (passDims) | ||
|  |       for (size_t i=0; i<reps; ++i) | ||
|  |         result = collect(matrices, m, n); | ||
|  |     else | ||
|  |       for (size_t i=0; i<reps; ++i) | ||
|  |         result = collect(matrices); | ||
|  |     elapsed = t.elapsed(); | ||
|  |   } | ||
|  |   // delete the matrices
 | ||
|  |   for (size_t i=0; i<p;++i) { | ||
|  |     delete matrices[i]; | ||
|  |   } | ||
|  | 
 | ||
|  |   return elapsed; | ||
|  |   //return elapsed/reps;
 | ||
|  | } | ||
|  | 
 | ||
|  | /*
 | ||
|  |  * Results: | ||
|  |  * Alex's Machine: | ||
|  |  *  - Original : 0.60 sec (x1000) | ||
|  |  *  - 1st Rev  : 0.49 sec (x1000) | ||
|  |  *  - rev 2100 : 0.52 sec (x1000) | ||
|  |  */ | ||
|  | double timeVScaleColumn(size_t m, size_t n, size_t reps) { | ||
|  |   // make a matrix to scale
 | ||
|  |   Matrix M(m, n); | ||
|  |   for (size_t i=0; i<m; ++i) | ||
|  |     for (size_t j=0; j<n; ++j) | ||
|  |       M(i,j) = 2*i+j; | ||
|  | 
 | ||
|  |   // make a vector to use for scaling
 | ||
|  |   Vector V(m); | ||
|  |   for (size_t i=0; i<m; ++i) | ||
|  |     V(i) = i*2; | ||
|  | 
 | ||
|  |   double elapsed; | ||
|  |   Matrix result; | ||
|  |   { | ||
|  |     boost::timer t; | ||
|  |     for (size_t i=0; i<reps; ++i) | ||
|  |       Matrix result = vector_scale(M,V); | ||
|  |     elapsed = t.elapsed(); | ||
|  |   } | ||
|  | 
 | ||
|  |   return elapsed; | ||
|  | } | ||
|  | 
 | ||
|  | /*
 | ||
|  |  * Results: | ||
|  |  * Alex's Machine: | ||
|  |  *  - Original : 0.54 sec (x1000) | ||
|  |  *  - 1st rev  : 0.44 sec (x1000) | ||
|  |  *  - rev 2100 : 1.69 sec (x1000) | ||
|  |  */ | ||
|  | double timeVScaleRow(size_t m, size_t n, size_t reps) { | ||
|  |   // make a matrix to scale
 | ||
|  |   Matrix M(m, n); | ||
|  |   for (size_t i=0; i<m; ++i) | ||
|  |     for (size_t j=0; j<n; ++j) | ||
|  |       M(i,j) = 2*i+j; | ||
|  | 
 | ||
|  |   // make a vector to use for scaling
 | ||
|  |   Vector V(n); | ||
|  |   for (size_t i=0; i<n; ++i) | ||
|  |     V(i) = i*2; | ||
|  | 
 | ||
|  |   double elapsed; | ||
|  |   Matrix result; | ||
|  |   { | ||
|  |     boost::timer t; | ||
|  |     for (size_t i=0; i<reps; ++i) | ||
|  |       result = vector_scale(V,M); | ||
|  |     elapsed = t.elapsed(); | ||
|  |   } | ||
|  | 
 | ||
|  |   return elapsed; | ||
|  | } | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * Results: | ||
|  |  * Alex's Machine (reps = 200000) | ||
|  |  *  - ublas matrix_column  : 4.63 sec | ||
|  |  *  - naive implementation : 4.70 sec | ||
|  |  * | ||
|  |  * reps = 2000000 | ||
|  |  *  - rev 2100             : 45.11 sec | ||
|  |  */ | ||
|  | double timeColumn(size_t reps) { | ||
|  |   // create a matrix
 | ||
|  |   size_t m = 100; size_t n = 100; | ||
|  |   Matrix M(m, n); | ||
|  |   for (size_t i=0; i<m; ++i) | ||
|  |       for (size_t j=0; j<n; ++j) | ||
|  |         M(i,j) = 2*i+j; | ||
|  | 
 | ||
|  |   // extract a column
 | ||
|  |   double elapsed; | ||
|  |   Vector result; | ||
|  |   { | ||
|  |     boost::timer t; | ||
|  |     for (size_t i=0; i<reps; ++i) | ||
|  |       for (size_t j = 0; j<n; ++j) | ||
|  |         //result = ublas::matrix_column<Matrix>(M, j);
 | ||
|  |         result = column(M, j); | ||
|  |     elapsed = t.elapsed(); | ||
|  |   } | ||
|  |   return elapsed; | ||
|  | } | ||
|  | 
 | ||
|  | /*
 | ||
|  |  * Results | ||
|  |  * Alex's machine | ||
|  |  * | ||
|  |  * Runs at reps = 500000 | ||
|  |  * Baseline (no householder, just matrix copy) : 0.05 sec | ||
|  |  * Initial                                     : 8.20 sec | ||
|  |  * All in one function                         : 7.89 sec | ||
|  |  * Replace householder update with GSL, ATLAS  : 0.92 sec | ||
|  |  * | ||
|  |  * Runs at reps = 2000000 | ||
|  |  * Baseline (GSL/ATLAS householder update)     : 3.61 sec | ||
|  |  * | ||
|  |  * Runs at reps = 5000000 | ||
|  |  * Baseline                                    : 8.76 sec | ||
|  |  * GSL/Atlas version of updateAb               : 9.03 sec // Why does this have an effect?
 | ||
|  |  * Inlining house()                            : 6.33 sec | ||
|  |  * Inlining householder_update [GSL]           : 6.15 sec | ||
|  |  * Rev 2100                                    : 5.75 sec | ||
|  |  * | ||
|  |  */ | ||
|  | double timeHouseholder(size_t reps) { | ||
|  |   // create a matrix
 | ||
|  |   Matrix Abase = (Matrix(4, 7) << | ||
|  |       -5,  0, 5, 0,  0,  0,  -1, | ||
|  |       00, -5, 0, 5,  0,  0, 1.5, | ||
|  |       10,  0, 0, 0,-10,  0,   2, | ||
|  |       00, 10, 0, 0,  0,-10,  -1); | ||
|  | 
 | ||
|  |   // perform timing
 | ||
|  |   double elapsed; | ||
|  |   { | ||
|  |     boost::timer t; | ||
|  |     for (size_t i=0; i<reps; ++i) { | ||
|  |       Matrix A = Abase; | ||
|  |       householder_(A,3); | ||
|  |     } | ||
|  |     elapsed = t.elapsed(); | ||
|  |   } | ||
|  |   return elapsed; | ||
|  | } | ||
|  | /**
 | ||
|  |  * Results: (Alex's machine) | ||
|  |  * reps: 200000 | ||
|  |  * | ||
|  |  * Initial (boost matrix proxies) - 12.08 | ||
|  |  * Direct pointer method          - 4.62 | ||
|  |  */ | ||
|  | double timeMatrixInsert(size_t reps) { | ||
|  |   // create a matrix
 | ||
|  |   Matrix bigBase = zeros(100, 100); | ||
|  |   Matrix small = eye(5,5); | ||
|  | 
 | ||
|  |   // perform timing
 | ||
|  |   double elapsed; | ||
|  |   { | ||
|  |     boost::timer t; | ||
|  |     Matrix big = bigBase; | ||
|  |     for (size_t rep=0; rep<reps; ++rep) | ||
|  |       for (size_t i=0; i<100; i += 5) | ||
|  |         for (size_t j=0; j<100; j += 5) | ||
|  |           insertSub(big, small, i,j); | ||
|  |     elapsed = t.elapsed(); | ||
|  |   } | ||
|  |   return elapsed; | ||
|  | } | ||
|  | 
 | ||
|  | int main(int argc, char ** argv) { | ||
|  | 
 | ||
|  |   // Time collect()
 | ||
|  |   cout << "Starting Matrix::collect() Timing" << endl; | ||
|  |   //size_t p = 100000; size_t m = 10; size_t n = 12; size_t reps = 50;
 | ||
|  |   size_t p = 10; size_t m = 10; size_t n = 12; size_t reps = 10000000; | ||
|  |   double collect_time1 = timeCollect(p, m, n, false, reps); | ||
|  |   double collect_time2 = timeCollect(p, m, n, true, reps); | ||
|  |   cout << "Average Elapsed time for collect (no pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time1 << endl; | ||
|  |   cout << "Average Elapsed time for collect (pass)    [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time2 << endl; | ||
|  | 
 | ||
|  |   // Time vector_scale_column
 | ||
|  |   cout << "Starting Matrix::vector_scale(column) Timing" << endl; | ||
|  |   size_t m1 = 400; size_t n1 = 480; size_t reps1 = 1000; | ||
|  |   double vsColumn_time = timeVScaleColumn(m1, n1, reps1); | ||
|  |   cout << "Elapsed time for vector_scale(column) [(" << m1 << ", " << n1 << ") matrix] : " << vsColumn_time << endl; | ||
|  | 
 | ||
|  |   // Time vector_scale_row
 | ||
|  |   cout << "Starting Matrix::vector_scale(row)    Timing" << endl; | ||
|  |   double vsRow_time = timeVScaleRow(m1, n1, reps1); | ||
|  |   cout << "Elapsed time for vector_scale(row)    [(" << m1 << ", " << n1 << ") matrix] : " << vsRow_time << endl; | ||
|  | 
 | ||
|  |   // Time column() NOTE: using the ublas version
 | ||
|  |   cout << "Starting column() Timing" << endl; | ||
|  |   size_t reps2 = 2000000; | ||
|  |   double column_time = timeColumn(reps2); | ||
|  |   cout << "Time: " << column_time << " sec" << endl; | ||
|  | 
 | ||
|  |   // Time householder_ function
 | ||
|  |   cout << "Starting householder_() Timing" << endl; | ||
|  |   size_t reps_house = 5000000; | ||
|  |   double house_time = timeHouseholder(reps_house); | ||
|  |   cout << "Elapsed time for householder_() : " << house_time << " sec" << endl; | ||
|  | 
 | ||
|  |   // Time matrix insertion
 | ||
|  |   cout << "Starting insertSub() Timing" << endl; | ||
|  |   size_t reps_insert = 200000; | ||
|  |   double insert_time = timeMatrixInsert(reps_insert); | ||
|  |   cout << "Elapsed time for insertSub() : " << insert_time << " sec" << endl; | ||
|  | 
 | ||
|  |   return 0; | ||
|  | } |