Checked Matrix::collect() with a different timing metric: the memcpy version is approximately 25% faster than the original version, while the matrix_range version is slower than or the same speed as the original version.
parent
977a97eacf
commit
6bc0462669
|
@ -506,6 +506,16 @@ Matrix collect(const std::vector<const Matrix *>& matrices, size_t m, size_t n)
|
||||||
dimA2 += M->size2();
|
dimA2 += M->size2();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// original version
|
||||||
|
// Matrix A(dimA1, dimA2);
|
||||||
|
// size_t hindex = 0;
|
||||||
|
// BOOST_FOREACH(const Matrix* M, matrices) {
|
||||||
|
// for(size_t d1 = 0; d1 < M->size1(); d1++)
|
||||||
|
// for(size_t d2 = 0; d2 < M->size2(); d2++)
|
||||||
|
// A(d1, d2+hindex) = (*M)(d1, d2);
|
||||||
|
// hindex += M->size2();
|
||||||
|
// }
|
||||||
|
|
||||||
// matrix_range version
|
// matrix_range version
|
||||||
// Result: slower
|
// Result: slower
|
||||||
// Matrix A(dimA1, dimA2);
|
// Matrix A(dimA1, dimA2);
|
||||||
|
|
|
@ -14,9 +14,14 @@ using namespace gtsam;
|
||||||
/*
|
/*
|
||||||
* Results:
|
* Results:
|
||||||
* Alex's Machine:
|
* Alex's Machine:
|
||||||
|
* (using p = 100000 m = 10 n = 12 reps = 50) - Average times
|
||||||
* - (1st pass of simple changes) no pass: 0.184 sec , pass: 0.181 sec
|
* - (1st pass of simple changes) no pass: 0.184 sec , pass: 0.181 sec
|
||||||
* - (1st rev memcpy) no pass: 0.181 sec , pass: 0.180 sec
|
* - (1st rev memcpy) no pass: 0.181 sec , pass: 0.180 sec
|
||||||
* - (1st rev matrix_range) no pass: 0.186 sec , pass: 0.184 sec
|
* - (1st rev matrix_range) no pass: 0.186 sec , pass: 0.184 sec
|
||||||
|
* (using p = 10 m = 10 n = 12 reps = 10000000)
|
||||||
|
* - (matrix_range version) no pass: 24.21 sec , pass: 23.97 sec
|
||||||
|
* - (memcpy version) no pass: 18.96 sec , pass: 18.39 sec
|
||||||
|
* - (original version) no pass: 23.45 sec , pass: 22.80 sec
|
||||||
*/
|
*/
|
||||||
double timeCollect(size_t p, size_t m, size_t n, bool passDims, size_t reps) {
|
double timeCollect(size_t p, size_t m, size_t n, bool passDims, size_t reps) {
|
||||||
// create a large number of matrices
|
// create a large number of matrices
|
||||||
|
@ -52,7 +57,8 @@ double timeCollect(size_t p, size_t m, size_t n, bool passDims, size_t reps) {
|
||||||
delete matrices[i];
|
delete matrices[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
return elapsed/reps;
|
return elapsed;
|
||||||
|
//return elapsed/reps;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -119,7 +125,8 @@ int main(int argc, char ** argv) {
|
||||||
|
|
||||||
// Time collect()
|
// Time collect()
|
||||||
cout << "Starting Matrix::collect() Timing" << endl;
|
cout << "Starting Matrix::collect() Timing" << endl;
|
||||||
size_t p = 100000; size_t m = 10; size_t n = 12; size_t reps = 50;
|
//size_t p = 100000; size_t m = 10; size_t n = 12; size_t reps = 50;
|
||||||
|
size_t p = 10; size_t m = 10; size_t n = 12; size_t reps = 10000000;
|
||||||
double collect_time1 = timeCollect(p, m, n, false, reps);
|
double collect_time1 = timeCollect(p, m, n, false, reps);
|
||||||
double collect_time2 = timeCollect(p, m, n, true, reps);
|
double collect_time2 = timeCollect(p, m, n, true, reps);
|
||||||
cout << "Average Elapsed time for collect (no pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time1 << endl;
|
cout << "Average Elapsed time for collect (no pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time1 << endl;
|
||||||
|
|
Loading…
Reference in New Issue