From 758c4b061d695fabe931e0df8aa5c6c1a6ec082e Mon Sep 17 00:00:00 2001 From: jingwuOUO Date: Wed, 30 Sep 2020 11:55:18 -0400 Subject: [PATCH] Refactor power and accelerated method --- gtsam/sfm/PowerMethod.h | 420 ++++++++++------------------ gtsam/sfm/ShonanAveraging.cpp | 16 +- gtsam/sfm/tests/testPowerMethod.cpp | 71 +++-- 3 files changed, 206 insertions(+), 301 deletions(-) diff --git a/gtsam/sfm/PowerMethod.h b/gtsam/sfm/PowerMethod.h index b8471e8e2..5fa355aaa 100644 --- a/gtsam/sfm/PowerMethod.h +++ b/gtsam/sfm/PowerMethod.h @@ -13,7 +13,8 @@ * @file PowerMethod.h * @date Sept 2020 * @author Jing Wu - * @brief accelerated power method for fast eigenvalue and eigenvector computation + * @brief accelerated power method for fast eigenvalue and eigenvector + * computation */ #pragma once @@ -24,15 +25,15 @@ #include #include +#include +#include +#include #include +#include #include #include #include #include -#include -#include -#include -#include namespace gtsam { @@ -41,9 +42,11 @@ using Sparse = Eigen::SparseMatrix; /* ************************************************************************* */ /// MINIMUM EIGENVALUE COMPUTATIONS -struct PowerFunctor { +// Template argument Operator just needs multiplication operator +template +class PowerMethod { /** - * \brief Computer i-th Eigenpair with power method + * \brief Compute maximum Eigenpair with power method * * References : * 1) Rosen, D. and Carlone, L., 2017, September. Computational @@ -52,144 +55,180 @@ struct PowerFunctor { * 2) Yulun Tian and Kasra Khosoussi and David M. Rosen and Jonathan P. How, * 2020, Aug, Distributed Certifiably Correct Pose-Graph Optimization, Arxiv * 3) C. de Sa, B. He, I. Mitliagkas, C. Ré, and P. Xu, “Accelerated - * stochastic power iteration,” in Proc. Mach. Learn. Res., no. 84, 2018, pp. 58–67 + * stochastic power iteration,” in Proc. Mach. Learn. Res., no. 84, 2018, pp. + * 58–67 * * It performs the following iteration: \f$ x_{k+1} = A * x_k + \beta * - * x_{k-1} \f$ where A is the certificate matrix, x is the ritz vector + * x_{k-1} \f$ where A is the certificate matrix, x is the Ritz vector * */ - + public: // Const reference to an externally-held matrix whose minimum-eigenvalue we // want to compute - const Sparse &A_; + const Operator &A_; - const Matrix &S_; + const int dim_; // dimension of Matrix A - const int m_n_; // dimension of Matrix A - const int m_nev_; // number of eigenvalues required + size_t nrIterations_; // number of iterations - // flag for running power method or accelerated power method. If false, the former, vice versa. - bool accelerated_; + private: + double ritzValues_; // all Ritz eigenvalues + Vector ritzVectors_; // all Ritz eigenvectors - // a Polyak momentum term - double beta_; + public: + // Constructor + explicit PowerMethod(const Operator &A, const Vector &initial) + : A_(A), dim_(A.rows()), nrIterations_(0) { + Vector x0; + x0 = initial.isZero(0) ? Vector::Random(dim_) : initial; + x0.normalize(); - // const int m_ncv_; // dimention of orthonormal basis subspace - size_t m_niter_; // number of iterations + // initialize Ritz eigen values + ritzValues_ = 0.0; -private: - Vector ritz_val_; // all ritz eigenvalues - Matrix ritz_vectors_; // all ritz eigenvectors - Vector ritz_conv_; // store whether the ritz eigenpair converged - Vector sorted_ritz_val_; // sorted converged eigenvalue - Matrix sorted_ritz_vectors_; // sorted converged eigenvectors + // initialize Ritz eigen vectors + ritzVectors_.resize(dim_, 1); + ritzVectors_.setZero(); -public: - // Constructor - explicit PowerFunctor(const Sparse& A, const Matrix& S, int nev, int ncv, - bool accelerated = false, double beta = 0) - : A_(A), - S_(S), - m_n_(A.rows()), - m_nev_(nev), - // m_ncv_(ncv > m_n_ ? m_n_ : ncv), - accelerated_(accelerated), - beta_(beta), - m_niter_(0) { - // Do nothing - } - - int rows() const { return A_.rows(); } - int cols() const { return A_.cols(); } - - // Matrix-vector multiplication operation - Vector perform_op(const Vector& x1, const Vector& x0) const { - // Do the multiplication - Vector x2 = A_ * x1 - beta_ * x0; - x2.normalize(); - return x2; + ritzVectors_.col(0) = update(x0); + perturb(); } - Vector perform_op(const Vector& x1, const Vector& x0, const double beta) const { - Vector x2 = A_ * x1 - beta * x0; - x2.normalize(); - return x2; + Vector update(const Vector &x) const { + Vector y = A_ * x; + y.normalize(); + return y; } - Vector perform_op(const Vector& x1) const { - Vector x2 = A_ * x1; - x2.normalize(); - return x2; - } + Vector update() const { return update(ritzVectors_); } - Vector perform_op(int i) const { - if (accelerated_) { - Vector x1 = ritz_vectors_.col(i-1); - Vector x2 = ritz_vectors_.col(i-2); - return perform_op(x1, x2); - } else - return perform_op(ritz_vectors_.col(i-1)); - } + void updateRitz(const Vector &ritz) { ritzVectors_ = ritz; } - long next_long_rand(long seed) { - const unsigned int m_a = 16807; - const unsigned long m_max = 2147483647L; - unsigned long lo, hi; + Vector getRitz() { return ritzVectors_; } - lo = m_a * (long)(seed & 0xFFFF); - hi = m_a * (long)((unsigned long)seed >> 16); - lo += (hi & 0x7FFF) << 16; - if (lo > m_max) { - lo &= m_max; - ++lo; + void perturb() { + // generate a 0.03*||x_0||_2 as stated in David's paper + unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); + std::mt19937 generator(seed); + std::uniform_real_distribution uniform01(0.0, 1.0); + + int n = dim_; + Vector disturb; + disturb.resize(n); + disturb.setZero(); + for (int i = 0; i < n; ++i) { + disturb(i) = uniform01(generator); } - lo += hi >> 15; - if (lo > m_max) { - lo &= m_max; - ++lo; - } - return (long)lo; + disturb.normalize(); + + Vector x0 = ritzVectors_; + double magnitude = x0.norm(); + ritzVectors_ = x0 + 0.03 * magnitude * disturb; } - Vector random_vec(const int len) { - Vector res(len); - const unsigned long m_max = 2147483647L; - for (int i = 0; i < len; i++) { - long m_rand = next_long_rand(m_rand); - res[i] = double(m_rand) / double(m_max) - double(0.5); + // Perform power iteration on a single Ritz value + // Updates ritzValues_ + bool iterateOne(double tol) { + const Vector x = ritzVectors_; + double theta = x.transpose() * A_ * x; + + // store the Ritz eigen value + ritzValues_ = theta; + + const Vector diff = A_ * x - theta * x; + double error = diff.norm(); + return error < tol; + } + + size_t nrIterations() { return nrIterations_; } + + int compute(int maxit, double tol) { + // Starting + int nrConverged = 0; + + for (int i = 0; i < maxit; i++) { + nrIterations_ += 1; + ritzVectors_ = update(); + nrConverged = iterateOne(tol); + if (nrConverged) break; } - res.normalize(); - return res; + + return std::min(1, nrConverged); + } + + double eigenvalues() { return ritzValues_; } + + Vector eigenvectors() { return ritzVectors_; } +}; + +template +class AcceleratedPowerMethod : public PowerMethod { + double beta_ = 0; // a Polyak momentum term + + Vector previousVector_; // store previous vector + + public: + // Constructor + explicit AcceleratedPowerMethod(const Operator &A, const Vector &initial) + : PowerMethod(A, initial) { + Vector x0 = initial; + // initialize ritz vector + x0 = x0.isZero(0) ? Vector::Random(PowerMethod::dim_) : x0; + Vector x00 = Vector::Random(PowerMethod::dim_); + x0.normalize(); + x00.normalize(); + + // initialize Ritz eigen vector and previous vector + previousVector_ = update(x0, x00, beta_); + this->updateRitz(update(previousVector_, x0, beta_)); + this->perturb(); + + // set beta + Vector init_resid = this->getRitz(); + const double up = init_resid.transpose() * this->A_ * init_resid; + const double down = init_resid.transpose().dot(init_resid); + const double mu = up / down; + beta_ = mu * mu / 4; + setBeta(); + } + + Vector update(const Vector &x1, const Vector &x0, const double beta) const { + Vector y = this->A_ * x1 - beta * x0; + y.normalize(); + return y; + } + + Vector update() const { + Vector y = update(this->ritzVectors_, previousVector_, beta_); + previousVector_ = this->ritzVectors_; + return y; } /// Tuning the momentum beta using the Best Heavy Ball algorithm in Ref(3) void setBeta() { - if (m_n_ < 10) return; + if (PowerMethod::dim_ < 10) return; double maxBeta = beta_; size_t maxIndex; - std::vector betas = {2/3*maxBeta, 0.99*maxBeta, maxBeta, 1.01*maxBeta, 1.5*maxBeta}; + std::vector betas = {2 / 3 * maxBeta, 0.99 * maxBeta, maxBeta, + 1.01 * maxBeta, 1.5 * maxBeta}; - Matrix tmp_ritz_vectors; - tmp_ritz_vectors.resize(m_n_, 10); - tmp_ritz_vectors.setZero(); + Matrix tmpRitzVectors; + tmpRitzVectors.resize(PowerMethod::dim_, 10); + tmpRitzVectors.setZero(); for (size_t i = 0; i < 10; i++) { for (size_t k = 0; k < betas.size(); ++k) { for (size_t j = 1; j < 10; j++) { - // double rayleighQuotient; - if (j <2 ) { - Vector x0 = random_vec(m_n_); - Vector x00 = random_vec(m_n_); - tmp_ritz_vectors.col(0) = perform_op(x0, x00, betas[k]); - tmp_ritz_vectors.col(1) = - perform_op(tmp_ritz_vectors.col(0), x0, betas[k]); + if (j < 2) { + Vector x0 = Vector::Random(PowerMethod::dim_); + Vector x00 = Vector::Random(PowerMethod::dim_); + tmpRitzVectors.col(0) = update(x0, x00, betas[k]); + tmpRitzVectors.col(1) = update(tmpRitzVectors.col(0), x0, betas[k]); + } else { + tmpRitzVectors.col(j) = update(tmpRitzVectors.col(j - 1), + tmpRitzVectors.col(j - 2), betas[k]); } - else { - tmp_ritz_vectors.col(j) = - perform_op(tmp_ritz_vectors.col(j - 1), - tmp_ritz_vectors.col(j - 2), betas[k]); - } - const Vector x = tmp_ritz_vectors.col(j); - const double up = x.transpose() * A_ * x; + const Vector x = tmpRitzVectors.col(j); + const double up = x.transpose() * this->A_ * x; const double down = x.transpose().dot(x); const double mu = up / down; if (mu * mu / 4 > maxBeta) { @@ -203,165 +242,6 @@ public: beta_ = betas[maxIndex]; } - - void perturb(int i) { - // generate a 0.03*||x_0||_2 as stated in David's paper - unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); - std::mt19937 generator (seed); - std::uniform_real_distribution uniform01(0.0, 1.0); - - int n = m_n_; - Vector disturb; - disturb.resize(n); - disturb.setZero(); - for (int i =0; i(); - if (error < tol) ritz_conv_(i) = 1; - return error < tol; - } - - int num_converged(double tol) { - int num_converge = 0; - for (int i=0; i0 && i> pairs; - for(int i=0; i& left, const std::pair& right) { - return left.first < right.first; - }); - - // initialzie sorted ritz eigenvalues and eigenvectors - size_t num_converged = pairs.size(); - sorted_ritz_val_.resize(num_converged); - sorted_ritz_val_.setZero(); - sorted_ritz_vectors_.resize(m_n_, num_converged); - sorted_ritz_vectors_.setZero(); - - // fill sorted ritz eigenvalues and eigenvectors with sorted index - for(size_t j=0; j= m_nev_) break; - else reset(); - } - - // sort the result - sort_eigenpair(); - - return std::min(m_nev_, nconv); - } - - Vector eigenvalues() { - return sorted_ritz_val_; - } - - Matrix eigenvectors() { - return sorted_ritz_vectors_; - } - }; -} // namespace gtsam +} // namespace gtsam diff --git a/gtsam/sfm/ShonanAveraging.cpp b/gtsam/sfm/ShonanAveraging.cpp index f287437d4..9ebee4f9f 100644 --- a/gtsam/sfm/ShonanAveraging.cpp +++ b/gtsam/sfm/ShonanAveraging.cpp @@ -480,8 +480,7 @@ static bool PowerMinimumEigenValue( Eigen::Index numLanczosVectors = 20) { // a. Compute dominant eigenpair of S using power method - PowerFunctor lmOperator(A, S, 1, A.rows()); - lmOperator.init(); + PowerMethod lmOperator(A, S.row(0)); const int lmConverged = lmOperator.compute( maxIterations, 1e-5); @@ -489,34 +488,33 @@ static bool PowerMinimumEigenValue( // Check convergence and bail out if necessary if (lmConverged != 1) return false; - const double lmEigenValue = lmOperator.eigenvalues()(0); + const double lmEigenValue = lmOperator.eigenvalues(); if (lmEigenValue < 0) { // The largest-magnitude eigenvalue is negative, and therefore also the // minimum eigenvalue, so just return this solution *minEigenValue = lmEigenValue; if (minEigenVector) { - *minEigenVector = lmOperator.eigenvectors().col(0); + *minEigenVector = lmOperator.eigenvectors(); minEigenVector->normalize(); // Ensure that this is a unit vector } return true; } Sparse C = lmEigenValue * Matrix::Identity(A.rows(), A.cols()) - A; - PowerFunctor minShiftedOperator(C, S, 1, C.rows(), true); - minShiftedOperator.init(); + AcceleratedPowerMethod minShiftedOperator(C, S.row(0)); const int minConverged = minShiftedOperator.compute( maxIterations, minEigenvalueNonnegativityTolerance / lmEigenValue); if (minConverged != 1) return false; - *minEigenValue = lmEigenValue - minShiftedOperator.eigenvalues()(0); + *minEigenValue = lmEigenValue - minShiftedOperator.eigenvalues(); if (minEigenVector) { - *minEigenVector = minShiftedOperator.eigenvectors().col(0); + *minEigenVector = minShiftedOperator.eigenvectors(); minEigenVector->normalize(); // Ensure that this is a unit vector } - if (numIterations) *numIterations = minShiftedOperator.num_iterations(); + if (numIterations) *numIterations = minShiftedOperator.nrIterations(); return true; } diff --git a/gtsam/sfm/tests/testPowerMethod.cpp b/gtsam/sfm/tests/testPowerMethod.cpp index 4547c91b9..d02906980 100644 --- a/gtsam/sfm/tests/testPowerMethod.cpp +++ b/gtsam/sfm/tests/testPowerMethod.cpp @@ -18,38 +18,34 @@ * @brief Check eigenvalue and eigenvector computed by power method */ +#include +#include +#include +#include #include -#include #include #include #include +#include + #include #include using namespace std; using namespace gtsam; - -ShonanAveraging3 fromExampleName( - const std::string &name, - ShonanAveraging3::Parameters parameters = ShonanAveraging3::Parameters()) { - string g2oFile = findExampleDataFile(name); - return ShonanAveraging3(g2oFile, parameters); -} - -static const ShonanAveraging3 kShonan = fromExampleName("toyExample.g2o"); +using symbol_shorthand::X; /* ************************************************************************* */ TEST(PowerMethod, powerIteration) { - // test power accelerated iteration - gtsam::Sparse A(6, 6); + // test power iteration, beta is set to 0 + Sparse A(6, 6); A.coeffRef(0, 0) = 6; Matrix S = Matrix66::Zero(); - PowerFunctor apf(A, S, 1, A.rows(), true); - apf.init(); + PowerMethod apf(A, S.row(0)); apf.compute(20, 1e-4); - EXPECT_LONGS_EQUAL(6, apf.eigenvectors().cols()); + EXPECT_LONGS_EQUAL(1, apf.eigenvectors().cols()); EXPECT_LONGS_EQUAL(6, apf.eigenvectors().rows()); const Vector6 x1 = (Vector(6) << 1.0, 0.0, 0.0, 0.0, 0.0, 0.0).finished(); @@ -58,21 +54,52 @@ TEST(PowerMethod, powerIteration) { EXPECT(assert_equal(x1, actual0)); const double ev1 = 6.0; - EXPECT_DOUBLES_EQUAL(ev1, apf.eigenvalues()(0), 1e-5); + EXPECT_DOUBLES_EQUAL(ev1, apf.eigenvalues(), 1e-5); - // test power iteration, beta is set to 0 - PowerFunctor pf(A, S, 1, A.rows()); - pf.init(); + // test power accelerated iteration + AcceleratedPowerMethod pf(A, S.row(0)); pf.compute(20, 1e-4); - // for power method, only 5 ritz vectors converge with 20 iteration - EXPECT_LONGS_EQUAL(5, pf.eigenvectors().cols()); + // for power method, only 5 ritz vectors converge with 20 iterations + EXPECT_LONGS_EQUAL(1, pf.eigenvectors().cols()); EXPECT_LONGS_EQUAL(6, pf.eigenvectors().rows()); Vector6 actual1 = apf.eigenvectors().col(0); actual1(0) = abs(actual1(0)); EXPECT(assert_equal(x1, actual1)); - EXPECT_DOUBLES_EQUAL(ev1, pf.eigenvalues()(0), 1e-5); + EXPECT_DOUBLES_EQUAL(ev1, pf.eigenvalues(), 1e-5); +} + +/* ************************************************************************* */ +TEST(PowerMethod, useFactorGraph) { + // Let's make a scalar synchronization graph with 4 nodes + GaussianFactorGraph fg; + auto model = noiseModel::Unit::Create(1); + for (size_t j = 0; j < 3; j++) { + fg.add(X(j), -I_1x1, X(j + 1), I_1x1, Vector1::Zero(), model); + } + fg.add(X(3), -I_1x1, X(0), I_1x1, Vector1::Zero(), model); // extra row + + // Get eigenvalues and eigenvectors with Eigen + auto L = fg.hessian(); + cout << L.first << endl; + Eigen::EigenSolver solver(L.first); + cout << solver.eigenvalues() << endl; + cout << solver.eigenvectors() << endl; + + // Check that we get zero eigenvalue and "constant" eigenvector + EXPECT_DOUBLES_EQUAL(0.0, solver.eigenvalues()[0].real(), 1e-9); + auto v0 = solver.eigenvectors().col(0); + for (size_t j = 0; j < 3; j++) + EXPECT_DOUBLES_EQUAL(-0.5, v0[j].real(), 1e-9); + + // test power iteration, beta is set to 0 + Matrix S = Matrix44::Zero(); + // PowerMethod pf(L.first, S.row(0)); + AcceleratedPowerMethod pf(L.first, S.row(0)); + pf.compute(20, 1e-4); + cout << pf.eigenvalues() << endl; + cout << pf.eigenvectors() << endl; } /* ************************************************************************* */