Refactor power and accelerated method

release/4.3a0
jingwuOUO 2020-09-30 11:55:18 -04:00
parent d6e2546cf5
commit 758c4b061d
3 changed files with 206 additions and 301 deletions


@@ -13,7 +13,8 @@
* @file PowerMethod.h
* @date Sept 2020
* @author Jing Wu
* @brief accelerated power method for fast eigenvalue and eigenvector computation
* @brief accelerated power method for fast eigenvalue and eigenvector
* computation
*/
#pragma once
@@ -24,15 +25,15 @@
#include <Eigen/Core>
#include <Eigen/Sparse>
#include <algorithm>
#include <chrono>
#include <cmath>
#include <map>
#include <random>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
#include <algorithm>
#include <cmath>
#include <chrono>
#include <random>
namespace gtsam {
@@ -41,9 +42,11 @@ using Sparse = Eigen::SparseMatrix<double>;
/* ************************************************************************* */
/// MINIMUM EIGENVALUE COMPUTATIONS
struct PowerFunctor {
// The template argument Operator only needs to provide matrix-vector multiplication (operator*)
template <class Operator>
class PowerMethod {
/**
* \brief Computer i-th Eigenpair with power method
* \brief Compute maximum Eigenpair with power method
*
* References :
* 1) Rosen, D. and Carlone, L., 2017, September. Computational
@@ -52,144 +55,180 @@ struct PowerFunctor {
* 2) Yulun Tian and Kasra Khosoussi and David M. Rosen and Jonathan P. How,
* 2020, Aug, Distributed Certifiably Correct Pose-Graph Optimization, Arxiv
* 3) C. de Sa, B. He, I. Mitliagkas, C. Ré, and P. Xu, Accelerated
* stochastic power iteration, in Proc. Mach. Learn. Res., no. 84, 2018, pp. 5867
* stochastic power iteration, in Proc. Mach. Learn. Res., no. 84, 2018, pp.
* 58–67
*
* It performs the following iteration: \f$ x_{k+1} = A * x_k - \beta *
* x_{k-1} \f$ where A is the certificate matrix, x is the ritz vector
* x_{k-1} \f$ where A is the certificate matrix, x is the Ritz vector
*
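* A minimal usage sketch (the matrix A, the initial vector, and the tolerances
* below are illustrative, not part of the API):
*
*   Sparse A = ...;                            // symmetric operator
*   PowerMethod<Sparse> pm(A, Vector::Zero(A.rows()));
*   pm.compute(1000, 1e-5);                    // maxIterations, tolerance
*   double lambda = pm.eigenvalues();          // dominant eigenvalue estimate
*   Vector v = pm.eigenvectors();              // corresponding Ritz vector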
*/
public:
// Const reference to an externally-held matrix (or operator) whose dominant
// eigenpair we want to compute
const Sparse &A_;
const Operator &A_;
const Matrix &S_;
const int dim_; // dimension of Matrix A
const int m_n_; // dimension of Matrix A
const int m_nev_; // number of eigenvalues required
size_t nrIterations_; // number of iterations
// flag for running power method or accelerated power method. If false, the former, vice versa.
bool accelerated_;
protected:
double ritzValues_; // current Ritz eigenvalue estimate
Vector ritzVectors_; // current Ritz vector estimate
// a Polyak momentum term
double beta_;
public:
// Constructor
explicit PowerMethod(const Operator &A, const Vector &initial)
: A_(A), dim_(A.rows()), nrIterations_(0) {
Vector x0;
x0 = initial.isZero(0) ? Vector::Random(dim_) : initial;
x0.normalize();
// const int m_ncv_; // dimention of orthonormal basis subspace
size_t m_niter_; // number of iterations
// initialize the Ritz eigenvalue estimate
ritzValues_ = 0.0;
private:
Vector ritz_val_; // all ritz eigenvalues
Matrix ritz_vectors_; // all ritz eigenvectors
Vector ritz_conv_; // store whether the ritz eigenpair converged
Vector sorted_ritz_val_; // sorted converged eigenvalue
Matrix sorted_ritz_vectors_; // sorted converged eigenvectors
// initialize the Ritz vector
ritzVectors_.resize(dim_, 1);
ritzVectors_.setZero();
public:
// Constructor
explicit PowerFunctor(const Sparse& A, const Matrix& S, int nev, int ncv,
bool accelerated = false, double beta = 0)
: A_(A),
S_(S),
m_n_(A.rows()),
m_nev_(nev),
// m_ncv_(ncv > m_n_ ? m_n_ : ncv),
accelerated_(accelerated),
beta_(beta),
m_niter_(0) {
// Do nothing
}
int rows() const { return A_.rows(); }
int cols() const { return A_.cols(); }
// Matrix-vector multiplication operation
Vector perform_op(const Vector& x1, const Vector& x0) const {
// Do the multiplication
Vector x2 = A_ * x1 - beta_ * x0;
x2.normalize();
return x2;
ritzVectors_.col(0) = update(x0);
perturb();
}
Vector perform_op(const Vector& x1, const Vector& x0, const double beta) const {
Vector x2 = A_ * x1 - beta * x0;
x2.normalize();
return x2;
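// One plain power iteration: y = A * x, normalized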
Vector update(const Vector &x) const {
Vector y = A_ * x;
y.normalize();
return y;
}
Vector perform_op(const Vector& x1) const {
Vector x2 = A_ * x1;
x2.normalize();
return x2;
}
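// Run one power iteration starting from the stored Ritz vector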
virtual Vector update() const { return update(ritzVectors_); }
Vector perform_op(int i) const {
if (accelerated_) {
Vector x1 = ritz_vectors_.col(i-1);
Vector x2 = ritz_vectors_.col(i-2);
return perform_op(x1, x2);
} else
return perform_op(ritz_vectors_.col(i-1));
}
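// Overwrite the stored Ritz vector estimate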
void updateRitz(const Vector &ritz) { ritzVectors_ = ritz; }
long next_long_rand(long seed) {
const unsigned int m_a = 16807;
const unsigned long m_max = 2147483647L;
unsigned long lo, hi;
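// Return the current Ritz vector estimate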
Vector getRitz() { return ritzVectors_; }
lo = m_a * (long)(seed & 0xFFFF);
hi = m_a * (long)((unsigned long)seed >> 16);
lo += (hi & 0x7FFF) << 16;
if (lo > m_max) {
lo &= m_max;
++lo;
void perturb() {
// perturb the Ritz vector by a random direction with magnitude
// 0.03 * ||x_0||_2, as suggested in Rosen's paper
unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
std::mt19937 generator(seed);
std::uniform_real_distribution<double> uniform01(0.0, 1.0);
int n = dim_;
Vector disturb;
disturb.resize(n);
disturb.setZero();
for (int i = 0; i < n; ++i) {
disturb(i) = uniform01(generator);
}
lo += hi >> 15;
if (lo > m_max) {
lo &= m_max;
++lo;
}
return (long)lo;
disturb.normalize();
Vector x0 = ritzVectors_;
double magnitude = x0.norm();
ritzVectors_ = x0 + 0.03 * magnitude * disturb;
}
Vector random_vec(const int len) {
Vector res(len);
const unsigned long m_max = 2147483647L;
for (int i = 0; i < len; i++) {
long m_rand = next_long_rand(m_rand);
res[i] = double(m_rand) / double(m_max) - double(0.5);
// Compute the Rayleigh quotient theta = x' * A * x of the current Ritz vector,
// store it as the Ritz eigenvalue, and check the residual ||A x - theta x||
bool iterateOne(double tol) {
const Vector x = ritzVectors_;
double theta = x.transpose() * A_ * x;
// store the Ritz eigen value
ritzValues_ = theta;
const Vector diff = A_ * x - theta * x;
double error = diff.norm();
return error < tol;
}
size_t nrIterations() { return nrIterations_; }
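// Run up to maxit iterations, each one updating the Ritz vector and checking
// the residual against tol; returns 1 if converged, 0 otherwise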
int compute(int maxit, double tol) {
// Starting
int nrConverged = 0;
for (int i = 0; i < maxit; i++) {
nrIterations_ += 1;
ritzVectors_ = update();
nrConverged = iterateOne(tol);
if (nrConverged) break;
}
res.normalize();
return res;
return std::min(1, nrConverged);
}
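// Accessors for the dominant eigenvalue / eigenvector estimates found by compute()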
double eigenvalues() { return ritzValues_; }
Vector eigenvectors() { return ritzVectors_; }
};
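/**
* Accelerated (heavy-ball / momentum) variant of PowerMethod. Each iteration
* computes \f$ x_{k+1} = A * x_k - \beta * x_{k-1} \f$ (normalized), where the
* momentum term \f$ \beta \f$ is initialized from the Rayleigh quotient of the
* initial Ritz vector and then refined by setBeta(); see reference (3) above.
*/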
template <class Operator>
class AcceleratedPowerMethod : public PowerMethod<Operator> {
double beta_ = 0; // a Polyak momentum term
mutable Vector previousVector_; // previous iterate (mutable so the const update() can refresh it)
public:
// Constructor
explicit AcceleratedPowerMethod(const Operator &A, const Vector &initial)
: PowerMethod<Operator>(A, initial) {
Vector x0 = initial;
// initialize ritz vector
x0 = x0.isZero(0) ? Vector::Random(PowerMethod<Operator>::dim_) : x0;
Vector x00 = Vector::Random(PowerMethod<Operator>::dim_);
x0.normalize();
x00.normalize();
// initialize Ritz eigen vector and previous vector
previousVector_ = update(x0, x00, beta_);
this->updateRitz(update(previousVector_, x0, beta_));
this->perturb();
// initial momentum estimate: beta = mu^2 / 4, where mu is the Rayleigh
// quotient of the current Ritz vector; setBeta() then searches around it
Vector init_resid = this->getRitz();
const double up = init_resid.transpose() * this->A_ * init_resid;
const double down = init_resid.transpose().dot(init_resid);
const double mu = up / down;
beta_ = mu * mu / 4;
setBeta();
}
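// One accelerated power iteration: y = A * x1 - beta * x0, normalized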
Vector update(const Vector &x1, const Vector &x0, const double beta) const {
Vector y = this->A_ * x1 - beta * x0;
y.normalize();
return y;
}
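// Accelerated iteration from the stored Ritz vector and the previous iterate;
// also records the current Ritz vector as the new previous iterate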
Vector update() const override {
Vector y = update(this->ritzVectors_, previousVector_, beta_);
previousVector_ = this->ritzVectors_;
return y;
}
/// Tuning the momentum beta using the Best Heavy Ball algorithm in Ref(3)
void setBeta() {
if (m_n_ < 10) return;
if (PowerMethod<Operator>::dim_ < 10) return;
double maxBeta = beta_;
size_t maxIndex = 2; // index of maxBeta itself in betas below
std::vector<double> betas = {2/3*maxBeta, 0.99*maxBeta, maxBeta, 1.01*maxBeta, 1.5*maxBeta};
std::vector<double> betas = {2.0 / 3 * maxBeta, 0.99 * maxBeta, maxBeta,
1.01 * maxBeta, 1.5 * maxBeta};
Matrix tmp_ritz_vectors;
tmp_ritz_vectors.resize(m_n_, 10);
tmp_ritz_vectors.setZero();
Matrix tmpRitzVectors;
tmpRitzVectors.resize(PowerMethod<Operator>::dim_, 10);
tmpRitzVectors.setZero();
for (size_t i = 0; i < 10; i++) {
for (size_t k = 0; k < betas.size(); ++k) {
for (size_t j = 1; j < 10; j++) {
// double rayleighQuotient;
if (j <2 ) {
Vector x0 = random_vec(m_n_);
Vector x00 = random_vec(m_n_);
tmp_ritz_vectors.col(0) = perform_op(x0, x00, betas[k]);
tmp_ritz_vectors.col(1) =
perform_op(tmp_ritz_vectors.col(0), x0, betas[k]);
if (j < 2) {
Vector x0 = Vector::Random(PowerMethod<Operator>::dim_);
Vector x00 = Vector::Random(PowerMethod<Operator>::dim_);
tmpRitzVectors.col(0) = update(x0, x00, betas[k]);
tmpRitzVectors.col(1) = update(tmpRitzVectors.col(0), x0, betas[k]);
} else {
tmpRitzVectors.col(j) = update(tmpRitzVectors.col(j - 1),
tmpRitzVectors.col(j - 2), betas[k]);
}
else {
tmp_ritz_vectors.col(j) =
perform_op(tmp_ritz_vectors.col(j - 1),
tmp_ritz_vectors.col(j - 2), betas[k]);
}
const Vector x = tmp_ritz_vectors.col(j);
const double up = x.transpose() * A_ * x;
const Vector x = tmpRitzVectors.col(j);
const double up = x.transpose() * this->A_ * x;
const double down = x.transpose().dot(x);
const double mu = up / down;
if (mu * mu / 4 > maxBeta) {
@@ -203,165 +242,6 @@ public:
beta_ = betas[maxIndex];
}
void perturb(int i) {
// generate a 0.03*||x_0||_2 as stated in David's paper
unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
std::mt19937 generator (seed);
std::uniform_real_distribution<double> uniform01(0.0, 1.0);
int n = m_n_;
Vector disturb;
disturb.resize(n);
disturb.setZero();
for (int i =0; i<n; ++i) {
disturb(i) = uniform01(generator);
}
disturb.normalize();
Vector x0 = ritz_vectors_.col(i);
double magnitude = x0.norm();
ritz_vectors_.col(i) = x0 + 0.03 * magnitude * disturb;
}
void init(const Vector x0, const Vector x00) {
// initialzie ritz eigen values
ritz_val_.resize(m_n_);
ritz_val_.setZero();
// initialzie the ritz converged vector
ritz_conv_.resize(m_n_);
ritz_conv_.setZero();
// initialzie ritz eigen vectors
ritz_vectors_.resize(m_n_, m_n_);
ritz_vectors_.setZero();
if (accelerated_){
ritz_vectors_.col(0) = perform_op(x0, x00);
ritz_vectors_.col(1) = perform_op(ritz_vectors_.col(0), x0);
} else {
ritz_vectors_.col(0) = perform_op(x0);
perturb(0);
}
// setting beta
if (accelerated_) {
Vector init_resid = ritz_vectors_.col(0);
const double up = init_resid.transpose() * A_ * init_resid;
const double down = init_resid.transpose().dot(init_resid);
const double mu = up/down;
beta_ = mu*mu/4;
setBeta();
}
}
void init() {
Vector x0;
Vector x00;
if (!S_.isZero(0)) {
x0 = S_.row(1);
x00 = S_.row(0);
} else {
x0 = random_vec(m_n_);
x00 = random_vec(m_n_);
}
init(x0, x00);
}
bool converged(double tol, int i) {
Vector x = ritz_vectors_.col(i);
double theta = x.transpose() * A_ * x;
// store the ritz eigen value
ritz_val_(i) = theta;
// update beta
if (accelerated_) beta_ = std::max(beta_, theta * theta / 4);
Vector diff = A_ * x - theta * x;
double error = diff.lpNorm<1>();
if (error < tol) ritz_conv_(i) = 1;
return error < tol;
}
int num_converged(double tol) {
int num_converge = 0;
for (int i=0; i<ritz_vectors_.cols(); i++) {
if (converged(tol, i)) {
num_converge += 1;
}
if (!accelerated_ && i<ritz_vectors_.cols()-1) {
ritz_vectors_.col(i+1) = perform_op(i+1);
} else if (accelerated_ && i>0 && i<ritz_vectors_.cols()-1) {
ritz_vectors_.col(i+1) = perform_op(i+1);
}
}
return num_converge;
}
size_t num_iterations() {
return m_niter_;
}
void sort_eigenpair() {
std::vector<std::pair<double, int>> pairs;
for(int i=0; i<ritz_conv_.size(); ++i) {
if (ritz_conv_(i)) pairs.push_back({ritz_val_(i), i});
}
std::sort(pairs.begin(), pairs.end(), [](const std::pair<double, int>& left, const std::pair<double, int>& right) {
return left.first < right.first;
});
// initialzie sorted ritz eigenvalues and eigenvectors
size_t num_converged = pairs.size();
sorted_ritz_val_.resize(num_converged);
sorted_ritz_val_.setZero();
sorted_ritz_vectors_.resize(m_n_, num_converged);
sorted_ritz_vectors_.setZero();
// fill sorted ritz eigenvalues and eigenvectors with sorted index
for(size_t j=0; j<num_converged; ++j) {
sorted_ritz_val_(j) = pairs[j].first;
sorted_ritz_vectors_.col(j) = ritz_vectors_.col(pairs[j].second);
}
}
void reset() {
if (accelerated_) {
ritz_vectors_.col(0) = perform_op(ritz_vectors_.col(m_n_-1-1), ritz_vectors_.col(m_n_-1-2));
ritz_vectors_.col(1) = perform_op(ritz_vectors_.col(0), ritz_vectors_.col(m_n_-1-1));
} else {
ritz_vectors_.col(0) = perform_op(ritz_vectors_.col(m_n_-1));
}
}
int compute(int maxit, double tol) {
// Starting
int i = 0;
int nconv = 0;
for (; i < maxit; i++) {
m_niter_ +=1;
nconv = num_converged(tol);
if (nconv >= m_nev_) break;
else reset();
}
// sort the result
sort_eigenpair();
return std::min(m_nev_, nconv);
}
Vector eigenvalues() {
return sorted_ritz_val_;
}
Matrix eigenvectors() {
return sorted_ritz_vectors_;
}
};
} // namespace gtsam
} // namespace gtsam


@@ -480,8 +480,7 @@ static bool PowerMinimumEigenValue(
Eigen::Index numLanczosVectors = 20) {
// a. Compute dominant eigenpair of S using power method
PowerFunctor lmOperator(A, S, 1, A.rows());
lmOperator.init();
PowerMethod<Sparse> lmOperator(A, S.row(0));
const int lmConverged = lmOperator.compute(
maxIterations, 1e-5);
@@ -489,34 +488,33 @@ static bool PowerMinimumEigenValue(
// Check convergence and bail out if necessary
if (lmConverged != 1) return false;
const double lmEigenValue = lmOperator.eigenvalues()(0);
const double lmEigenValue = lmOperator.eigenvalues();
if (lmEigenValue < 0) {
// The largest-magnitude eigenvalue is negative, and therefore also the
// minimum eigenvalue, so just return this solution
*minEigenValue = lmEigenValue;
if (minEigenVector) {
*minEigenVector = lmOperator.eigenvectors().col(0);
*minEigenVector = lmOperator.eigenvectors();
minEigenVector->normalize(); // Ensure that this is a unit vector
}
return true;
}
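// Shift the spectrum: the dominant eigenvalue of C = lmEigenValue * I - A is
// lmEigenValue - lambda_min(A), so running the accelerated power method on C
// recovers the minimum eigenpair of A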
Sparse C = lmEigenValue * Matrix::Identity(A.rows(), A.cols()) - A;
PowerFunctor minShiftedOperator(C, S, 1, C.rows(), true);
minShiftedOperator.init();
AcceleratedPowerMethod<Sparse> minShiftedOperator(C, S.row(0));
const int minConverged = minShiftedOperator.compute(
maxIterations, minEigenvalueNonnegativityTolerance / lmEigenValue);
if (minConverged != 1) return false;
*minEigenValue = lmEigenValue - minShiftedOperator.eigenvalues()(0);
*minEigenValue = lmEigenValue - minShiftedOperator.eigenvalues();
if (minEigenVector) {
*minEigenVector = minShiftedOperator.eigenvectors().col(0);
*minEigenVector = minShiftedOperator.eigenvectors();
minEigenVector->normalize(); // Ensure that this is a unit vector
}
if (numIterations) *numIterations = minShiftedOperator.num_iterations();
if (numIterations) *numIterations = minShiftedOperator.nrIterations();
return true;
}


@@ -18,38 +18,34 @@
* @brief Check eigenvalue and eigenvector computed by power method
*/
#include <gtsam/base/Matrix.h>
#include <gtsam/base/VectorSpace.h>
#include <gtsam/inference/Symbol.h>
#include <gtsam/linear/GaussianFactorGraph.h>
#include <gtsam/sfm/PowerMethod.h>
#include <gtsam/sfm/ShonanAveraging.h>
#include <CppUnitLite/TestHarness.h>
#include <Eigen/Core>
#include <Eigen/Dense>
#include <Eigen/Eigenvalues>
#include <iostream>
#include <random>
using namespace std;
using namespace gtsam;
ShonanAveraging3 fromExampleName(
const std::string &name,
ShonanAveraging3::Parameters parameters = ShonanAveraging3::Parameters()) {
string g2oFile = findExampleDataFile(name);
return ShonanAveraging3(g2oFile, parameters);
}
static const ShonanAveraging3 kShonan = fromExampleName("toyExample.g2o");
using symbol_shorthand::X;
/* ************************************************************************* */
TEST(PowerMethod, powerIteration) {
// test power accelerated iteration
gtsam::Sparse A(6, 6);
// test plain power iteration (no momentum term)
Sparse A(6, 6);
A.coeffRef(0, 0) = 6;
Matrix S = Matrix66::Zero();
PowerFunctor apf(A, S, 1, A.rows(), true);
apf.init();
PowerMethod<Sparse> apf(A, S.row(0));
apf.compute(20, 1e-4);
EXPECT_LONGS_EQUAL(6, apf.eigenvectors().cols());
EXPECT_LONGS_EQUAL(1, apf.eigenvectors().cols());
EXPECT_LONGS_EQUAL(6, apf.eigenvectors().rows());
const Vector6 x1 = (Vector(6) << 1.0, 0.0, 0.0, 0.0, 0.0, 0.0).finished();
@@ -58,21 +54,52 @@ TEST(PowerMethod, powerIteration) {
EXPECT(assert_equal(x1, actual0));
const double ev1 = 6.0;
EXPECT_DOUBLES_EQUAL(ev1, apf.eigenvalues()(0), 1e-5);
EXPECT_DOUBLES_EQUAL(ev1, apf.eigenvalues(), 1e-5);
// test power iteration, beta is set to 0
PowerFunctor pf(A, S, 1, A.rows());
pf.init();
// test accelerated power iteration
AcceleratedPowerMethod<Sparse> pf(A, S.row(0));
pf.compute(20, 1e-4);
// for power method, only 5 ritz vectors converge with 20 iteration
EXPECT_LONGS_EQUAL(5, pf.eigenvectors().cols());
// the accelerated power method stores a single Ritz vector
EXPECT_LONGS_EQUAL(1, pf.eigenvectors().cols());
EXPECT_LONGS_EQUAL(6, pf.eigenvectors().rows());
Vector6 actual1 = pf.eigenvectors();
actual1(0) = abs(actual1(0));
EXPECT(assert_equal(x1, actual1));
EXPECT_DOUBLES_EQUAL(ev1, pf.eigenvalues()(0), 1e-5);
EXPECT_DOUBLES_EQUAL(ev1, pf.eigenvalues(), 1e-5);
}
/* ************************************************************************* */
TEST(PowerMethod, useFactorGraph) {
// Let's make a scalar synchronization graph with 4 nodes
GaussianFactorGraph fg;
auto model = noiseModel::Unit::Create(1);
for (size_t j = 0; j < 3; j++) {
fg.add(X(j), -I_1x1, X(j + 1), I_1x1, Vector1::Zero(), model);
}
fg.add(X(3), -I_1x1, X(0), I_1x1, Vector1::Zero(), model); // extra row
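// The Hessian of this graph is the Laplacian of a 4-cycle: its smallest
// eigenvalue is 0 with a constant eigenvector, and its largest is 4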
// Get eigenvalues and eigenvectors with Eigen
auto L = fg.hessian();
cout << L.first << endl;
Eigen::EigenSolver<Matrix> solver(L.first);
cout << solver.eigenvalues() << endl;
cout << solver.eigenvectors() << endl;
// Check that we get zero eigenvalue and "constant" eigenvector
EXPECT_DOUBLES_EQUAL(0.0, solver.eigenvalues()[0].real(), 1e-9);
auto v0 = solver.eigenvectors().col(0);
for (size_t j = 0; j < 3; j++)
EXPECT_DOUBLES_EQUAL(-0.5, v0[j].real(), 1e-9);
// run the accelerated power method on the graph Laplacian
Matrix S = Matrix44::Zero();
// PowerMethod<Matrix> pf(L.first, S.row(0));
AcceleratedPowerMethod<Matrix> pf(L.first, S.row(0));
pf.compute(20, 1e-4);
cout << pf.eigenvalues() << endl;
cout << pf.eigenvectors() << endl;
}
/* ************************************************************************* */