fix gradient and gradientAtZero for linearly constrained JacobianFactors, optionally scaled by the dual variables

release/4.3a0
thduynguyen 2014-09-24 22:19:47 -04:00
parent 43a8de01c1
commit 11969b32f6
7 changed files with 53 additions and 29 deletions

View File

@@ -128,7 +128,7 @@ namespace gtsam {
     virtual void multiplyHessianAdd(double alpha, const double* x, double* y) const = 0;
     /// A'*b for Jacobian, eta for Hessian
-    virtual VectorValues gradientAtZero(const boost::optional<Vector&> dual = boost::none) const = 0;
+    virtual VectorValues gradientAtZero(const boost::optional<const VectorValues&> negDuals = boost::none) const = 0;
     /// A'*b for Jacobian, eta for Hessian (raw memory version)
     virtual void gradientAtZero(double* d) const = 0;
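Why the optional argument becomes a const VectorValues& rather than a raw Vector&: the duals for the whole graph live in one keyed container, and each constrained factor picks out its own block by dual key (see the JacobianFactor change below), which a single flat vector cannot support. A toy sketch of that lookup pattern, with std::map standing in for VectorValues (names hypothetical):

#include <map>
#include <boost/optional.hpp>
#include <Eigen/Dense>

// Toy stand-in for VectorValues: dual variables keyed per constrained factor.
typedef std::map<size_t, Eigen::VectorXd> Duals;

// Each factor reads only its own multiplier block; without duals it
// falls back to a vector of ones (unscaled constraint gradient).
Eigen::VectorXd scaleFor(size_t dualKey, int rows,
                         const boost::optional<const Duals&>& negDuals) {
  if (negDuals)
    return negDuals->at(dualKey);
  return Eigen::VectorXd::Ones(rows);
}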

View File

@@ -254,6 +254,7 @@ namespace gtsam {
   map<Key,Matrix> GaussianFactorGraph::hessianBlockDiagonal() const {
     map<Key,Matrix> blocks;
     BOOST_FOREACH(const sharedFactor& factor, *this) {
+      if (!factor) continue;
       map<Key,Matrix> BD = factor->hessianBlockDiagonal();
       map<Key,Matrix>::const_iterator it = BD.begin();
       for(;it!=BD.end();it++) {
@@ -299,11 +300,12 @@ namespace gtsam {
   }
   /* ************************************************************************* */
-  VectorValues GaussianFactorGraph::gradientAtZero() const {
+  VectorValues GaussianFactorGraph::gradientAtZero(
+      const boost::optional<const VectorValues&> negDuals) const {
     // Zero-out the gradient
     VectorValues g;
     BOOST_FOREACH(const sharedFactor& factor, *this) {
-      VectorValues gi = factor->gradientAtZero();
+      VectorValues gi = factor->gradientAtZero(negDuals);
       g.addInPlace_(gi);
     }
     return g;
@@ -426,6 +428,7 @@ namespace gtsam {
       const VectorValues& delta) const {
     GaussianFactorGraph::shared_ptr dualGraph(new GaussianFactorGraph());
     BOOST_FOREACH(const Key key, constrainedVariables) {
+      // Each constrained key becomes a factor in the dual graph
       dualGraph->push_back(createDualFactor(key, variableIndex, delta));
     }
     return dualGraph;
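At the graph level the change is mechanical: the gradient is still the sum of per-factor gradients, with negDuals threaded through to each factor untouched. A minimal usage sketch (factor, key, and numbers are made up; the negDuals contents would be keyed by the factors' dual keys):

#include <gtsam/linear/GaussianFactorGraph.h>
#include <gtsam/linear/JacobianFactor.h>
#include <boost/make_shared.hpp>

void example() {
  gtsam::GaussianFactorGraph gfg;
  gfg.push_back(boost::make_shared<gtsam::JacobianFactor>(
      0, (gtsam::Matrix(2, 2) << 1, 0, 0, 1).finished(),
      (gtsam::Vector(2) << 1, 2).finished()));

  // As before: g = sum over factors of the per-factor gradient at zero.
  gtsam::VectorValues g = gfg.gradientAtZero();

  // New: only constrained factors consume the duals; least-squares
  // factors such as the one above ignore them.
  // gtsam::VectorValues gScaled = gfg.gradientAtZero(negDuals);
}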

View File

@@ -262,11 +262,11 @@ namespace gtsam {
   /**
    * Compute the gradient of the energy function, \f$ \nabla_{x=0} \left\Vert \Sigma^{-1} A x - b
    * \right\Vert^2 \f$, centered around zero. The gradient is \f$ A^T(Ax-b) \f$.
-   * @param fg The Jacobian factor graph $(A,b)$
+   * @param duals [optional] Values of the dual variables used to scale constrained gradients, \f$ \lambda \nabla c(x) \f$
    * @param [output] g A VectorValues to store the gradient, which must be preallocated,
    *        see allocateVectorValues
    * @return The gradient as a VectorValues */
-  virtual VectorValues gradientAtZero() const;
+  virtual VectorValues gradientAtZero(const boost::optional<const VectorValues&> duals = boost::none) const;
   /** Optimize along the gradient direction, with a closed-form computation to perform the line
    * search. The gradient is computed about \f$ \delta x=0 \f$.
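For reference, the two gradients the optional parameter switches between, consistent with the doc comment above (a sketch; here \Sigma is the noise covariance and \lambda the dual variables):

\nabla_x \, \tfrac{1}{2}\bigl\|\Sigma^{-1/2}(Ax-b)\bigr\|^2 \Big|_{x=0}
  = A^\top \Sigma^{-1}(Ax-b)\Big|_{x=0} = -A^\top \Sigma^{-1} b
\qquad\text{vs.}\qquad
\nabla_x \, \lambda^\top (Ax-b) = A^\top \lambda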

View File

@@ -591,17 +591,11 @@ void HessianFactor::multiplyHessianAdd(double alpha, const double* x,
 /* ************************************************************************* */
-VectorValues HessianFactor::gradientAtZero(const boost::optional<Vector&> dual) const {
+VectorValues HessianFactor::gradientAtZero(const boost::optional<const VectorValues&> negDuals) const {
   VectorValues g;
   size_t n = size();
   for (size_t j = 0; j < n; ++j)
     g.insert(keys_[j], -info_(j,n).knownOffDiagonal());
-  if (dual) {
-    if (dual->size() != 1) {
-      throw std::runtime_error("Fail to scale the gradient with the dual vector: Size mismatch!");
-    }
-    g = (*dual)[0] * g;
-  }
   return g;
 }
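Stripped of the old scalar scaling (duals are only meaningful for constraints, which stay JacobianFactors, per the header change below), the body now computes only the unconstrained case: one block per key, each the negated off-diagonal block of the last column of the augmented information matrix. A self-contained Eigen check for a single-key factor built from (A, b), where that block works out to A'b (toy data, not the GTSAM types):

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXd A(2, 2);
  A << 1, 2, 3, 4;
  Eigen::VectorXd b(2);
  b << 1, 1;

  // Augmented system [A b]; info = [A'A A'b; b'A b'b].
  Eigen::MatrixXd Ab(2, 3);
  Ab << A, b;
  Eigen::MatrixXd info = Ab.transpose() * Ab;

  // Analogue of -info_(j, n).knownOffDiagonal() for the single key j = 0:
  Eigen::VectorXd g = -info.topRightCorner(2, 1);
  std::cout << (g + A.transpose() * b).norm() << "\n";  // prints 0
}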

View File

@@ -384,8 +384,11 @@ namespace gtsam {
     void multiplyHessianAdd(double alpha, const double* x, double* y) const {};
-    /// eta for Hessian
-    VectorValues gradientAtZero(const boost::optional<Vector&> dual = boost::none) const;
+    /**
+     * eta for Hessian.
+     * Ignores the duals parameter: duals are only valid for constraints, which must be JacobianFactors.
+     */
+    VectorValues gradientAtZero(const boost::optional<const VectorValues&> negDuals = boost::none) const;
     virtual void gradientAtZero(double* d) const;

View File

@@ -152,8 +152,8 @@ JacobianFactor::JacobianFactor(const HessianFactor& factor) :
   bool success;
   boost::tie(maxrank, success) = choleskyCareful(Ab_.matrix());
-  factor.print("HessianFactor to convert: ");
-  cout << "Maxrank: " << maxrank << ", success: " << int(success) << endl;
+  // factor.print("HessianFactor to convert: ");
+  // cout << "Maxrank: " << maxrank << ", success: " << int(success) << endl;
   // Check for indefinite system
   if (!success) {
@@ -606,20 +606,36 @@ void JacobianFactor::multiplyHessianAdd(double alpha, const double* x,
 /* ************************************************************************* */
 VectorValues JacobianFactor::gradientAtZero(
-    const boost::optional<Vector&> dual) const {
+    const boost::optional<const VectorValues&> negDuals) const {
   VectorValues g;
-  Vector b = getb();
-  // Gradient is really -A'*b / sigma^2
-  // transposeMultiplyAdd will divide by sigma once, so we need one more
-  Vector b_sigma = model_ ? model_->whiten(b) : b;
-  // scale b by the dual vector if it exists
-  if (dual) {
-    if (dual->size() != b_sigma.size())
-      throw std::runtime_error(
-          "Fail to scale the gradient with the dual vector: Size mismatch!");
-    b_sigma = b_sigma.cwiseProduct(*dual);
-  }
-  this->transposeMultiplyAdd(-1.0, b_sigma, g); // g -= A'*b/sigma^2
+  /* We treat linear constraints differently: they are not least-squares terms,
+   * 0.5*||Ax-b||^2, but hard linear constraints: Ax-b = 0.
+   * The constraint function is c(x) = Ax-b, its Jacobian is A,
+   * and the gradient is the sum of the columns of A', each optionally scaled
+   * by the corresponding element of the negDuals vector.
+   */
+  if (isConstrained()) {
+    Vector scale = ones(rows());
+    if (negDuals && dualKey_) {
+      scale = negDuals->at(dualKey());
+      if (scale.size() != rows())
+        throw std::runtime_error(
+            "Fail to scale the gradient with the dual vector: Size mismatch!");
+    }
+    if (negDuals && !dualKey_) {
+      throw std::runtime_error(
+          "Fail to scale the gradient with the dual vector: No dual key!");
+    }
+    this->transposeMultiplyAdd(1.0, scale, g); // g = A'*scale
+  }
+  else {
+    Vector b = getb();
+    // Gradient is really -A'*b / sigma^2
+    // transposeMultiplyAdd will divide by sigma once, so we need one more
+    Vector b_sigma = model_ ? model_->whiten(b) : b;
+    this->transposeMultiplyAdd(-1.0, b_sigma, g); // g -= A'*b/sigma^2
+  }
   return g;
 }
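A self-contained check of the new constrained branch: with no duals the gradient is A'·1, the sum of the columns of A' (equivalently the column sum of A's rows), and with duals each constraint row is weighted by its multiplier, matching the transposeMultiplyAdd(1.0, scale, g) call above (toy data, plain Eigen):

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXd A(2, 3);
  A << 1, 0, 2,
       0, 1, 3;

  // scale = ones: gradient of c(x) = Ax - b is the sum of the columns of A'.
  Eigen::VectorXd gNoDual = A.transpose() * Eigen::VectorXd::Ones(2);

  // scale = -lambda: each constraint row weighted by its dual variable.
  Eigen::VectorXd negDual(2);
  negDual << 0.5, -1.0;
  Eigen::VectorXd gScaled = A.transpose() * negDual;

  std::cout << gNoDual.transpose() << "\n" << gScaled.transpose() << "\n";
}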
@@ -630,6 +646,10 @@ void JacobianFactor::gradientAtZero(double* d) const {
 /* ************************************************************************* */
 Vector JacobianFactor::gradient(Key key, const VectorValues& x) const {
+  if (isConstrained()) { // Untested. But see the explanation in gradientAtZero()
+    Matrix A = getA(find(key));
+    return A.transpose()*ones(rows());
+  }
   // TODO: optimize it for JacobianFactor without converting to a HessianFactor
   HessianFactor hessian(*this);
   return hessian.gradient(key, x);

View File

@@ -342,9 +342,13 @@ public:
   }
   ;
-  /// A'*b for Jacobian
+  /**
+   * A'*b for Jacobian,
+   * with the option to scale by the corresponding negative dual variables
+   * for a constrained factor, -\lambda*\nabla c
+   */
   VectorValues gradientAtZero(
-      const boost::optional<Vector&> dual = boost::none) const;
+      const boost::optional<const VectorValues&> negDuals = boost::none) const;
   /* ************************************************************************* */
   virtual void gradientAtZero(double* d) const;
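Putting the JacobianFactor side together, a hedged usage sketch: build a constraint with a noiseModel::Constrained model so isConstrained() is true, then query the gradient with and without duals. How the factor's dual key gets assigned is not part of this commit, so the negDuals lookup below assumes dualKey() has been wired up elsewhere (all numbers are toy data):

#include <gtsam/linear/JacobianFactor.h>
#include <gtsam/linear/NoiseModel.h>

void example() {
  using namespace gtsam;

  // Equality constraint c(x) = Ax - b = 0 on variable 0; the Constrained
  // noise model is what makes isConstrained() return true.
  Matrix A = (Matrix(2, 2) << 1, 0, 0, 1).finished();
  Vector b = (Vector(2) << 1, 2).finished();
  JacobianFactor constraint(0, A, b, noiseModel::Constrained::All(2));

  // No duals: A' * ones, the unscaled constraint gradient.
  VectorValues g = constraint.gradientAtZero();

  // With duals, keyed by the factor's dual key: A' * (-lambda).
  VectorValues negDuals;
  negDuals.insert(constraint.dualKey(), (Vector(2) << -0.1, 0.3).finished());
  VectorValues gScaled = constraint.gradientAtZero(negDuals);
}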