fix gradient and gradientAtZero for linear constrained Jacobian, optionally scaled by the dual variables

parent 43a8de01c1
commit 11969b32f6
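Background for the hunks below, sketched from the diff itself (the convention that negDuals holds the negated multipliers -\lambda is inferred from the new code, not stated in the commit): for an equality-constrained least-squares problem, gradientAtZero now assembles the gradient of the Lagrangian at x = 0,

\[
\min_x \; \tfrac{1}{2}\lVert A_f x - b_f \rVert^2
\quad\text{s.t.}\quad c(x) = A_c x - b_c = 0,
\qquad
L(x,\lambda) = \tfrac{1}{2}\lVert A_f x - b_f \rVert^2 - \lambda^{\top} c(x),
\]
\[
\nabla_x L\big|_{x=0} = -A_f^{\top} b_f \;+\; A_c^{\top}(-\lambda).
\]

Least-squares factors contribute the familiar -A'b term; a constrained factor contributes the columns of A' scaled by its negated duals, which is what the reworked JacobianFactor::gradientAtZero computes.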
@@ -128,7 +128,7 @@ namespace gtsam {
     virtual void multiplyHessianAdd(double alpha, const double* x, double* y) const = 0;

     /// A'*b for Jacobian, eta for Hessian
-    virtual VectorValues gradientAtZero(const boost::optional<Vector&> dual = boost::none) const = 0;
+    virtual VectorValues gradientAtZero(const boost::optional<const VectorValues&> negDuals = boost::none) const = 0;

     /// A'*b for Jacobian, eta for Hessian (raw memory version)
     virtual void gradientAtZero(double* d) const = 0;
@@ -254,6 +254,7 @@ namespace gtsam {
   map<Key,Matrix> GaussianFactorGraph::hessianBlockDiagonal() const {
     map<Key,Matrix> blocks;
     BOOST_FOREACH(const sharedFactor& factor, *this) {
+      if (!factor) continue;
       map<Key,Matrix> BD = factor->hessianBlockDiagonal();
       map<Key,Matrix>::const_iterator it = BD.begin();
       for(;it!=BD.end();it++) {
@@ -299,11 +300,12 @@ namespace gtsam {
   }

   /* ************************************************************************* */
-  VectorValues GaussianFactorGraph::gradientAtZero() const {
+  VectorValues GaussianFactorGraph::gradientAtZero(
+      const boost::optional<const VectorValues&> negDuals) const {
     // Zero-out the gradient
     VectorValues g;
     BOOST_FOREACH(const sharedFactor& factor, *this) {
-      VectorValues gi = factor->gradientAtZero();
+      VectorValues gi = factor->gradientAtZero(negDuals);
       g.addInPlace_(gi);
     }
     return g;
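Because negDuals defaults to boost::none, existing call sites are unaffected. A usage fragment (not a complete program; the graph and the negDuals values, keyed by each constrained factor's dual key, are assumed to be built elsewhere):

    VectorValues g0 = graph.gradientAtZero();          // old behaviour: -A'b terms only
    VectorValues gL = graph.gradientAtZero(negDuals);  // constraint rows scaled by -lambda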
@@ -426,6 +428,7 @@ namespace gtsam {
       const VectorValues& delta) const {
     GaussianFactorGraph::shared_ptr dualGraph(new GaussianFactorGraph());
     BOOST_FOREACH(const Key key, constrainedVariables) {
+      // Each constrained key becomes a factor in the dual graph
       dualGraph->push_back(createDualFactor(key, variableIndex, delta));
     }
     return dualGraph;
@@ -262,11 +262,11 @@ namespace gtsam {
     /**
      * Compute the gradient of the energy function, \f$ \nabla_{x=0} \left\Vert \Sigma^{-1} A x - b
      * \right\Vert^2 \f$, centered around zero. The gradient is \f$ A^T(Ax-b) \f$.
-     * @param fg The Jacobian factor graph $(A,b)$
+     * @param duals [optional] Values of the dual variables used to scale constrained gradients, \f$ \lambda \nabla c(x) \f$
      * @param [output] g A VectorValues to store the gradient, which must be preallocated,
      * see allocateVectorValues
      * @return The gradient as a VectorValues */
-    virtual VectorValues gradientAtZero() const;
+    virtual VectorValues gradientAtZero(const boost::optional<const VectorValues&> duals = boost::none) const;

     /** Optimize along the gradient direction, with a closed-form computation to perform the line
      * search. The gradient is computed about \f$ \delta x=0 \f$.
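A quick consistency check between this Doxygen text and the implementation comments (an editor's sketch, taking \Sigma = \sigma^2 I):

\[
\nabla_x \, \tfrac{1}{2}\bigl\lVert \Sigma^{-1/2}(Ax-b) \bigr\rVert^2
  = A^{\top}\Sigma^{-1}(Ax-b)
\;\Longrightarrow\;
\nabla_x\big|_{x=0} = -A^{\top}\Sigma^{-1} b = -\tfrac{1}{\sigma^{2}} A^{\top} b,
\]

which matches the "Gradient is really -A'*b / sigma^2" comment in JacobianFactor::gradientAtZero below.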
@@ -591,17 +591,11 @@ void HessianFactor::multiplyHessianAdd(double alpha, const double* x,


 /* ************************************************************************* */
-VectorValues HessianFactor::gradientAtZero(const boost::optional<Vector&> dual) const {
+VectorValues HessianFactor::gradientAtZero(const boost::optional<const VectorValues&> negDuals) const {
   VectorValues g;
   size_t n = size();
   for (size_t j = 0; j < n; ++j)
     g.insert(keys_[j], -info_(j,n).knownOffDiagonal());
-  if (dual) {
-    if (dual->size() != 1) {
-      throw std::runtime_error("Fail to scale the gradient with the dual vector: Size mismatch!");
-    }
-    g = (*dual)[0] * g;
-  }
   return g;
 }
@@ -384,8 +384,11 @@ namespace gtsam {

     void multiplyHessianAdd(double alpha, const double* x, double* y) const {};

-    /// eta for Hessian
-    VectorValues gradientAtZero(const boost::optional<Vector&> dual = boost::none) const;
+    /**
+     * eta for Hessian.
+     * Ignores the duals parameter: it is only valid for constraints, which have to be JacobianFactors.
+     */
+    VectorValues gradientAtZero(const boost::optional<const VectorValues&> negDuals = boost::none) const;

     virtual void gradientAtZero(double* d) const;
@@ -152,8 +152,8 @@ JacobianFactor::JacobianFactor(const HessianFactor& factor) :
   bool success;
   boost::tie(maxrank, success) = choleskyCareful(Ab_.matrix());

-  factor.print("HessianFactor to convert: ");
-  cout << "Maxrank: " << maxrank << ", success: " << int(success) << endl;
+  // factor.print("HessianFactor to convert: ");
+  // cout << "Maxrank: " << maxrank << ", success: " << int(success) << endl;

   // Check for indefinite system
   if (!success) {
@@ -606,20 +606,36 @@ void JacobianFactor::multiplyHessianAdd(double alpha, const double* x,

 /* ************************************************************************* */
 VectorValues JacobianFactor::gradientAtZero(
-    const boost::optional<Vector&> dual) const {
+    const boost::optional<const VectorValues&> negDuals) const {
   VectorValues g;
-  Vector b = getb();
-  // Gradient is really -A'*b / sigma^2
-  // transposeMultiplyAdd will divide by sigma once, so we need one more
-  Vector b_sigma = model_ ? model_->whiten(b) : b;
-  // scale b by the dual vector if it exists
-  if (dual) {
-    if (dual->size() != b_sigma.size())
-      throw std::runtime_error(
-          "Fail to scale the gradient with the dual vector: Size mismatch!");
-    b_sigma = b_sigma.cwiseProduct(*dual);
-  }
-  this->transposeMultiplyAdd(-1.0, b_sigma, g); // g -= A'*b/sigma^2
+  /* We treat linear constraints differently: they are not least-squares terms,
+   * 0.5*||Ax-b||^2, but simply linear constraints, Ax-b=0.
+   * The constraint function is c(x) = Ax-b. Its Jacobian is A,
+   * and the gradient is the sum of the columns of A', each optionally scaled by
+   * the corresponding element of the negDuals vector.
+   */
+  if (isConstrained()) {
+    Vector scale = ones(rows());
+    if (negDuals && dualKey_) {
+      scale = negDuals->at(dualKey());
+      if (scale.size() != rows())
+        throw std::runtime_error(
+            "Fail to scale the gradient with the dual vector: Size mismatch!");
+    }
+    if (negDuals && !dualKey_) {
+      throw std::runtime_error(
+          "Fail to scale the gradient with the dual vector: No dual key!");
+    }
+    this->transposeMultiplyAdd(1.0, scale, g); // g = A'*scale
+  }
+  else {
+    Vector b = getb();
+    // Gradient is really -A'*b / sigma^2
+    // transposeMultiplyAdd will divide by sigma once, so we need one more
+    Vector b_sigma = model_ ? model_->whiten(b) : b;
+    this->transposeMultiplyAdd(-1.0, b_sigma, g); // g -= A'*b/sigma^2
+  }
   return g;
 }
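A standalone sketch of the two branches above, written directly against Eigen rather than the GTSAM wrappers (gtsam::Matrix and gtsam::Vector are Eigen typedefs); the values are made up for illustration:

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::MatrixXd A(2, 2);
      A << 1, 0,
           0, 2;
      Eigen::VectorXd b(2), negDual(2);
      b << 3, 4;
      negDual << 1, -1;  // plays the role of negDuals->at(dualKey())

      // Unconstrained branch: g = -A' * b (unit sigma, i.e. no whitening)
      Eigen::VectorXd gLeastSquares = -A.transpose() * b;

      // Constrained branch: g = A' * scale, with scale = -lambda
      // (or a vector of ones when no duals are supplied)
      Eigen::VectorXd gConstrained = A.transpose() * negDual;

      std::cout << "least-squares: " << gLeastSquares.transpose() << "\n"
                << "constrained:   " << gConstrained.transpose() << "\n";
      return 0;
    }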
@@ -630,6 +646,10 @@ void JacobianFactor::gradientAtZero(double* d) const {

 /* ************************************************************************* */
 Vector JacobianFactor::gradient(Key key, const VectorValues& x) const {
+  if (isConstrained()) { // Untested. But see the explanation in gradientAtZero()
+    Matrix A = getA(find(key));
+    return A.transpose()*ones(rows());
+  }
   // TODO: optimize it for JacobianFactor without converting to a HessianFactor
   HessianFactor hessian(*this);
   return hessian.gradient(key, x);
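Note on the new constrained branch: with no duals available here, A.transpose()*ones(rows()) is the unscaled constraint gradient from gradientAtZero(), i.e. entry j is the sum of column j of A. For example:

\[
A = \begin{pmatrix} 1 & 2 \\ 3 & 4 \end{pmatrix}
\;\Rightarrow\;
A^{\top}\mathbf{1} = \begin{pmatrix} 1+3 \\ 2+4 \end{pmatrix}
                   = \begin{pmatrix} 4 \\ 6 \end{pmatrix}.
\]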
@@ -342,9 +342,13 @@ public:
     }
     ;

-    /// A'*b for Jacobian
+    /**
+     * A'*b for Jacobian,
+     * with the option to scale by the corresponding negative dual variable
+     * for a constrained factor, -\lambda*\nabla c
+     */
     VectorValues gradientAtZero(
-        const boost::optional<Vector&> dual = boost::none) const;
+        const boost::optional<const VectorValues&> negDuals = boost::none) const;

     /* ************************************************************************* */
     virtual void gradientAtZero(double* d) const;