// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
//
// The LossFunction interface is the way users describe how residuals
// are converted to cost terms for the overall problem cost function.
// For the exact manner in which loss functions are converted to the
// overall cost for a problem, see problem.h.
//
// For least squares problems where there are no outliers and standard
// squared loss is expected, it is not necessary to create a loss
// function; instead, passing a nullptr to the problem when adding
// residuals implies a standard squared loss.
//
// For least squares problems where the minimization may encounter
// input terms that contain outliers, that is, completely bogus
// measurements, it is important to use a loss function that reduces
// their associated penalty.
//
// Consider a structure from motion problem. The unknowns are 3D
// points and camera parameters, and the measurements are image
// coordinates describing the expected reprojected position for a
// point in a camera. For example, we want to model the geometry of a
// street scene with fire hydrants and cars, observed by a moving
// camera with unknown parameters, and the only 3D points we care
// about are the pointy tippy-tops of the fire hydrants. Our magic
// image processing algorithm, which is responsible for producing the
// measurements that are input to Ceres, has found and matched all
// such tippy-tops in all image frames, except that in one of the
// frames it mistook a car's headlight for a hydrant. If we didn't do
// anything special (i.e. if we used a basic quadratic loss), the
// residual for the erroneous measurement would result in an extreme
// error due to the quadratic nature of squared loss. This pulls the
// entire solution away from the optimum in order to reduce the large
// error that would otherwise be attributed to the wrong measurement.
//
// Using a robust loss function, the cost for large residuals is
// reduced. In the example above, this leads to outlier terms getting
// downweighted so they do not overly influence the final solution.
//
// What loss function is best?
//
// In general, there isn't a principled way to select a robust loss
// function. The authors suggest starting with a non-robust cost, then
// only experimenting with robust loss functions if standard squared
// loss doesn't work.
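//
// For instance, a hypothetical reprojection residual could be added to a
// problem either with no robustification or with a Huber loss (the cost
// function and parameter block names below are illustrative, not part of
// Ceres):
//
//   // Standard squared loss: pass nullptr as the loss function.
//   problem.AddResidualBlock(reprojection_cost, nullptr, camera, point);
//
//   // Robustified: residuals with norm larger than 1.0 are downweighted.
//   problem.AddResidualBlock(
//       reprojection_cost, new HuberLoss(1.0), camera, point);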

#ifndef CERES_PUBLIC_LOSS_FUNCTION_H_
#define CERES_PUBLIC_LOSS_FUNCTION_H_

#include <memory>

#include "ceres/types.h"  // For Ownership.

namespace ceres {

class CERES_EXPORT LossFunction {
 public:
  virtual ~LossFunction();

  // For a residual vector with squared 2-norm 'sq_norm', this method
  // is required to fill in the value and derivatives of the loss
  // function (rho in this example):
  //
  //   out[0] = rho(sq_norm),
  //   out[1] = rho'(sq_norm),
  //   out[2] = rho''(sq_norm),
  //
  // Here the convention is that the contribution of a term to the
  // cost function is given by 1/2 rho(s), where
  //
  //   s = ||residuals||^2.
  //
  // Calling the method with a negative value of 's' is an error and
  // the implementations are not required to handle that case.
  //
  // Most sane choices of rho() satisfy:
  //
  //   rho(0) = 0,
  //   rho'(0) = 1,
  //   rho'(s) < 1 in the outlier region,
  //   rho''(s) < 0 in the outlier region,
  //
  // so that they mimic the least squares cost for small residuals.
  virtual void Evaluate(double sq_norm, double out[3]) const = 0;
};
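
// For example, a user-defined loss implementing rho(s) = 2 (sqrt(1 + s) - 1)
// could look as follows (a minimal sketch; Ceres already ships this rho as
// SoftLOneLoss below):
//
//   class MyLoss : public ceres::LossFunction {
//    public:
//     void Evaluate(double s, double out[3]) const override {
//       const double sum = 1.0 + s;
//       const double tmp = sqrt(sum);
//       out[0] = 2.0 * (tmp - 1.0);   // rho(s)
//       out[1] = 1.0 / tmp;           // rho'(s); equals 1 at s = 0.
//       out[2] = -0.5 / (sum * tmp);  // rho''(s); negative for all s >= 0.
//     }
//   };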

// Some common implementations follow below.
//
// Note: in the region of interest (i.e. s < 3) we have:
//   TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss

// This corresponds to no robustification.
//
//   rho(s) = s
//
// At s = 0: rho = [0, 1, 0].
//
// It is not normally necessary to use this, as passing nullptr for the
// loss function when building the problem accomplishes the same
// thing.
class CERES_EXPORT TrivialLoss final : public LossFunction {
 public:
  void Evaluate(double, double*) const override;
};

// Scaling
// -------
// Given one robustifier
//   s -> rho(s)
// one can change the length scale at which robustification takes
// place, by adding a scale factor 'a' as follows:
//
//   s -> a^2 rho(s / a^2).
//
// The first and second derivatives are:
//
//   s -> rho'(s / a^2),
//   s -> (1 / a^2) rho''(s / a^2),
//
// but the behaviour near s = 0 is the same as the original function,
// i.e.
//
//   rho(s) = s + higher order terms,
//   a^2 rho(s / a^2) = s + higher order terms.
//
// The scalar 'a' should be positive.
//
// The reason for the appearance of squaring is that 'a' is in the
// units of the residual vector norm whereas 's' is a squared
// norm. For applications it is more convenient to specify 'a' than
// its square. The commonly used robustifiers below are described in
// un-scaled format (a = 1) but their implementations work for any
// non-zero value of 'a'.
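//
// As a concrete worked example (a sketch derived from the recipe above,
// not a statement about the shipped implementation), applying this
// scaling to the Cauchy loss below gives
//
//   rho(s)   = a^2 log(1 + s / a^2),
//   rho'(s)  = 1 / (1 + s / a^2),
//   rho''(s) = -(1 / a^2) / (1 + s / a^2)^2,
//
// which behaves like rho(s) = s for s << a^2 and grows only
// logarithmically for s >> a^2.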

// Huber.
//
//   rho(s) = s               for s <= 1,
//   rho(s) = 2 sqrt(s) - 1   for s >= 1.
//
// At s = 0: rho = [0, 1, 0].
//
// The scaling parameter 'a' corresponds to 'delta' on this page:
//   http://en.wikipedia.org/wiki/Huber_Loss_Function
class CERES_EXPORT HuberLoss final : public LossFunction {
 public:
  explicit HuberLoss(double a) : a_(a), b_(a * a) {}
  void Evaluate(double, double*) const override;

 private:
  const double a_;
  // b = a^2.
  const double b_;
};
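
// A sketch of how the scaled Huber loss can be evaluated, using the
// members above with b = a^2 (illustrative; the shipped implementation
// may differ in numerical details):
//
//   void HuberLoss::Evaluate(double s, double rho[3]) const {
//     if (s > b_) {
//       // Outlier region: a^2 rho_base(s / a^2) = 2 * a * sqrt(s) - b.
//       const double r = sqrt(s);
//       rho[0] = 2.0 * a_ * r - b_;
//       rho[1] = a_ / r;
//       rho[2] = -rho[1] / (2.0 * s);
//     } else {
//       // Inlier region: plain squared loss.
//       rho[0] = s;
//       rho[1] = 1.0;
//       rho[2] = 0.0;
//     }
//   }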

// Soft L1, similar to Huber but smooth.
//
//   rho(s) = 2 (sqrt(1 + s) - 1).
//
// At s = 0: rho = [0, 1, -1 / (2 * a^2)].
class CERES_EXPORT SoftLOneLoss final : public LossFunction {
 public:
  explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) {}
  void Evaluate(double, double*) const override;

 private:
  // b = a^2.
  const double b_;
  // c = 1 / a^2.
  const double c_;
};

// Inspired by the Cauchy distribution
//
//   rho(s) = log(1 + s).
//
// At s = 0: rho = [0, 1, -1 / a^2].
class CERES_EXPORT CauchyLoss final : public LossFunction {
 public:
  explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) {}
  void Evaluate(double, double*) const override;

 private:
  // b = a^2.
  const double b_;
  // c = 1 / a^2.
  const double c_;
};
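
// As a quick numerical check of the ordering noted above, evaluate the
// four losses at s = 3 with a = 1:
//
//   TrivialLoss:  rho(3) = 3
//   HuberLoss:    rho(3) = 2 sqrt(3) - 1    ~= 2.46
//   SoftLOneLoss: rho(3) = 2 (sqrt(4) - 1)   = 2
//   CauchyLoss:   rho(3) = log(4)           ~= 1.39
//
// so TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss holds.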

// Loss that is capped beyond a certain level using the arc-tangent function.
// The scaling parameter 'a' determines the level where falloff occurs.
// For costs much smaller than 'a', the loss function is linear and behaves like
// TrivialLoss, and for values much larger than 'a' the value asymptotically
// approaches the constant value of a * PI / 2.
//
//   rho(s) = a atan(s / a).
//
// At s = 0: rho = [0, 1, 0].
class CERES_EXPORT ArctanLoss final : public LossFunction {
 public:
  explicit ArctanLoss(double a) : a_(a), b_(1 / (a * a)) {}
  void Evaluate(double, double*) const override;

 private:
  const double a_;
  // b = 1 / a^2.
  const double b_;
};

// Loss function that maps to approximately zero cost in a range around the
// origin, and reverts to linear in error (quadratic in cost) beyond this range.
// The tolerance parameter 'a' sets the nominal point at which the
// transition occurs, and the transition size parameter 'b' sets the nominal
// distance over which most of the transition occurs. Both a and b must be
// greater than zero, and typically b will be set to a fraction of a.
// The slope rho'[s] varies smoothly from about 0 at s <= a - b to
// about 1 at s >= a + b.
//
// The term is computed as:
//
//   rho(s) = b log(1 + exp((s - a) / b)) - c0,
//
// where c0 is chosen so that rho(0) == 0:
//
//   c0 = b log(1 + exp(-a / b)).
//
// This has the following useful properties:
//
//   rho(s)   == 0  for s = 0
//   rho'(s)  ~= 0  for s << a - b
//   rho'(s)  ~= 1  for s >> a + b
//   rho''(s) > 0   for all s
//
// In addition, all derivatives are continuous, and the curvature is
// concentrated in the range a - b to a + b.
//
// At s = 0: rho = [0, ~0, ~0].
class CERES_EXPORT TolerantLoss final : public LossFunction {
 public:
  explicit TolerantLoss(double a, double b);
  void Evaluate(double, double*) const override;

 private:
  const double a_, b_, c_;
};
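
// A sketch of the constructor, assuming c_ stores the offset c0 defined
// above (the CHECKs, from glog, enforce the positivity requirements
// stated in the comment):
//
//   TolerantLoss::TolerantLoss(double a, double b)
//       : a_(a), b_(b), c_(b * log(1.0 + exp(-a / b))) {
//     CHECK_GT(a, 0.0);
//     CHECK_GT(b, 0.0);
//   }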

// This is the Tukey biweight loss function which aggressively
// attempts to suppress large errors.
//
// The term is computed as follows where the equations are scaled by a
// factor of 2 because the cost function is given by 1/2 rho(s):
//
//   rho(s) = a^2 / 3 * (1 - (1 - s / a^2)^3)   for s <= a^2,
//   rho(s) = a^2 / 3                           for s >  a^2.
//
// At s = 0: rho = [0, 1, -2 / a^2].
class CERES_EXPORT TukeyLoss final : public ceres::LossFunction {
 public:
  explicit TukeyLoss(double a) : a_squared_(a * a) {}
  void Evaluate(double, double*) const override;

 private:
  const double a_squared_;
};

// Composition of two loss functions. The error is the result of first
// evaluating g followed by f to yield the composition f(g(s)).
// The loss functions must not be nullptr.
class CERES_EXPORT ComposedLoss final : public LossFunction {
 public:
  explicit ComposedLoss(const LossFunction* f,
                        Ownership ownership_f,
                        const LossFunction* g,
                        Ownership ownership_g);
  ~ComposedLoss() override;
  void Evaluate(double, double*) const override;

 private:
  std::unique_ptr<const LossFunction> f_, g_;
  const Ownership ownership_f_, ownership_g_;
};
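
// Evaluation of the composition follows the chain rule. Writing
// rho(s) = f(g(s)), the values Evaluate must produce are
//
//   out[0] = f(g(s)),
//   out[1] = f'(g(s)) g'(s),
//   out[2] = f''(g(s)) g'(s)^2 + f'(g(s)) g''(s),
//
// where f' and f'' are evaluated at g(s), and g' and g'' at s.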

// The discussion above has to do with length scaling: it affects the space
// in which s is measured. Sometimes you want to simply scale the output
// value of the robustifier. For example, you might want to weight
// different error terms differently (e.g., weight pixel reprojection
// errors differently from terrain errors).
//
// If rho is the wrapped robustifier, then this simply outputs
//
//   s -> a * rho(s).
//
// The first and second derivatives are, not surprisingly,
//
//   s -> a * rho'(s),
//   s -> a * rho''(s).
//
// Since we treat a nullptr loss function as the Identity loss
// function, rho = nullptr is a valid input and will result in the input
// being scaled by a. This provides a simple way of implementing a
// scaled ResidualBlock.
class CERES_EXPORT ScaledLoss final : public LossFunction {
 public:
  // Constructs a ScaledLoss wrapping another loss function. Takes
  // ownership of the wrapped loss function or not depending on the
  // ownership parameter.
  ScaledLoss(const LossFunction* rho, double a, Ownership ownership)
      : rho_(rho), a_(a), ownership_(ownership) {}
  ScaledLoss(const ScaledLoss&) = delete;
  void operator=(const ScaledLoss&) = delete;
  ~ScaledLoss() override {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
  }
  void Evaluate(double, double*) const override;

 private:
  std::unique_ptr<const LossFunction> rho_;
  const double a_;
  const Ownership ownership_;
};
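
// Example usage, weighting a residual block by a factor of 4 on top of a
// Cauchy robustifier (the cost function and parameter block names are
// illustrative, not part of Ceres):
//
//   problem.AddResidualBlock(
//       terrain_cost,
//       new ScaledLoss(new CauchyLoss(1.0), 4.0, TAKE_OWNERSHIP),
//       terrain_params);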

// Sometimes after the optimization problem has been constructed, we
// wish to mutate the scale of the loss function. For example, when
// performing estimation from data which has substantial outliers,
// convergence can be improved by starting out with a large scale,
// optimizing the problem and then reducing the scale. This can have
// better convergence behaviour than just using a loss function with a
// small scale.
//
// This class allows the user to implement a loss function whose scale
// can be mutated after an optimization problem has been constructed.
//
// Since we treat a nullptr loss function as the Identity loss
// function, rho = nullptr is a valid input.
//
// Example usage:
//
//   Problem problem;
//
//   // Add parameter blocks.
//
//   CostFunction* cost_function =
//       new AutoDiffCostFunction<UW_Camera_Mapper, 2, 9, 3>(
//           new UW_Camera_Mapper(feature_x, feature_y));
//
//   LossFunctionWrapper* loss_function = new LossFunctionWrapper(
//       new HuberLoss(1.0), TAKE_OWNERSHIP);
//
//   problem.AddResidualBlock(cost_function, loss_function, parameters);
//
//   Solver::Options options;
//   Solver::Summary summary;
//
//   Solve(options, &problem, &summary);
//
//   loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP);
//
//   Solve(options, &problem, &summary);
class CERES_EXPORT LossFunctionWrapper final : public LossFunction {
 public:
  LossFunctionWrapper(LossFunction* rho, Ownership ownership)
      : rho_(rho), ownership_(ownership) {}
  LossFunctionWrapper(const LossFunctionWrapper&) = delete;
  void operator=(const LossFunctionWrapper&) = delete;
  ~LossFunctionWrapper() override {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
  }
  void Evaluate(double sq_norm, double out[3]) const override {
    if (rho_.get() == nullptr) {
      out[0] = sq_norm;
      out[1] = 1.0;
      out[2] = 0.0;
    } else {
      rho_->Evaluate(sq_norm, out);
    }
  }
  void Reset(LossFunction* rho, Ownership ownership) {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
    rho_.reset(rho);
    ownership_ = ownership;
  }

 private:
  std::unique_ptr<const LossFunction> rho_;
  Ownership ownership_;
};

}  // namespace ceres

#endif  // CERES_PUBLIC_LOSS_FUNCTION_H_