// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
//
// The LossFunction interface is the way users describe how residuals
// are converted to cost terms for the overall problem cost function.
// For the exact manner in which loss functions are converted to the
// overall cost for a problem, see problem.h.
//
// For least squares problems where there are no outliers and standard
// squared loss is expected, it is not necessary to create a loss
// function; instead, passing a nullptr to the problem when adding
// residuals implies a standard squared loss.
//
// For least squares problems where the minimization may encounter
// input terms that contain outliers, that is, completely bogus
// measurements, it is important to use a loss function that reduces
// their associated penalty.
//
// Consider a structure from motion problem. The unknowns are 3D
// points and camera parameters, and the measurements are image
// coordinates describing the expected reprojected position for a
// point in a camera. For example, we want to model the geometry of a
// street scene with fire hydrants and cars, observed by a moving
// camera with unknown parameters, and the only 3D points we care
// about are the pointy tippy-tops of the fire hydrants. Our magic
// image processing algorithm, which is responsible for producing the
// measurements that are input to Ceres, has found and matched all
// such tippy-tops in all image frames, except that in one of the
// frames it mistook a car's headlight for a hydrant. If we didn't do
// anything special (i.e. if we used a basic quadratic loss), the
// residual for the erroneous measurement would result in an extreme
// error due to the quadratic nature of squared loss. This pulls the
// entire solution away from the optimum in order to reduce the large
// error that would otherwise be attributed to the wrong measurement.
//
// Using a robust loss function, the cost for large residuals is
// reduced. In the example above, this leads to outlier terms getting
// downweighted so they do not overly influence the final solution.
//
// What loss function is best?
//
// In general, there isn't a principled way to select a robust loss
// function. The authors suggest starting with a non-robust cost, then
// only experimenting with robust loss functions if standard squared
// loss doesn't work.
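//
// For instance, a hypothetical reprojection residual could be added to a
// problem either with no robustification or with a Huber loss (the cost
// function and parameter block names below are illustrative, not part of
// Ceres):
//
//   // Standard squared loss: pass nullptr as the loss function.
//   problem.AddResidualBlock(reprojection_cost, nullptr, camera, point);
//
//   // Robustified: residuals with norm larger than 1.0 are downweighted.
//   problem.AddResidualBlock(
//       reprojection_cost, new HuberLoss(1.0), camera, point);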

#ifndef CERES_PUBLIC_LOSS_FUNCTION_H_
#define CERES_PUBLIC_LOSS_FUNCTION_H_

#include <memory>

#include "ceres/types.h"  // For Ownership.

namespace ceres {

class CERES_EXPORT LossFunction {
 public:
  virtual ~LossFunction();

  // For a residual vector with squared 2-norm 'sq_norm', this method
  // is required to fill in the value and derivatives of the loss
  // function (rho in this example):
  //
  //   out[0] = rho(sq_norm),
  //   out[1] = rho'(sq_norm),
  //   out[2] = rho''(sq_norm),
  //
  // Here the convention is that the contribution of a term to the
  // cost function is given by 1/2 rho(s), where
  //
  //   s = ||residuals||^2.
  //
  // Calling the method with a negative value of 's' is an error and
  // the implementations are not required to handle that case.
  //
  // Most sane choices of rho() satisfy:
  //
  //   rho(0) = 0,
  //   rho'(0) = 1,
  //   rho'(s) < 1 in the outlier region,
  //   rho''(s) < 0 in the outlier region,
  //
  // so that they mimic the least squares cost for small residuals.
  virtual void Evaluate(double sq_norm, double out[3]) const = 0;
};
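
// For example, a user-defined loss implementing rho(s) = 2 (sqrt(1 + s) - 1)
// could look as follows (a minimal sketch; Ceres already ships this rho as
// SoftLOneLoss below):
//
//   class MyLoss : public ceres::LossFunction {
//    public:
//     void Evaluate(double s, double out[3]) const override {
//       const double sum = 1.0 + s;
//       const double tmp = sqrt(sum);
//       out[0] = 2.0 * (tmp - 1.0);   // rho(s)
//       out[1] = 1.0 / tmp;           // rho'(s); equals 1 at s = 0.
//       out[2] = -0.5 / (sum * tmp);  // rho''(s); negative for all s >= 0.
//     }
//   };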

// Some common implementations follow below.
//
// Note: in the region of interest (i.e. s < 3) we have:
//   TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss

// This corresponds to no robustification.
//
//   rho(s) = s
//
// At s = 0: rho = [0, 1, 0].
//
// It is not normally necessary to use this, as passing nullptr for the
// loss function when building the problem accomplishes the same
// thing.
class CERES_EXPORT TrivialLoss final : public LossFunction {
 public:
  void Evaluate(double, double*) const override;
};

// Scaling
// -------
// Given one robustifier
//   s -> rho(s)
// one can change the length scale at which robustification takes
// place, by adding a scale factor 'a' as follows:
//
//   s -> a^2 rho(s / a^2).
//
// The first and second derivatives are:
//
//   s -> rho'(s / a^2),
//   s -> (1 / a^2) rho''(s / a^2),
//
// but the behaviour near s = 0 is the same as the original function,
// i.e.
//
//   rho(s) = s + higher order terms,
//   a^2 rho(s / a^2) = s + higher order terms.
//
// The scalar 'a' should be positive.
//
// The reason for the appearance of squaring is that 'a' is in the
// units of the residual vector norm whereas 's' is a squared
// norm. For applications it is more convenient to specify 'a' than
// its square. The commonly used robustifiers below are described in
// un-scaled format (a = 1) but their implementations work for any
// non-zero value of 'a'.
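//
// As a concrete worked example (a sketch derived from the recipe above,
// not a statement about the shipped implementation), applying this
// scaling to the Cauchy loss below gives
//
//   rho(s)   = a^2 log(1 + s / a^2),
//   rho'(s)  = 1 / (1 + s / a^2),
//   rho''(s) = -(1 / a^2) / (1 + s / a^2)^2,
//
// which behaves like rho(s) = s for s << a^2 and grows only
// logarithmically for s >> a^2.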

// Huber.
//
//   rho(s) = s               for s <= 1,
//   rho(s) = 2 sqrt(s) - 1   for s >= 1.
//
// At s = 0: rho = [0, 1, 0].
//
// The scaling parameter 'a' corresponds to 'delta' on this page:
//   http://en.wikipedia.org/wiki/Huber_Loss_Function
class CERES_EXPORT HuberLoss final : public LossFunction {
 public:
  explicit HuberLoss(double a) : a_(a), b_(a * a) {}
  void Evaluate(double, double*) const override;

 private:
  const double a_;
  // b = a^2.
  const double b_;
};
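
// A sketch of how the scaled Huber loss can be evaluated, using the
// members above with b = a^2 (illustrative; the shipped implementation
// may differ in numerical details):
//
//   void HuberLoss::Evaluate(double s, double rho[3]) const {
//     if (s > b_) {
//       // Outlier region: a^2 rho_base(s / a^2) = 2 * a * sqrt(s) - b.
//       const double r = sqrt(s);
//       rho[0] = 2.0 * a_ * r - b_;
//       rho[1] = a_ / r;
//       rho[2] = -rho[1] / (2.0 * s);
//     } else {
//       // Inlier region: plain squared loss.
//       rho[0] = s;
//       rho[1] = 1.0;
//       rho[2] = 0.0;
//     }
//   }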

// Soft L1, similar to Huber but smooth.
//
//   rho(s) = 2 (sqrt(1 + s) - 1).
//
// At s = 0: rho = [0, 1, -1 / (2 * a^2)].
class CERES_EXPORT SoftLOneLoss final : public LossFunction {
 public:
  explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) {}
  void Evaluate(double, double*) const override;

 private:
  // b = a^2.
  const double b_;
  // c = 1 / a^2.
  const double c_;
};

// Inspired by the Cauchy distribution
//
//   rho(s) = log(1 + s).
//
// At s = 0: rho = [0, 1, -1 / a^2].
class CERES_EXPORT CauchyLoss final : public LossFunction {
 public:
  explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) {}
  void Evaluate(double, double*) const override;

 private:
  // b = a^2.
  const double b_;
  // c = 1 / a^2.
  const double c_;
};
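
// As a quick numerical check of the ordering noted above, evaluate the
// four losses at s = 3 with a = 1:
//
//   TrivialLoss:  rho(3) = 3
//   HuberLoss:    rho(3) = 2 sqrt(3) - 1    ~= 2.46
//   SoftLOneLoss: rho(3) = 2 (sqrt(4) - 1)   = 2
//   CauchyLoss:   rho(3) = log(4)           ~= 1.39
//
// so TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss holds.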

// Loss that is capped beyond a certain level using the arc-tangent function.
// The scaling parameter 'a' determines the level where falloff occurs.
// For costs much smaller than 'a', the loss function is linear and behaves like
// TrivialLoss, and for values much larger than 'a' the value asymptotically
// approaches the constant value of a * PI / 2.
//
//   rho(s) = a atan(s / a).
//
// At s = 0: rho = [0, 1, 0].
class CERES_EXPORT ArctanLoss final : public LossFunction {
 public:
  explicit ArctanLoss(double a) : a_(a), b_(1 / (a * a)) {}
  void Evaluate(double, double*) const override;

 private:
  const double a_;
  // b = 1 / a^2.
  const double b_;
};

// Loss function that maps to approximately zero cost in a range around the
// origin, and reverts to linear in error (quadratic in cost) beyond this range.
// The tolerance parameter 'a' sets the nominal point at which the
// transition occurs, and the transition size parameter 'b' sets the nominal
// distance over which most of the transition occurs. Both a and b must be
// greater than zero, and typically b will be set to a fraction of a.
// The slope rho'[s] varies smoothly from about 0 at s <= a - b to
// about 1 at s >= a + b.
//
// The term is computed as:
//
//   rho(s) = b log(1 + exp((s - a) / b)) - c0,
//
// where c0 is chosen so that rho(0) == 0:
//
//   c0 = b log(1 + exp(-a / b)).
//
// This has the following useful properties:
//
//   rho(s)   == 0  for s = 0
//   rho'(s)  ~= 0  for s << a - b
//   rho'(s)  ~= 1  for s >> a + b
//   rho''(s) > 0   for all s
//
// In addition, all derivatives are continuous, and the curvature is
// concentrated in the range a - b to a + b.
//
// At s = 0: rho = [0, ~0, ~0].
class CERES_EXPORT TolerantLoss final : public LossFunction {
 public:
  explicit TolerantLoss(double a, double b);
  void Evaluate(double, double*) const override;

 private:
  const double a_, b_, c_;
};
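
// A sketch of the constructor, assuming c_ stores the offset c0 defined
// above (the CHECKs, from glog, enforce the positivity requirements
// stated in the comment):
//
//   TolerantLoss::TolerantLoss(double a, double b)
//       : a_(a), b_(b), c_(b * log(1.0 + exp(-a / b))) {
//     CHECK_GT(a, 0.0);
//     CHECK_GT(b, 0.0);
//   }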

// This is the Tukey biweight loss function which aggressively
// attempts to suppress large errors.
//
// The term is computed as follows where the equations are scaled by a
// factor of 2 because the cost function is given by 1/2 rho(s):
//
//   rho(s) = a^2 / 3 * (1 - (1 - s / a^2)^3)   for s <= a^2,
//   rho(s) = a^2 / 3                           for s >  a^2.
//
// At s = 0: rho = [0, 1, -2 / a^2].
class CERES_EXPORT TukeyLoss final : public ceres::LossFunction {
 public:
  explicit TukeyLoss(double a) : a_squared_(a * a) {}
  void Evaluate(double, double*) const override;

 private:
  const double a_squared_;
};

// Composition of two loss functions. The error is the result of first
// evaluating g followed by f to yield the composition f(g(s)).
// The loss functions must not be nullptr.
class CERES_EXPORT ComposedLoss final : public LossFunction {
 public:
  explicit ComposedLoss(const LossFunction* f,
                        Ownership ownership_f,
                        const LossFunction* g,
                        Ownership ownership_g);
  ~ComposedLoss() override;
  void Evaluate(double, double*) const override;

 private:
  std::unique_ptr<const LossFunction> f_, g_;
  const Ownership ownership_f_, ownership_g_;
};
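
// Evaluation of the composition follows the chain rule. Writing
// rho(s) = f(g(s)), the values Evaluate must produce are
//
//   out[0] = f(g(s)),
//   out[1] = f'(g(s)) g'(s),
//   out[2] = f''(g(s)) g'(s)^2 + f'(g(s)) g''(s),
//
// where f' and f'' are evaluated at g(s), and g' and g'' at s.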

// The discussion above has to do with length scaling: it affects the space
// in which s is measured. Sometimes you want to simply scale the output
// value of the robustifier. For example, you might want to weight
// different error terms differently (e.g., weight pixel reprojection
// errors differently from terrain errors).
//
// If rho is the wrapped robustifier, then this simply outputs
//
//   s -> a * rho(s).
//
// The first and second derivatives are, not surprisingly,
//
//   s -> a * rho'(s),
//   s -> a * rho''(s).
//
// Since we treat a nullptr loss function as the Identity loss
// function, rho = nullptr is a valid input and will result in the input
// being scaled by a. This provides a simple way of implementing a
// scaled ResidualBlock.
class CERES_EXPORT ScaledLoss final : public LossFunction {
 public:
  // Constructs a ScaledLoss wrapping another loss function. Takes
  // ownership of the wrapped loss function or not depending on the
  // ownership parameter.
  ScaledLoss(const LossFunction* rho, double a, Ownership ownership)
      : rho_(rho), a_(a), ownership_(ownership) {}
  ScaledLoss(const ScaledLoss&) = delete;
  void operator=(const ScaledLoss&) = delete;
  ~ScaledLoss() override {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
  }
  void Evaluate(double, double*) const override;

 private:
  std::unique_ptr<const LossFunction> rho_;
  const double a_;
  const Ownership ownership_;
};
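
// Example usage, weighting a residual block by a factor of 4 on top of a
// Cauchy robustifier (the cost function and parameter block names are
// illustrative, not part of Ceres):
//
//   problem.AddResidualBlock(
//       terrain_cost,
//       new ScaledLoss(new CauchyLoss(1.0), 4.0, TAKE_OWNERSHIP),
//       terrain_params);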

// Sometimes after the optimization problem has been constructed, we
// wish to mutate the scale of the loss function. For example, when
// performing estimation from data which has substantial outliers,
// convergence can be improved by starting out with a large scale,
// optimizing the problem and then reducing the scale. This can have
// better convergence behaviour than just using a loss function with a
// small scale.
//
// This class allows the user to implement a loss function whose scale
// can be mutated after an optimization problem has been constructed.
//
// Since we treat a nullptr loss function as the Identity loss
// function, rho = nullptr is a valid input.
//
// Example usage:
//
//   Problem problem;
//
//   // Add parameter blocks.
//
//   CostFunction* cost_function =
//       new AutoDiffCostFunction<UW_Camera_Mapper, 2, 9, 3>(
//           new UW_Camera_Mapper(feature_x, feature_y));
//
//   LossFunctionWrapper* loss_function = new LossFunctionWrapper(
//       new HuberLoss(1.0), TAKE_OWNERSHIP);
//
//   problem.AddResidualBlock(cost_function, loss_function, parameters);
//
//   Solver::Options options;
//   Solver::Summary summary;
//
//   Solve(options, &problem, &summary);
//
//   loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP);
//
//   Solve(options, &problem, &summary);
class CERES_EXPORT LossFunctionWrapper final : public LossFunction {
 public:
  LossFunctionWrapper(LossFunction* rho, Ownership ownership)
      : rho_(rho), ownership_(ownership) {}
  LossFunctionWrapper(const LossFunctionWrapper&) = delete;
  void operator=(const LossFunctionWrapper&) = delete;
  ~LossFunctionWrapper() override {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
  }
  void Evaluate(double sq_norm, double out[3]) const override {
    if (rho_.get() == nullptr) {
      out[0] = sq_norm;
      out[1] = 1.0;
      out[2] = 0.0;
    } else {
      rho_->Evaluate(sq_norm, out);
    }
  }
  void Reset(LossFunction* rho, Ownership ownership) {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
    rho_.reset(rho);
    ownership_ = ownership;
  }

 private:
  std::unique_ptr<const LossFunction> rho_;
  Ownership ownership_;
};

}  // namespace ceres

#endif  // CERES_PUBLIC_LOSS_FUNCTION_H_