spam-classifier/venv/lib/python3.11/site-packages/scipy/optimize/_differentiable_functions.py
import numpy as np
import scipy.sparse as sps

from ._numdiff import approx_derivative, group_columns
from ._hessian_update_strategy import HessianUpdateStrategy
from scipy.sparse.linalg import LinearOperator
from scipy._lib._array_api import array_namespace
from scipy._lib import array_api_extra as xpx


FD_METHODS = ('2-point', '3-point', 'cs')


def _wrapper_fun(fun, args=()):
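    """Wrap a user-supplied scalar objective so that every call is counted
    and the return value is coerced to a true scalar.

    Returns the wrapped callable together with a one-element list holding
    the running call count. A minimal sketch of the intended behaviour,
    using a hypothetical quadratic objective:

    >>> import numpy as np
    >>> wrapped, ncalls = _wrapper_fun(lambda x: np.sum(x**2))
    >>> float(wrapped(np.array([1.0, 2.0])))
    5.0
    >>> ncalls[0]
    1
    """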
    ncalls = [0]

    def wrapped(x):
        ncalls[0] += 1
        # Send a copy because the user may overwrite it.
        # Overwriting results in undefined behaviour because
        # fun(self.x) will change self.x, with the two no longer linked.
        fx = fun(np.copy(x), *args)
        # Make sure the function returns a true scalar
        if not np.isscalar(fx):
            try:
                fx = np.asarray(fx).item()
            except (TypeError, ValueError) as e:
                raise ValueError(
                    "The user-provided objective function "
                    "must return a scalar value."
                ) from e
        return fx

    return wrapped, ncalls


def _wrapper_grad(grad, fun=None, args=(), finite_diff_options=None):
    ncalls = [0]

    if callable(grad):
        def wrapped(x, **kwds):
            # kwds present to give function same signature as numdiff variant
            ncalls[0] += 1
            return np.atleast_1d(grad(np.copy(x), *args))

        return wrapped, ncalls
    elif grad in FD_METHODS:
        def wrapped1(x, f0=None):
            ncalls[0] += 1
            return approx_derivative(
                fun, x, f0=f0, **finite_diff_options
            )

        return wrapped1, ncalls


def _wrapper_hess(hess, grad=None, x0=None, args=(), finite_diff_options=None):
    if callable(hess):
        H = hess(np.copy(x0), *args)
        ncalls = [1]

        if sps.issparse(H):
            def wrapped(x, **kwds):
                ncalls[0] += 1
                return sps.csr_matrix(hess(np.copy(x), *args))

            H = sps.csr_matrix(H)
        elif isinstance(H, LinearOperator):
            def wrapped(x, **kwds):
                ncalls[0] += 1
                return hess(np.copy(x), *args)

        else:  # dense
            def wrapped(x, **kwds):
                ncalls[0] += 1
                return np.atleast_2d(np.asarray(hess(np.copy(x), *args)))

            H = np.atleast_2d(np.asarray(H))

        return wrapped, ncalls, H
    elif hess in FD_METHODS:
        ncalls = [0]

        def wrapped1(x, f0=None):
            return approx_derivative(
                grad, x, f0=f0, **finite_diff_options
            )

        return wrapped1, ncalls, None


class ScalarFunction:
    """Scalar function and its derivatives.

    This class defines a scalar function F: R^n->R and methods for
    computing or approximating its first and second derivatives.

    Parameters
    ----------
    fun : callable
        evaluates the scalar function. Must be of the form ``fun(x, *args)``,
        where ``x`` is the argument in the form of a 1-D array and ``args`` is
        a tuple of any additional fixed parameters needed to completely specify
        the function. Should return a scalar.
    x0 : array-like
        Provides an initial set of variables for evaluating fun. Array of real
        elements of size (n,), where 'n' is the number of independent
        variables.
    args : tuple, optional
        Any additional fixed parameters needed to completely specify the scalar
        function.
    grad : {callable, '2-point', '3-point', 'cs'}
        Method for computing the gradient vector.
        If it is a callable, it should be a function that returns the gradient
        vector:

            ``grad(x, *args) -> array_like, shape (n,)``

        where ``x`` is an array with shape (n,) and ``args`` is a tuple with
        the fixed parameters.
        Alternatively, the keywords {'2-point', '3-point', 'cs'} can be used
        to select a finite difference scheme for numerical estimation of the
        gradient with a relative step size. These finite difference schemes
        obey any specified `bounds`.
    hess : {callable, '2-point', '3-point', 'cs', HessianUpdateStrategy}
        Method for computing the Hessian matrix. If it is callable, it should
        return the Hessian matrix:

            ``hess(x, *args) -> {LinearOperator, spmatrix, array}, (n, n)``

        where ``x`` is a (n,) ndarray and ``args`` is a tuple with the fixed
        parameters. Alternatively, the keywords {'2-point', '3-point', 'cs'}
        select a finite difference scheme for numerical estimation. Or, objects
        implementing the `HessianUpdateStrategy` interface can be used to
        approximate the Hessian.
        Whenever the gradient is estimated via finite-differences, the Hessian
        cannot be estimated with options {'2-point', '3-point', 'cs'} and needs
        to be estimated using one of the quasi-Newton strategies.
    finite_diff_rel_step : None or array_like
        Relative step size to use. The absolute step size is computed as
        ``h = finite_diff_rel_step * sign(x0) * max(1, abs(x0))``, possibly
        adjusted to fit into the bounds. For ``method='3-point'`` the sign
        of `h` is ignored. If None then finite_diff_rel_step is selected
        automatically.
    finite_diff_bounds : tuple of array_like
        Lower and upper bounds on independent variables. Defaults to no bounds,
        (-np.inf, np.inf). Each bound must match the size of `x0` or be a
        scalar; in the latter case the bound will be the same for all
        variables. Use it to limit the range of function evaluation.
    epsilon : None or array_like, optional
        Absolute step size to use, possibly adjusted to fit into the bounds.
        For ``method='3-point'`` the sign of `epsilon` is ignored. By default
        relative steps are used; only if ``epsilon is not None`` are absolute
        steps used.

    Notes
    -----
    This class implements a memoization logic. There are methods `fun`,
    `grad`, `hess` and corresponding attributes `f`, `g` and `H`. The following
    things should be considered:

        1. Use only public methods `fun`, `grad` and `hess`.
        2. After one of the methods is called, the corresponding attribute
           will be set. However, a subsequent call with a different argument
           of *any* of the methods may overwrite the attribute.
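
    Examples
    --------
    A minimal usage sketch. The quadratic objective, the '2-point' gradient
    and the `BFGS` quasi-Newton Hessian below are illustrative choices, not
    requirements of this class:

    >>> import numpy as np
    >>> from scipy.optimize import BFGS
    >>> sf = ScalarFunction(lambda x: np.sum(x**2), x0=np.array([1.0, 2.0]),
    ...                     args=(), grad='2-point', hess=BFGS(),
    ...                     finite_diff_rel_step=None,
    ...                     finite_diff_bounds=(-np.inf, np.inf))
    >>> float(sf.fun(np.array([1.0, 2.0])))
    5.0
    >>> np.allclose(sf.grad(np.array([1.0, 2.0])), [2.0, 4.0])
    True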
""" | |

    def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step,
                 finite_diff_bounds, epsilon=None):
        if not callable(grad) and grad not in FD_METHODS:
            raise ValueError(
                f"`grad` must be either callable or one of {FD_METHODS}."
            )

        if not (callable(hess) or hess in FD_METHODS
                or isinstance(hess, HessianUpdateStrategy)):
            raise ValueError(
                f"`hess` must be either callable, HessianUpdateStrategy"
                f" or one of {FD_METHODS}."
            )

        if grad in FD_METHODS and hess in FD_METHODS:
            raise ValueError("Whenever the gradient is estimated via "
                             "finite-differences, we require the Hessian "
                             "to be estimated using one of the "
                             "quasi-Newton strategies.")

        self.xp = xp = array_namespace(x0)
        _x = xpx.atleast_nd(xp.asarray(x0), ndim=1, xp=xp)
        _dtype = xp.float64
        if xp.isdtype(_x.dtype, "real floating"):
            _dtype = _x.dtype

        # original arguments
        self._wrapped_fun, self._nfev = _wrapper_fun(fun, args=args)
        self._orig_fun = fun
        self._orig_grad = grad
        self._orig_hess = hess
        self._args = args

        # promotes to floating
        self.x = xp.astype(_x, _dtype)
        self.x_dtype = _dtype
        self.n = self.x.size
        self.f_updated = False
        self.g_updated = False
        self.H_updated = False
        self._lowest_x = None
        self._lowest_f = np.inf

        finite_diff_options = {}
        if grad in FD_METHODS:
            finite_diff_options["method"] = grad
            finite_diff_options["rel_step"] = finite_diff_rel_step
            finite_diff_options["abs_step"] = epsilon
            finite_diff_options["bounds"] = finite_diff_bounds
        if hess in FD_METHODS:
            finite_diff_options["method"] = hess
            finite_diff_options["rel_step"] = finite_diff_rel_step
            finite_diff_options["abs_step"] = epsilon
            finite_diff_options["as_linear_operator"] = True

        # Initial function evaluation
        self._update_fun()

        # Initial gradient evaluation
        self._wrapped_grad, self._ngev = _wrapper_grad(
            grad,
            fun=self._wrapped_fun,
            args=args,
            finite_diff_options=finite_diff_options
        )
        self._update_grad()

        # Hessian evaluation
        if callable(hess):
            self._wrapped_hess, self._nhev, self.H = _wrapper_hess(
                hess, x0=x0, args=args
            )
            self.H_updated = True
        elif hess in FD_METHODS:
            self._wrapped_hess, self._nhev, self.H = _wrapper_hess(
                hess,
                grad=self._wrapped_grad,
                x0=x0,
                finite_diff_options=finite_diff_options
            )
            self._update_grad()
            self.H = self._wrapped_hess(self.x, f0=self.g)
            self.H_updated = True
        elif isinstance(hess, HessianUpdateStrategy):
            self.H = hess
            self.H.initialize(self.n, 'hess')
            self.H_updated = True
            self.x_prev = None
            self.g_prev = None
            self._nhev = [0]

    @property
    def nfev(self):
        return self._nfev[0]

    @property
    def ngev(self):
        return self._ngev[0]

    @property
    def nhev(self):
        return self._nhev[0]

    def _update_x(self, x):
        if isinstance(self._orig_hess, HessianUpdateStrategy):
            self._update_grad()
            self.x_prev = self.x
            self.g_prev = self.g
            # ensure that self.x is a copy of x. Don't store a reference
            # otherwise the memoization doesn't work properly.
            _x = xpx.atleast_nd(self.xp.asarray(x), ndim=1, xp=self.xp)
            self.x = self.xp.astype(_x, self.x_dtype)
            self.f_updated = False
            self.g_updated = False
            self.H_updated = False
            self._update_hess()
        else:
            # ensure that self.x is a copy of x. Don't store a reference
            # otherwise the memoization doesn't work properly.
            _x = xpx.atleast_nd(self.xp.asarray(x), ndim=1, xp=self.xp)
            self.x = self.xp.astype(_x, self.x_dtype)
            self.f_updated = False
            self.g_updated = False
            self.H_updated = False

    def _update_fun(self):
        if not self.f_updated:
            fx = self._wrapped_fun(self.x)
            if fx < self._lowest_f:
                self._lowest_x = self.x
                self._lowest_f = fx
            self.f = fx
            self.f_updated = True

    def _update_grad(self):
        if not self.g_updated:
            if self._orig_grad in FD_METHODS:
                self._update_fun()
            self.g = self._wrapped_grad(self.x, f0=self.f)
            self.g_updated = True

    def _update_hess(self):
        if not self.H_updated:
            if self._orig_hess in FD_METHODS:
                self._update_grad()
                self.H = self._wrapped_hess(self.x, f0=self.g)
            elif isinstance(self._orig_hess, HessianUpdateStrategy):
                self._update_grad()
                self.H.update(self.x - self.x_prev, self.g - self.g_prev)
            else:  # should be callable(hess)
                self.H = self._wrapped_hess(self.x)
            self.H_updated = True

    def fun(self, x):
        if not np.array_equal(x, self.x):
            self._update_x(x)
        self._update_fun()
        return self.f

    def grad(self, x):
        if not np.array_equal(x, self.x):
            self._update_x(x)
        self._update_grad()
        return self.g

    def hess(self, x):
        if not np.array_equal(x, self.x):
            self._update_x(x)
        self._update_hess()
        return self.H

    def fun_and_grad(self, x):
        if not np.array_equal(x, self.x):
            self._update_x(x)
        self._update_fun()
        self._update_grad()
        return self.f, self.g


class VectorFunction:
    """Vector function and its derivatives.

    This class defines a vector function F: R^n->R^m and methods for
    computing or approximating its first and second derivatives.

    Notes
    -----
    This class implements a memoization logic. There are methods `fun`,
    `jac`, `hess` and corresponding attributes `f`, `J` and `H`. The following
    things should be considered:

        1. Use only public methods `fun`, `jac` and `hess`.
        2. After one of the methods is called, the corresponding attribute
           will be set. However, a subsequent call with a different argument
           of *any* of the methods may overwrite the attribute.
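
    Examples
    --------
    A minimal usage sketch. The two-component residual function, the
    '2-point' Jacobian and the `BFGS` quasi-Newton Hessian below are
    illustrative choices, not requirements of this class:

    >>> import numpy as np
    >>> from scipy.optimize import BFGS
    >>> vf = VectorFunction(lambda x: np.array([x[0]**2, x[0] + x[1]]),
    ...                     x0=np.array([1.0, 2.0]), jac='2-point',
    ...                     hess=BFGS(), finite_diff_rel_step=None,
    ...                     finite_diff_jac_sparsity=None,
    ...                     finite_diff_bounds=(-np.inf, np.inf),
    ...                     sparse_jacobian=False)
    >>> vf.fun(np.array([1.0, 2.0]))
    array([1., 3.])
    >>> np.allclose(vf.jac(np.array([1.0, 2.0])), [[2.0, 0.0], [1.0, 1.0]])
    True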
""" | |

    def __init__(self, fun, x0, jac, hess,
                 finite_diff_rel_step, finite_diff_jac_sparsity,
                 finite_diff_bounds, sparse_jacobian):
        if not callable(jac) and jac not in FD_METHODS:
            raise ValueError(
                f"`jac` must be either callable or one of {FD_METHODS}."
            )

        if not (callable(hess) or hess in FD_METHODS
                or isinstance(hess, HessianUpdateStrategy)):
            raise ValueError("`hess` must be either callable, "
                             f"HessianUpdateStrategy or one of {FD_METHODS}.")

        if jac in FD_METHODS and hess in FD_METHODS:
            raise ValueError("Whenever the Jacobian is estimated via "
                             "finite-differences, we require the Hessian to "
                             "be estimated using one of the quasi-Newton "
                             "strategies.")

        self.xp = xp = array_namespace(x0)
        _x = xpx.atleast_nd(xp.asarray(x0), ndim=1, xp=xp)
        _dtype = xp.float64
        if xp.isdtype(_x.dtype, "real floating"):
            _dtype = _x.dtype

        # promotes to floating
        self.x = xp.astype(_x, _dtype)
        self.x_dtype = _dtype

        self.n = self.x.size
        self.nfev = 0
        self.njev = 0
        self.nhev = 0
        self.f_updated = False
        self.J_updated = False
        self.H_updated = False

        finite_diff_options = {}
        if jac in FD_METHODS:
            finite_diff_options["method"] = jac
            finite_diff_options["rel_step"] = finite_diff_rel_step
            if finite_diff_jac_sparsity is not None:
                sparsity_groups = group_columns(finite_diff_jac_sparsity)
                finite_diff_options["sparsity"] = (finite_diff_jac_sparsity,
                                                   sparsity_groups)
            finite_diff_options["bounds"] = finite_diff_bounds
            self.x_diff = np.copy(self.x)
        if hess in FD_METHODS:
            finite_diff_options["method"] = hess
            finite_diff_options["rel_step"] = finite_diff_rel_step
            finite_diff_options["as_linear_operator"] = True
            self.x_diff = np.copy(self.x)

        # Function evaluation
        def fun_wrapped(x):
            self.nfev += 1
            return np.atleast_1d(fun(x))

        def update_fun():
            self.f = fun_wrapped(self.x)

        self._update_fun_impl = update_fun
        update_fun()

        self.v = np.zeros_like(self.f)
        self.m = self.v.size

        # Jacobian Evaluation
        if callable(jac):
            self.J = jac(self.x)
            self.J_updated = True
            self.njev += 1

            if (sparse_jacobian or
                    sparse_jacobian is None and sps.issparse(self.J)):
                def jac_wrapped(x):
                    self.njev += 1
                    return sps.csr_matrix(jac(x))
                self.J = sps.csr_matrix(self.J)
                self.sparse_jacobian = True
            elif sps.issparse(self.J):
                def jac_wrapped(x):
                    self.njev += 1
                    return jac(x).toarray()
                self.J = self.J.toarray()
                self.sparse_jacobian = False
            else:
                def jac_wrapped(x):
                    self.njev += 1
                    return np.atleast_2d(jac(x))
                self.J = np.atleast_2d(self.J)
                self.sparse_jacobian = False

            def update_jac():
                self.J = jac_wrapped(self.x)

        elif jac in FD_METHODS:
            self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
                                       **finite_diff_options)
            self.J_updated = True

            if (sparse_jacobian or
                    sparse_jacobian is None and sps.issparse(self.J)):
                def update_jac():
                    self._update_fun()
                    self.J = sps.csr_matrix(
                        approx_derivative(fun_wrapped, self.x, f0=self.f,
                                          **finite_diff_options))
                self.J = sps.csr_matrix(self.J)
                self.sparse_jacobian = True
            elif sps.issparse(self.J):
                def update_jac():
                    self._update_fun()
                    self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
                                               **finite_diff_options).toarray()
                self.J = self.J.toarray()
                self.sparse_jacobian = False
            else:
                def update_jac():
                    self._update_fun()
                    self.J = np.atleast_2d(
                        approx_derivative(fun_wrapped, self.x, f0=self.f,
                                          **finite_diff_options))
                self.J = np.atleast_2d(self.J)
                self.sparse_jacobian = False

        self._update_jac_impl = update_jac

        # Define Hessian
        if callable(hess):
            self.H = hess(self.x, self.v)
            self.H_updated = True
            self.nhev += 1

            if sps.issparse(self.H):
                def hess_wrapped(x, v):
                    self.nhev += 1
                    return sps.csr_matrix(hess(x, v))
                self.H = sps.csr_matrix(self.H)
            elif isinstance(self.H, LinearOperator):
                def hess_wrapped(x, v):
                    self.nhev += 1
                    return hess(x, v)
            else:
                def hess_wrapped(x, v):
                    self.nhev += 1
                    return np.atleast_2d(np.asarray(hess(x, v)))
                self.H = np.atleast_2d(np.asarray(self.H))

            def update_hess():
                self.H = hess_wrapped(self.x, self.v)

        elif hess in FD_METHODS:
            def jac_dot_v(x, v):
                return jac_wrapped(x).T.dot(v)

            def update_hess():
                self._update_jac()
                self.H = approx_derivative(jac_dot_v, self.x,
                                           f0=self.J.T.dot(self.v),
                                           args=(self.v,),
                                           **finite_diff_options)

            update_hess()
            self.H_updated = True
        elif isinstance(hess, HessianUpdateStrategy):
            self.H = hess
            self.H.initialize(self.n, 'hess')
            self.H_updated = True
            self.x_prev = None
            self.J_prev = None

            def update_hess():
                self._update_jac()
                # When v is updated before x was updated, then x_prev and
                # J_prev are None and we need this check.
                if self.x_prev is not None and self.J_prev is not None:
                    delta_x = self.x - self.x_prev
                    delta_g = self.J.T.dot(self.v) - self.J_prev.T.dot(self.v)
                    self.H.update(delta_x, delta_g)

        self._update_hess_impl = update_hess

        if isinstance(hess, HessianUpdateStrategy):
            def update_x(x):
                self._update_jac()
                self.x_prev = self.x
                self.J_prev = self.J
                _x = xpx.atleast_nd(self.xp.asarray(x), ndim=1, xp=self.xp)
                self.x = self.xp.astype(_x, self.x_dtype)
                self.f_updated = False
                self.J_updated = False
                self.H_updated = False
                self._update_hess()
        else:
            def update_x(x):
                _x = xpx.atleast_nd(self.xp.asarray(x), ndim=1, xp=self.xp)
                self.x = self.xp.astype(_x, self.x_dtype)
                self.f_updated = False
                self.J_updated = False
                self.H_updated = False

        self._update_x_impl = update_x

    def _update_v(self, v):
        if not np.array_equal(v, self.v):
            self.v = v
            self.H_updated = False

    def _update_x(self, x):
        if not np.array_equal(x, self.x):
            self._update_x_impl(x)

    def _update_fun(self):
        if not self.f_updated:
            self._update_fun_impl()
            self.f_updated = True

    def _update_jac(self):
        if not self.J_updated:
            self._update_jac_impl()
            self.J_updated = True

    def _update_hess(self):
        if not self.H_updated:
            self._update_hess_impl()
            self.H_updated = True

    def fun(self, x):
        self._update_x(x)
        self._update_fun()
        return self.f

    def jac(self, x):
        self._update_x(x)
        self._update_jac()
        return self.J

    def hess(self, x, v):
        # v should be updated before x.
        self._update_v(v)
        self._update_x(x)
        self._update_hess()
        return self.H


class LinearVectorFunction:
    """Linear vector function and its derivatives.

    Defines a linear function F = A x, where x is an n-dimensional vector and
    A is an m-by-n matrix. The Jacobian is constant and equal to A. The
    Hessian is identically zero and is returned as a csr matrix.
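
    Examples
    --------
    A minimal usage sketch (the 2x2 matrix below is purely illustrative):

    >>> import numpy as np
    >>> lvf = LinearVectorFunction(np.array([[1.0, 2.0], [3.0, 4.0]]),
    ...                            x0=np.array([1.0, 1.0]),
    ...                            sparse_jacobian=False)
    >>> lvf.fun(np.array([1.0, 1.0]))
    array([3., 7.])
    >>> lvf.jac(np.array([1.0, 1.0]))
    array([[1., 2.],
           [3., 4.]])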
""" | |

    def __init__(self, A, x0, sparse_jacobian):
        if sparse_jacobian or sparse_jacobian is None and sps.issparse(A):
            self.J = sps.csr_matrix(A)
            self.sparse_jacobian = True
        elif sps.issparse(A):
            self.J = A.toarray()
            self.sparse_jacobian = False
        else:
            # np.asarray makes sure A is ndarray and not matrix
            self.J = np.atleast_2d(np.asarray(A))
            self.sparse_jacobian = False

        self.m, self.n = self.J.shape

        self.xp = xp = array_namespace(x0)
        _x = xpx.atleast_nd(xp.asarray(x0), ndim=1, xp=xp)
        _dtype = xp.float64
        if xp.isdtype(_x.dtype, "real floating"):
            _dtype = _x.dtype

        # promotes to floating
        self.x = xp.astype(_x, _dtype)
        self.x_dtype = _dtype

        self.f = self.J.dot(self.x)
        self.f_updated = True

        self.v = np.zeros(self.m, dtype=float)
        self.H = sps.csr_matrix((self.n, self.n))

    def _update_x(self, x):
        if not np.array_equal(x, self.x):
            _x = xpx.atleast_nd(self.xp.asarray(x), ndim=1, xp=self.xp)
            self.x = self.xp.astype(_x, self.x_dtype)
            self.f_updated = False

    def fun(self, x):
        self._update_x(x)
        if not self.f_updated:
            self.f = self.J.dot(x)
            self.f_updated = True
        return self.f

    def jac(self, x):
        self._update_x(x)
        return self.J

    def hess(self, x, v):
        self._update_x(x)
        self.v = v
        return self.H


class IdentityVectorFunction(LinearVectorFunction):
    """Identity vector function and its derivatives.

    The Jacobian is the identity matrix, returned as a dense array when
    `sparse_jacobian=False` and as a csr matrix otherwise. The Hessian is
    identically zero and is returned as a csr matrix.
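
    Examples
    --------
    A minimal usage sketch (illustrative only):

    >>> import numpy as np
    >>> ivf = IdentityVectorFunction(np.array([1.0, 2.0]), sparse_jacobian=None)
    >>> ivf.jac(np.array([1.0, 2.0])).toarray()
    array([[1., 0.],
           [0., 1.]])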
""" | |

    def __init__(self, x0, sparse_jacobian):
        n = len(x0)
        if sparse_jacobian or sparse_jacobian is None:
            A = sps.eye(n, format='csr')
            sparse_jacobian = True
        else:
            A = np.eye(n)
            sparse_jacobian = False
        super().__init__(A, x0, sparse_jacobian)