import numpy as np
from scipy.linalg import lu_factor, lu_solve
from collections import namedtuple
[docs]
def solve_constraint_lagrangian(x, jac_x, c_x, c_prime):
"""
Function which solves the problem
minimize || J.dot(x_mod - x) ||
subject to C(x_mod) = 0
via the method of Lagrange multipliers.
:param x: Parameter values at x.
:type x: 1D numpy array
:param jac_x: The (estimated, approximate or exact) value of the Jacobian J(x).
:type jac_x: 2D numpy array.
:param c_x: Values of the constraints at x.
:type c_x: 1D numpy array
:param c_prime: The Jacobian of the constraints (A, where A.x + b = 0).
:type c_prime: 2D array of floats
:returns: An array containing the parameter values which minimize the L2-norm
of any function which has the Jacobian jac_x, and another array containing
the multipliers for each of the equality constraints.
:rtype: tuple
"""
n_x = len(x)
n = n_x + len(c_x)
A = np.zeros((n, n))
b = np.zeros(n)
JTJ = jac_x.T.dot(jac_x)
A[:n_x, :n_x] = JTJ / np.linalg.norm(JTJ) * n * n # includes scaling
A[:n_x, n_x:] = c_prime.T
A[n_x:, :n_x] = c_prime
b[n_x:] = c_x
luA = lu_factor(A)
dx_m = lu_solve(luA, -b) # lu_solve computes the solution of ax = b
x_mod = x + dx_m[:n_x]
lagrange_multipliers = dx_m[n_x:]
return (x_mod, lagrange_multipliers)
[docs]
def damped_newton_solve(
F,
J,
guess,
tol=1.0e-6,
max_iterations=100,
lambda_bounds=lambda dx, x: (1.0e-8, 1.0),
linear_constraints=(0.0, np.array([-1.0])),
store_iterates=False,
):
"""
Solver for the multivariate nonlinear system F(x)=0
with Jacobian J(x), using the damped affine invariant modification
to Newton's method (Deuflhard, 1974;1975;2004).
Here we follow the algorithm as described in Nowak and Weimann (1991):
[Technical Report TR-91-10, Algorithm B], modified to accept
linear inequality constraints.
Linear inequality constraints are provided by the arrays constraints_A and
constraints_b. The constraints are satisfied if A*x + b <= 0.
If any constraints are not satisfied by the current
value of lambda, lambda is reduced to satisfy all the constraints.
If a current iterate starting point (x_i) lies on one or more constraints
and the Newton step violates one or more of those constraints, then
the next step is calculated via the method of Lagrangian multipliers,
minimizing the L2-norm of F(x_i+1) subject to the violated constraints.
Successful termination of the solver is based on three criteria:
- all(np.abs(dx (simplified newton step) < tol))
- all(np.abs(dx (full Newton step) < sqrt(10*tol))) (avoids pathology) and
- lambda = lambda_bounds(dx, x)[1] (lambda = 1 for a full Newton step).
If these criteria are not satisfied, iterations continue until one of the following
occurs:
- the value of lmda is reduced to its minimum value (for v. nonlinear problems)
- successive iterations have descent vectors which violate the constraints
- the maximum number of iterations (given by max_iterations) is reached.
Information on the root (or lack of root) obtained by the solver is provided
in the returned namedtuple.
:param F: Function returning the system function F(x) as a 1D numpy array.
:type F: function of x
:param J: Function returning the Jacobian function J(x) as a 2D numpy array.
:type J: function of x
:param guess: Starting guess for the solver.
:type guess: 1D numpy.array
:param tol: Tolerance(s) for termination.
:type tol: float or array of floats
:param max_iterations: Maximum number of iterations for the solver.
:type max_iterations: int
:param lambda_bounds: A function of dx and x that returns
a tuple of floats corresponding to the minimum and maximum
allowed fractions of the full newton step (dx).
:type lambda bounds: function of dx and x
:param linear_constraints: tuple of a 2D numpy array (A) and 1D numpy array (b)
Constraints are satisfied if A.x + b < eps
:type linear_constraints: tuple of a 2D numpy.array (A) and 1D numpy.array (b)
:returns: A namedtuple with the following attributes:
- x: The solution vector [1D numpy array].
- F: The evaluated function F(x) [1D numpy array].
- F_norm: Euclidean norm of F(x) [float].
- J: The evaluated Jacobian J(x) [2D numpy array].
- n_it: Number of iterations [int].
- code: Numerical description of the solver termination [int].
- 0: Successful convergence
- 1: Failure due to solver hitting lower lambda bound
- 2: Failure due to descent vector crossing constraints
- 3: Failure due to solver reaching maximum number of iterations
- text: Description of the solver termination [str].
- success: Solver convergence state [bool].
- iterates: [namedtuple]
Only present if store_iterates=True
Includes the following attributes:
- x: list of 1D numpy arrays of floats
The parameters for each iteration
- F: list of 2D numpy arrays of floats
The function for each iteration
- lmda: list of floats
The value of the damping parameter for each iteration
:rtype: namedtuple
"""
# Make sure damping factor is within bounds, and that the bounds are reasonable
# Problem classes in Nowak and Weimann (1991); [lmda_min, lmda_max]:
# linear: [0.1, 1.]
# mildly nonlinear: [1.e-4, 1.]
# highly nonlinear: [1.e-2, 1.e-4]
# extremely nonlinear: [1.e-4, 1.e-8]
eps = 2.0 * np.finfo(float).eps
def update_lmda(x, dx, h, lmda_bounds):
assert (
lmda_bounds[1] < 1.0 + eps
), "The highest upper bound for lambda is 1. (a full Newton step)"
assert (
lmda_bounds[0] > 1.0e-8 - eps
), "The lowest lower bound for lambda is 1.e-8 (suitable only for extremely nonlinear systems)"
lmda_j = min(1.0 / (h + eps), lmda_bounds[1]) # this is lmda_j^0
return max(lmda_j, lmda_bounds[0])
def constraints(x):
return np.dot(linear_constraints[0], x) + linear_constraints[1]
assert np.all(
constraints(guess) < eps
), "The starting guess is outside the supplied constraints."
if not isinstance(tol, float):
assert len(tol) < len(
guess
), "tol must either be a float or an array like guess."
sol = namedtuple(
"Solution", ["x", "n_it", "F", "F_norm", "J", "code", "text", "success"]
)
# evaluate system
sol.x = guess
sol.F = F(sol.x)
if store_iterates:
sol.iterates = namedtuple("iterates", ["x", "F", "lmda"])
sol.iterates.x = [sol.x]
sol.iterates.F = [sol.F]
sol.iterates.lmda = [0.0]
# Begin Newton loop
# Some dummy variables for the first h calculation (h = 0)
lmda = 0.0
dxprev = [1.0]
dxbar = [1.0]
sol.n_it = 0
n_constraints = len(constraints(sol.x))
minimum_lmda = False
converged = False
persistent_bound_violation = False
while (
sol.n_it < max_iterations
and not minimum_lmda
and not persistent_bound_violation
and not converged
):
sol.J = J(sol.x) # evaluate Jacobian
luJ = lu_factor(sol.J) # storing the factorisation saves time later
dx = lu_solve(luJ, -sol.F) # compute ordinary Newton step
dx_norm = np.linalg.norm(dx, ord=2)
lmda_bounds = lambda_bounds(dx, sol.x)
h = (
lmda
* np.linalg.norm((dxbar - dx), ord=2)
* dx_norm
/ (np.linalg.norm(dxprev, ord=2) * np.linalg.norm(dxbar, ord=2))
)
lmda = update_lmda(sol.x, dx, h, lmda_bounds)
# Create the (k+1)^0 values
x_j = sol.x + lmda * dx
# Check that all constraints are satisfied. If not, adjust lambda.
# This must be done just before every call to F() *if* lambda
# has been increased:
c_x_j = constraints(x_j)
if not np.all(
c_x_j < eps
): # x allowed to lie on constraints but not in forbidden area
c_x = constraints(sol.x)
violated_constraints = sorted(
[
(i, c_x[i] / (c_x[i] - c_x_j[i]))
for i in range(n_constraints)
if c_x_j[i] >= eps
],
key=lambda x: x[1],
)
lmda = lmda * violated_constraints[0][1]
x_j = sol.x + lmda * dx
# If the same current iterate is on a constraint,
# and a very small lambda causes the next iterate to leave the
# feasible region, then a new step direction must be found,
# along with a new guess for lmda
# We do this here using Lagrange multipliers
if lmda < eps:
active_constraint_indices = [
i for i, vc in violated_constraints if vc < eps
]
inactive_constraint_indices = [
i for i, vc in violated_constraints if vc >= eps
]
c_newton = constraints(sol.x + dx)[active_constraint_indices]
c_A = linear_constraints[0][active_constraint_indices]
x_n = sol.x + dx # newton iterate
if np.linalg.matrix_rank(c_A) == len(
dx
): # if true, we must leave a constraint here
n_act = len(active_constraint_indices)
for i_rm in range(n_act):
potential_active_indices = [
active_constraint_indices[i] for i in range(n_act) if i != i_rm
]
c_newton = constraints(sol.x + dx)[potential_active_indices]
c_A = linear_constraints[0][potential_active_indices]
x_m = solve_constraint_lagrangian(x_n, sol.J, c_newton, c_A)[0]
if constraints(x_m)[active_constraint_indices[i_rm]] < 0.0:
break
else:
x_m = solve_constraint_lagrangian(x_n, sol.J, c_newton, c_A)[0]
dx = x_m - sol.x
lmda_bounds = lambda_bounds(dx, sol.x)
lmda = lmda_bounds[1] # no a-priori maximum limit
x_j = sol.x + lmda * dx
# Check that the solution is still able to converge, i.e.
# that the constraints aren't stopping our approach to a potential root
x_j_min = (
sol.x + lmda_bounds[0] * dx
) # because lmda must be getting smaller, no need to check constraints
F_j_min = F(x_j_min)
dxbar_j_min = lu_solve(luJ, -F_j_min)
dxbar_j_min_norm = np.linalg.norm(dxbar_j_min, ord=2)
# Newton step size must be decreasing and dx must be non-zero
if dxbar_j_min_norm > dx_norm or np.linalg.norm(dx, ord=2) < eps:
persistent_bound_violation = True
# Now we need to check for newly violated constraints
n_inactive = len(inactive_constraint_indices)
c_x_j = constraints(x_j)[inactive_constraint_indices]
if not np.all(
c_x_j < eps
): # x allowed to lie on constraints but not in forbidden area
c_x = constraints(sol.x)[inactive_constraint_indices]
violated_constraints = sorted(
[
(i, c_x[i] / (c_x[i] - c_x_j[i]))
for i in range(n_inactive)
if c_x_j[i] >= eps
],
key=lambda x: x[1],
)
lmda = lmda * violated_constraints[0][1]
x_j = sol.x + lmda * dx
F_j = F(x_j)
dxbar_j = lu_solve(luJ, -F_j) # this is the simplified newton step
dxbar_j_norm = np.linalg.norm(dxbar_j, ord=2)
if (
all(np.abs(dxbar_j) < tol) # <- Success requirements
and all(np.abs(dx) < np.sqrt(10.0 * tol)) # <- avoids pathological cases
and np.abs(lmda - lmda_bounds[1]) < eps
): # <- end on a maximal newton step
require_posteriori_loop = False # <- No need for the a posteriori loop
converged = True # <- Successful convergence
else:
require_posteriori_loop = True
# Begin the a posteriori loop
while (
require_posteriori_loop
and not minimum_lmda
and not persistent_bound_violation
):
# Monotonicity check
# always based on the Newton step, even if on a constraint
if dxbar_j_norm <= dx_norm:
if (
dxbar_j_norm < eps
): # <- occasionally the simplified newton step finds the exact solution
converged = True
dxbar = dxbar_j
sol.x = x_j
sol.F = F_j
require_posteriori_loop = False # return to Newton step
sol.n_it += 1 # move to next iteration
dxprev = dx # to calculate the next value of h
else:
if np.abs(lmda - lmda_bounds[0]) < eps:
minimum_lmda = True
h_j = (
(2.0 / lmda)
* np.linalg.norm((dxbar_j - (1.0 - lmda) * dx), ord=2)
/ dx_norm
)
lmda_j = min(lmda_bounds[1], 1.0 / h_j)
lmda = min(lmda_j, lmda / 2.0)
lmda = max(
lmda, lmda_bounds[0]
) # allows a check of monotonicity once at minimum lmda
x_j = (
sol.x + lmda * dx
) # because lmda must be getting smaller, no need to check constraints
F_j = F(x_j)
dxbar_j = lu_solve(luJ, -F_j)
dxbar_j_norm = np.linalg.norm(dxbar_j, ord=2)
if store_iterates:
sol.iterates.x.append(sol.x)
sol.iterates.F.append(sol.F)
sol.iterates.lmda.append(lmda)
if converged and not persistent_bound_violation:
sol.x = x_j + dxbar_j
# Even if the solver succeeds, there may be a small chance
# that the last simplified Newton step
# shifts the solution just outside the constraints.
# If so, shift the solution back to the allowed region
c_x = constraints(sol.x)
if not np.all(
c_x <= 0.0
): # x allowed to lie on constraints but not in forbidden area
sol.x -= dxbar_j
sol.F = F(sol.x)
sol.F_norm = np.linalg.norm(sol.F, ord=2)
sol.J = J(sol.x)
if store_iterates:
sol.iterates.x = np.array(sol.iterates.x)
sol.iterates.F = np.array(sol.iterates.F)
sol.success = False
if converged:
sol.success = True
sol.code = 0
sol.text = "The solver successfully found a root after {0} iterations".format(
sol.n_it
)
elif minimum_lmda:
sol.code = 1
sol.text = "The function is too non-linear for lower lambda bound ({0})".format(
lmda_bounds[0]
)
elif persistent_bound_violation:
sol.code = 2
sol.text = "The descent vector crosses the constraints with the following indices: {0}".format(
[i for i, lmda in violated_constraints]
)
elif sol.n_it == max_iterations:
sol.code = 3
sol.text = "The solver reached max_iterations ({0})".format(max_iterations)
else:
raise Exception("Unknown termination of solver")
return sol