Skip to content

Commit ce4ba1c

Browse files
committed
Fix bug in Trust Region subproblem implementation
Changed the ITrustRegionSubproblem interface to directly accept scaled gradient and Hessian matrices instead of accessing them from IObjectiveModel.
1 parent 36159d9 commit ce4ba1c

File tree

5 files changed

+66
-34
lines changed

5 files changed

+66
-34
lines changed

src/Numerics/Optimization/TrustRegion/ITrustRegionSubProblem.cs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,11 @@ public interface ITrustRegionSubproblem
2020
bool HitBoundary { get; }
2121

2222
/// <summary>
23-
/// Solves the trust region subproblem for the specified objective model and trust region radius.
23+
/// Solves the trust region subproblem using the provided gradient and Hessian.
2424
/// </summary>
25-
/// <param name="objective">The objective model containing function, gradient, and Hessian information.</param>
26-
/// <param name="radius">The radius of the trust region.</param>
27-
void Solve(IObjectiveModel objective, double radius);
25+
/// <param name="gradient">The scaled gradient vector</param>
26+
/// <param name="hessian">The scaled Hessian matrix</param>
27+
/// <param name="delta">Trust region radius</param>
28+
void Solve(Vector<double> gradient, Matrix<double> hessian, double delta);
2829
}
2930
}

src/Numerics/Optimization/TrustRegion/Subproblems/DogLegSubproblem.cs

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,14 @@ internal class DogLegSubproblem : ITrustRegionSubproblem
88

99
public bool HitBoundary { get; private set; }
1010

11-
public void Solve(IObjectiveModel objective, double delta)
11+
public void Solve(Vector<double> gradient, Matrix<double> hessian, double delta)
1212
{
13-
var Gradient = objective.Gradient;
14-
var Hessian = objective.Hessian;
15-
1613
// newton point, the Gauss–Newton step by solving the normal equations
17-
var Pgn = -Hessian.PseudoInverse() * Gradient; // Hessian.Solve(Gradient) fails so many times...
14+
var Pgn = -hessian.PseudoInverse() * gradient; // hessian.Solve(gradient) fails so many times...
1815

1916
// cauchy point, steepest descent direction is given by
20-
var alpha = Gradient.DotProduct(Gradient) / (Hessian * Gradient).DotProduct(Gradient);
21-
var Psd = -alpha * Gradient;
17+
var alpha = gradient.DotProduct(gradient) / (hessian * gradient).DotProduct(gradient);
18+
var Psd = -alpha * gradient;
2219

2320
// update step and prectted reduction
2421
if (Pgn.L2Norm() <= delta)

src/Numerics/Optimization/TrustRegion/Subproblems/NewtonCGSubproblem.cs

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,29 +9,36 @@ internal class NewtonCGSubproblem : ITrustRegionSubproblem
99

1010
public bool HitBoundary { get; private set; }
1111

12-
public void Solve(IObjectiveModel objective, double delta)
12+
public void Solve(Vector<double> gradient, Matrix<double> hessian, double delta)
1313
{
14-
var Gradient = objective.Gradient;
15-
var Hessian = objective.Hessian;
16-
1714
// define tolerance
18-
var gnorm = Gradient.L2Norm();
15+
var gnorm = gradient.L2Norm();
1916
var tolerance = Math.Min(0.5, Math.Sqrt(gnorm)) * gnorm;
2017

2118
// initialize internal variables
22-
var z = Vector<double>.Build.Dense(Hessian.RowCount);
23-
var r = Gradient;
19+
var z = Vector<double>.Build.Dense(hessian.RowCount);
20+
var r = gradient;
2421
var d = -r;
2522

2623
while (true)
2724
{
28-
var Bd = Hessian * d;
25+
var Bd = hessian * d;
2926
var dBd = d.DotProduct(Bd);
3027

3128
if (dBd <= 0)
3229
{
30+
// Direction of negative curvature found
31+
// Calculate two boundary points and choose the one with lower model value
3332
var t = Util.FindBeta(1, z, d, delta);
34-
Pstep = z + t.Item1 * d;
33+
var pa = z + t.Item1 * d;
34+
var pb = z + t.Item2 * d;
35+
36+
// Evaluate quadratic model at both points
37+
var valueA = Util.CalculateQuadraticModel(gradient, hessian, pa);
38+
var valueB = Util.CalculateQuadraticModel(gradient, hessian, pb);
39+
40+
// Choose the point with the lower model value
41+
Pstep = valueA < valueB ? pa : pb;
3542
HitBoundary = true;
3643
return;
3744
}

src/Numerics/Optimization/TrustRegion/Subproblems/Util.cs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@ namespace MathNet.Numerics.Optimization.TrustRegion.Subproblems
55
{
66
internal static class Util
77
{
8+
/// <summary>
9+
/// Finds the two intersection points of a line with the trust region boundary.
10+
/// </summary>
11+
/// <param name="alpha">Scaling factor for steepest descent direction</param>
12+
/// <param name="sd">Steepest descent direction vector</param>
13+
/// <param name="gn">Gauss-Newton direction vector</param>
14+
/// <param name="delta">Trust region radius</param>
15+
/// <returns>A tuple containing two beta values, sorted from low to high</returns>
816
public static (double, double) FindBeta(double alpha, Vector<double> sd, Vector<double> gn, double delta)
917
{
1018
// Pstep is intersection of the trust region boundary
@@ -29,5 +37,24 @@ public static (double, double) FindBeta(double alpha, Vector<double> sd, Vector<
2937
// return sorted beta
3038
return beta1 < beta2 ? (beta1, beta2) : (beta2, beta1);
3139
}
40+
41+
/// <summary>
42+
/// Calculates the value of the quadratic model at a given point.
43+
/// The quadratic model is defined as:
44+
/// m(p) = g^T * p + 0.5 * p^T * H * p
45+
/// where g is the gradient and H is the Hessian at the current point.
46+
/// </summary>
47+
/// <param name="gradient">The gradient vector</param>
48+
/// <param name="hessian">The Hessian matrix</param>
49+
/// <param name="p">The point at which to evaluate the quadratic model</param>
50+
/// <returns>The value of the quadratic model at point p</returns>
51+
public static double CalculateQuadraticModel(Vector<double> gradient, Matrix<double> hessian, Vector<double> p)
52+
{
53+
// Quadratic model: m(p) = g^T * p + 0.5 * p^T * H * p
54+
var linearTerm = gradient.DotProduct(p);
55+
var quadraticTerm = 0.5 * p.DotProduct(hessian * p);
56+
57+
return linearTerm + quadraticTerm;
58+
}
3259
}
3360
}

src/Numerics/Optimization/TrustRegion/TrustRegionMinimizerBase.cs

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,8 @@ public NonlinearMinimizationResult Minimum(ITrustRegionSubproblem subproblem, IO
8888
// Residuals, R = L(y - f(x; p))
8989
// Residual sum of squares, RSS = ||R||^2 = R.DotProduct(R)
9090
// Jacobian J = df(x; p)/dp
91-
// Gradient g = -J'W(y − f(x; p)) = -J'LR
92-
// Approximated Hessian H = J'WJ
91+
// gradient g = -J'W(y − f(x; p)) = -J'LR
92+
// Approximated hessian H = J'WJ
9393
//
9494
// The trust region algorithm is summarized as follows:
9595
// initially set trust-region radius, Δ
@@ -120,7 +120,7 @@ public NonlinearMinimizationResult Minimum(ITrustRegionSubproblem subproblem, IO
120120

121121
objective.SetParameters(initialGuess, isFixed);
122122

123-
ExitCondition exitCondition = ExitCondition.None;
123+
var exitCondition = ExitCondition.None;
124124

125125
// First, calculate function values and setup variables
126126
var P = ProjectToInternalParameters(initialGuess); // current internal parameters
@@ -151,11 +151,11 @@ public NonlinearMinimizationResult Minimum(ITrustRegionSubproblem subproblem, IO
151151
exitCondition = ExitCondition.Converged; // SmallRSS
152152
}
153153

154-
// evaluate projected gradient and Hessian
155-
var (Gradient, Hessian) = EvaluateJacobian(objective, P);
154+
// evaluate projected gradient and hessian
155+
var (gradient, hessian) = EvaluateJacobian(objective, P);
156156

157157
// if ||g||_oo <= gtol, found and stop
158-
if (Gradient.InfinityNorm() <= gradientTolerance)
158+
if (gradient.InfinityNorm() <= gradientTolerance)
159159
{
160160
exitCondition = ExitCondition.RelativeGradient; // SmallGradient
161161
}
@@ -166,22 +166,22 @@ public NonlinearMinimizationResult Minimum(ITrustRegionSubproblem subproblem, IO
166166
}
167167

168168
// initialize trust-region radius, Δ
169-
double delta = Gradient.DotProduct(Gradient) / (Hessian * Gradient).DotProduct(Gradient);
169+
var delta = gradient.DotProduct(gradient) / (hessian * gradient).DotProduct(gradient);
170170
delta = Math.Max(1.0, Math.Min(delta, maxDelta));
171171

172-
int iterations = 0;
172+
var iterations = 0;
173173
bool hitBoundary;
174174
while (iterations < maximumIterations && exitCondition == ExitCondition.None)
175175
{
176176
iterations++;
177177

178178
// solve the subproblem
179-
subproblem.Solve(objective, delta);
179+
subproblem.Solve(gradient, hessian, delta);
180180
Pstep = subproblem.Pstep;
181181
hitBoundary = subproblem.HitBoundary;
182182

183183
// predicted reduction = L(0) - L(Δp) = -Δp'g - 1/2 * Δp'HΔp
184-
var predictedReduction = -Gradient.DotProduct(Pstep) - 0.5 * Pstep.DotProduct(Hessian * Pstep);
184+
var predictedReduction = -gradient.DotProduct(Pstep) - 0.5 * Pstep.DotProduct(hessian * Pstep);
185185

186186
if (Pstep.L2Norm() <= stepTolerance * (stepTolerance + P.L2Norm()))
187187
{
@@ -201,7 +201,7 @@ public NonlinearMinimizationResult Minimum(ITrustRegionSubproblem subproblem, IO
201201
}
202202

203203
// calculate the ratio of the actual to the predicted reduction.
204-
double rho = (predictedReduction != 0)
204+
var rho = (predictedReduction != 0)
205205
? (RSS - RSSnew) / predictedReduction
206206
: 0.0;
207207

@@ -225,11 +225,11 @@ public NonlinearMinimizationResult Minimum(ITrustRegionSubproblem subproblem, IO
225225
Pnew.CopyTo(P);
226226
RSS = RSSnew;
227227

228-
// evaluate projected gradient and Hessian
229-
(Gradient, Hessian) = EvaluateJacobian(objective, P);
228+
// evaluate projected gradient and hessian
229+
(gradient, hessian) = EvaluateJacobian(objective, P);
230230

231231
// if ||g||_oo <= gtol, found and stop
232-
if (Gradient.InfinityNorm() <= gradientTolerance)
232+
if (gradient.InfinityNorm() <= gradientTolerance)
233233
{
234234
exitCondition = ExitCondition.RelativeGradient;
235235
}

0 commit comments

Comments
 (0)