/// <summary>
///   Runs one Levenberg-Marquardt optimization pass: accumulates the error
///   gradient and a quasi-Hessian approximation (H ~ 2 J'J) block by block,
///   then increases the damping factor <c>lambda</c> until a parameter step
///   is found that does not increase the sum of squared errors (or until
///   <c>lambdaMax</c> is reached).
/// </summary>
///
/// <param name="inputs">The training input vectors, one row per sample.</param>
/// <param name="outputs">The expected outputs corresponding to each input row.</param>
///
/// <returns>
///   The sum of squared errors after the last evaluated step. The same value
///   is stored in <c>Value</c> before returning.
/// </returns>
///
/// <exception cref="ArithmeticException">
///   Thrown when the partial error computation produces NaN (e.g. due to
///   constant columns in the input data).
/// </exception>
///
public double Minimize(double[][] inputs, double[] outputs)
{
    double sumOfSquaredErrors = 0.0;

    // Set upper triangular Hessian to zero. Only the upper triangle is
    // accumulated below; the lower triangle is later used as scratch space
    // by the in-place Cholesky decomposition.
    for (int i = 0; i < hessian.Length; i++)
        Array.Clear(hessian[i], i, hessian.Length - i);

    // Set Gradient vector to zero
    Array.Clear(gradient, 0, gradient.Length);

    // Divide the problem into blocks. Instead of computing
    // a single Jacobian and a single error vector, we will
    // be computing multiple Jacobians for smaller problems
    // and then sum all blocks into the final Hessian matrix
    // and gradient vector.
    int blockSize = inputs.Length / Blocks;
    int finalBlock = inputs.Length % Blocks; // leftover samples, processed as one extra (smaller) block
    int jacobianSize = blockSize * outputCount;

    // Re-allocate the partial Jacobian matrix only if needed
    // (buffers are reused across calls to avoid per-iteration allocation)
    if (jacobian[0] == null || jacobian[0].Length < jacobianSize)
    {
        for (int i = 0; i < jacobian.Length; i++)
            this.jacobian[i] = new double[jacobianSize];
    }

    // Re-allocate error vector only if needed
    if (errors == null || errors.Length < jacobianSize)
        errors = new double[jacobianSize];

    // For each block (the loop runs to <= Blocks so the remainder
    // samples, if any, are handled as a final partial block)
    for (int s = 0; s <= Blocks; s++)
    {
        if (s == Blocks && finalBlock == 0)
            continue;

        int B = (s == Blocks) ? finalBlock : blockSize;
        int[] block = Vector.Range(s * blockSize, s * blockSize + B);

        // Compute the partial residuals vector
        // (presumably fills `errors` for the samples in `block` — confirm in helper)
        sumOfSquaredErrors += computeErrors(inputs, outputs, block);

        // Compute the partial Jacobian
        // (presumably fills `jacobian` columns for the samples in `block` — confirm in helper)
        computeJacobian(inputs, block);

        if (Double.IsNaN(sumOfSquaredErrors))
        {
            throw new ArithmeticException("Error calculation has produced a non-finite number."
                + " Please make sure that there are no constant columns in the input data.");
        }

        // Compute error gradient using Jacobian (g += J'e)
        //
        // NOTE(review): these loops run over the full jacobianSize even for the
        // final (smaller) block — this assumes computeErrors/computeJacobian
        // leave the unused tail of `errors`/`jacobian` zeroed rather than
        // holding stale values from the previous block; confirm in the helpers.
        for (int i = 0; i < jacobian.Length; i++)
        {
            double sum = 0;
            for (int j = 0; j < jacobianSize; j++)
                sum += jacobian[i][j] * errors[j];
            gradient[i] += sum;
        }

        // Compute Quasi-Hessian Matrix approximation
        // using the outer product Jacobian (H ~ J'J),
        // accumulating rows of the upper triangle in parallel.
        //
        Parallel.For(0, jacobian.Length, ParallelOptions, i =>
        {
            double[] ji = jacobian[i];
            double[] hi = hessian[i];

            for (int j = i; j < hi.Length; j++)
            {
                double[] jj = jacobian[j];

                double sum = 0;
                for (int k = 0; k < jj.Length; k++)
                    sum += ji[k] * jj[k];

                // The Hessian need only be upper-triangular, since
                // it is symmetric. The Cholesky decomposition will
                // make use of this fact and use the lower-triangular
                // portion to hold the decomposition, conserving memory.
                //
                // NOTE(review): the factor of 2 here (H = 2 J'J) matches the
                // 2*lambda damping added to the diagonal below, but the
                // gradient above is accumulated without it — this uniformly
                // halves the solved step; appears inherited/intentional, confirm.
                hi[j] += 2 * sum;
            }
        });
    }

    // Store the Hessian's diagonal for future computations. The
    // diagonal will be destroyed in the decomposition, so it can
    // still be updated on every iteration by restoring this copy.
    //
    for (int i = 0; i < hessian.Length; i++)
        diagonal[i] = hessian[i][i];

    // Create the initial weights vector (snapshot of the current solution,
    // so each damping attempt below restarts from the same point)
    for (int i = 0; i < solution.Length; i++)
        weights[i] = solution[i];

    // Define the objective function:
    double objective = sumOfSquaredErrors;
    double current = objective + 1.0; // forces at least one pass through the loop below

    // Begin of the main Levenberg-Marquardt method.
    // Pre-divide so the first pass (which multiplies by v) tries
    // the current lambda unchanged.
    lambda /= v;

    // We'll try to find a direction with less error
    // (or where the objective function is smaller)
    while (current >= objective && lambda < lambdaMax)
    {
        if (Token.IsCancellationRequested)
            break;

        lambda *= v;

        // Update diagonal (Levenberg-Marquardt damping: restore the saved
        // diagonal, then add the damping term at the Hessian's 2x scale)
        for (int i = 0; i < diagonal.Length; i++)
            hessian[i][i] = diagonal[i] + 2 * lambda;

        // Decompose to solve the linear system. The Cholesky decomposition
        // is done in place, occupying the Hessian's lower-triangular part.
        decomposition = new JaggedCholeskyDecomposition(hessian, robust: true, inPlace: true);

        // Check if the decomposition exists
        if (decomposition.IsUndefined)
        {
            // The Hessian is singular. Continue to the next
            // iteration until the diagonal update transforms
            // it back to non-singular.
            continue;
        }

        // Solve using Cholesky decomposition
        deltas = decomposition.Solve(gradient);

        // Update weights using the calculated deltas
        // (always stepping from the snapshot taken above)
        for (int i = 0; i < solution.Length; i++)
            solution[i] = weights[i] + deltas[i];

        // Calculate the new error
        sumOfSquaredErrors = ComputeError(inputs, outputs);

        // Update the objective function
        current = sumOfSquaredErrors;

        // If the object function is bigger than before, the method
        // is tried again using a greater damping factor.
    }

    // If this iteration caused a error drop, then next iteration
    // will use a smaller damping factor.
    lambda /= v;

    return Value = sumOfSquaredErrors;
}