summaryrefslogtreecommitdiff
path: root/src/lmfit/lmmin.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lmfit/lmmin.c')
-rw-r--r--src/lmfit/lmmin.c1320
1 files changed, 1320 insertions, 0 deletions
diff --git a/src/lmfit/lmmin.c b/src/lmfit/lmmin.c
new file mode 100644
index 0000000..fdf634e
--- /dev/null
+++ b/src/lmfit/lmmin.c
@@ -0,0 +1,1320 @@
+/*
+ * lmfit
+ *
+ * Solves or minimizes the sum of squares of m nonlinear
+ * functions of n variables.
+ *
+ * From public domain Fortran version
+ * of Argonne National Laboratories MINPACK
+ * argonne national laboratory. minpack project. march 1980.
+ * burton s. garbow, kenneth e. hillstrom, jorge j. more
+ * C translation by Steve Moshier
+ * Joachim Wuttke converted the source into C++ compatible ANSI style
+ * and provided a simplified interface
+ */
+
+
+#include <stdlib.h>
+#include <math.h>
+#include "lmmin.h"
+#define _LMDIF
+
+/* *********************** high-level interface **************************** */
+
+
+void lm_initialize_control( lm_control_type *control )
+{
+ control->maxcall = 100;
+ control->epsilon = 1.e-14;
+ control->stepbound = 100.;
+ control->ftol = 1.e-14;
+ control->xtol = 1.e-14;
+ control->gtol = 1.e-14;
+}
+
+void lm_minimize( int m_dat, int n_par, double* par,
+ lm_evaluate_ftype *evaluate, lm_print_ftype *printout,
+ void *data, lm_control_type *control )
+{
+
+// *** allocate work space.
+
+ double *fvec, *diag, *fjac, *qtf, *wa1, *wa2, *wa3, *wa4;
+ int *ipvt;
+
+ int n = n_par;
+ int m = m_dat;
+
+ if (!(fvec = (double*) malloc( m*sizeof(double))) ||
+ !(diag = (double*) malloc(n* sizeof(double))) ||
+ !(qtf = (double*) malloc(n* sizeof(double))) ||
+ !(fjac = (double*) malloc(n*m*sizeof(double))) ||
+ !(wa1 = (double*) malloc(n* sizeof(double))) ||
+ !(wa2 = (double*) malloc(n* sizeof(double))) ||
+ !(wa3 = (double*) malloc(n* sizeof(double))) ||
+ !(wa4 = (double*) malloc( m*sizeof(double))) ||
+ !(ipvt = (int*) malloc(n* sizeof(int)))) {
+ control->info = 9;
+ return;
+ }
+
+// *** perform fit.
+
+ control->info = 0;
+ control->nfev = 0;
+
+ // this goes through the modified legacy interface:
+ lm_lmdif( m, n, par, fvec, control->ftol, control->xtol, control->gtol,
+ control->maxcall*(n+1), control->epsilon, diag, 1,
+ control->stepbound, &(control->info),
+ &(control->nfev), fjac, ipvt, qtf, wa1, wa2, wa3, wa4,
+ evaluate, printout, data );
+
+ (*printout)( n, par, m, fvec, data, -1, 0, control->nfev );
+ control->fnorm = lm_enorm(m, fvec);
+ if (control->info < 0 ) control->info = 10;
+
+// *** clean up.
+
+ free(fvec);
+ free(diag);
+ free(qtf);
+ free(fjac);
+ free(wa1);
+ free(wa2);
+ free(wa3 );
+ free(wa4);
+ free(ipvt);
+}
+
+
+// ***** the following messages are referenced by the variable info.
+
+char *lm_infmsg[] = {
+ "improper input parameters",
+ "the relative error in the sum of squares is at most tol",
+ "the relative error between x and the solution is at most tol",
+ "both errors are at most tol",
+ "fvec is orthogonal to the columns of the jacobian to machine precision",
+ "number of calls to fcn has reached or exceeded 200*(n+1)",
+ "ftol is too small. no further reduction in the sum of squares is possible",
+ "xtol too small. no further improvement in approximate solution x possible",
+ "gtol too small. no further improvement in approximate solution x possible",
+ "not enough memory",
+ "break requested within function evaluation"
+};
+
+char *lm_shortmsg[] = {
+ "invalid input",
+ "success (f)",
+ "success (p)",
+ "success (f,p)",
+ "degenerate",
+ "call limit",
+ "failed (f)",
+ "failed (p)",
+ "failed (o)",
+ "no memory",
+ "user break"
+};
+
+
+/* ************************** implementation ******************************* */
+
+
+#undef BUG
+#if BUG
+#include <stdio.h>
+#endif
+
+// the following values seem good for an x86:
+#define LM_MACHEP .555e-16 /* resolution of arithmetic */
+#define LM_DWARF 9.9e-324 /* smallest nonzero number */
+// the follwoing values should work on any machine:
+// #define LM_MACHEP 1.2e-16
+// #define LM_DWARF 1.0e-38
+
+// the squares of the following constants shall not under/overflow:
+// these values seem good for an x86:
+#define LM_SQRT_DWARF 1.e-160
+#define LM_SQRT_GIANT 1.e150
+// the following values should work on any machine:
+// #define LM_SQRT_DWARF 3.834e-20
+// #define LM_SQRT_GIANT 1.304e19
+
+
+void lm_qrfac( int m, int n, double* a, int pivot, int* ipvt,
+ double* rdiag, double* acnorm, double* wa);
+void lm_qrsolv(int n, double* r, int ldr, int* ipvt, double* diag,
+ double* qtb, double* x, double* sdiag, double* wa);
+void lm_lmpar( int n, double* r, int ldr, int* ipvt, double* diag, double* qtb,
+ double delta, double* par, double* x, double* sdiag,
+ double* wa1, double* wa2);
+
+#define MIN(a,b) (((a)<=(b)) ? (a) : (b))
+#define MAX(a,b) (((a)>=(b)) ? (a) : (b))
+#define SQR(x) (x)*(x)
+
+
+// ***** the low-level legacy interface for full control.
+
+void lm_lmdif( int m, int n, double* x, double* fvec, double ftol, double xtol,
+ double gtol, int maxfev, double epsfcn, double* diag, int mode,
+ double factor, int *info, int *nfev,
+ double* fjac, int* ipvt, double* qtf,
+ double* wa1, double* wa2, double* wa3, double* wa4,
+ lm_evaluate_ftype *evaluate, lm_print_ftype *printout,
+ void *data )
+{
+/*
+ * the purpose of lmdif is to minimize the sum of the squares of
+ * m nonlinear functions in n variables by a modification of
+ * the levenberg-marquardt algorithm. the user must provide a
+ * subroutine evaluate which calculates the functions. the jacobian
+ * is then calculated by a forward-difference approximation.
+ *
+ * the multi-parameter interface lm_lmdif is for users who want
+ * full control and flexibility. most users will be better off using
+ * the simpler interface lmfit provided above.
+ *
+ * the parameters are the same as in the legacy FORTRAN implementation,
+ * with the following exceptions:
+ * the old parameter ldfjac which gave leading dimension of fjac has
+ * been deleted because this C translation makes no use of two-
+ * dimensional arrays;
+ * the old parameter nprint has been deleted; printout is now controlled
+ * by the user-supplied routine *printout;
+ * the parameter field *data and the function parameters *evaluate and
+ * *printout have been added; they help avoiding global variables.
+ *
+ * parameters:
+ *
+ * m is a positive integer input variable set to the number
+ * of functions.
+ *
+ * n is a positive integer input variable set to the number
+ * of variables. n must not exceed m.
+ *
+ * x is an array of length n. on input x must contain
+ * an initial estimate of the solution vector. on output x
+ * contains the final estimate of the solution vector.
+ *
+ * fvec is an output array of length m which contains
+ * the functions evaluated at the output x.
+ *
+ * ftol is a nonnegative input variable. termination
+ * occurs when both the actual and predicted relative
+ * reductions in the sum of squares are at most ftol.
+ * therefore, ftol measures the relative error desired
+ * in the sum of squares.
+ *
+ * xtol is a nonnegative input variable. termination
+ * occurs when the relative error between two consecutive
+ * iterates is at most xtol. therefore, xtol measures the
+ * relative error desired in the approximate solution.
+ *
+ * gtol is a nonnegative input variable. termination
+ * occurs when the cosine of the angle between fvec and
+ * any column of the jacobian is at most gtol in absolute
+ * value. therefore, gtol measures the orthogonality
+ * desired between the function vector and the columns
+ * of the jacobian.
+ *
+ * maxfev is a positive integer input variable. termination
+ * occurs when the number of calls to lm_fcn is at least
+ * maxfev by the end of an iteration.
+ *
+ * epsfcn is an input variable used in determining a suitable
+ * step length for the forward-difference approximation. this
+ * approximation assumes that the relative errors in the
+ * functions are of the order of epsfcn. if epsfcn is less
+ * than the machine precision, it is assumed that the relative
+ * errors in the functions are of the order of the machine
+ * precision.
+ *
+ * diag is an array of length n. if mode = 1 (see below), diag is
+ * internally set. if mode = 2, diag must contain positive entries
+ * that serve as multiplicative scale factors for the variables.
+ *
+ * mode is an integer input variable. if mode = 1, the
+ * variables will be scaled internally. if mode = 2,
+ * the scaling is specified by the input diag. other
+ * values of mode are equivalent to mode = 1.
+ *
+ * factor is a positive input variable used in determining the
+ * initial step bound. this bound is set to the product of
+ * factor and the euclidean norm of diag*x if nonzero, or else
+ * to factor itself. in most cases factor should lie in the
+ * interval (.1,100.). 100. is a generally recommended value.
+ *
+ * info is an integer output variable that indicates the termination
+ * status of lm_lmdif as follows:
+ *
+ * info < 0 termination requested by user-supplied routine *evaluate;
+ *
+ * info = 0 improper input parameters;
+ *
+ * info = 1 both actual and predicted relative reductions
+ * in the sum of squares are at most ftol;
+ *
+ * info = 2 relative error between two consecutive iterates
+ * is at most xtol;
+ *
+ * info = 3 conditions for info = 1 and info = 2 both hold;
+ *
+ * info = 4 the cosine of the angle between fvec and any
+ * column of the jacobian is at most gtol in
+ * absolute value;
+ *
+ * info = 5 number of calls to lm_fcn has reached or
+ * exceeded maxfev;
+ *
+ * info = 6 ftol is too small. no further reduction in
+ * the sum of squares is possible;
+ *
+ * info = 7 xtol is too small. no further improvement in
+ * the approximate solution x is possible;
+ *
+ * info = 8 gtol is too small. fvec is orthogonal to the
+ * columns of the jacobian to machine precision;
+ *
+ * nfev is an output variable set to the number of calls to the
+ * user-supplied routine *evaluate.
+ *
+ * fjac is an output m by n array. the upper n by n submatrix
+ * of fjac contains an upper triangular matrix r with
+ * diagonal elements of nonincreasing magnitude such that
+ *
+ * t t t
+ * p *(jac *jac)*p = r *r,
+ *
+ * where p is a permutation matrix and jac is the final
+ * calculated jacobian. column j of p is column ipvt(j)
+ * (see below) of the identity matrix. the lower trapezoidal
+ * part of fjac contains information generated during
+ * the computation of r.
+ *
+ * ipvt is an integer output array of length n. ipvt
+ * defines a permutation matrix p such that jac*p = q*r,
+ * where jac is the final calculated jacobian, q is
+ * orthogonal (not stored), and r is upper triangular
+ * with diagonal elements of nonincreasing magnitude.
+ * column j of p is column ipvt(j) of the identity matrix.
+ *
+ * qtf is an output array of length n which contains
+ * the first n elements of the vector (q transpose)*fvec.
+ *
+ * wa1, wa2, and wa3 are work arrays of length n.
+ *
+ * wa4 is a work array of length m.
+ *
+ * the following parameters are newly introduced in this C translation:
+ *
+ * evaluate is the name of the subroutine which calculates the functions.
+ * a default implementation lm_evaluate_default is provided in lm_eval.c;
+ * alternatively, evaluate can be provided by a user calling program.
+ * it should be written as follows:
+ *
+ * void evaluate ( double* par, int m_dat, double* fvec,
+ * void *data, int *info )
+ * {
+ * // for ( i=0; i<m_dat; ++i )
+ * // calculate fvec[i] for given parameters par;
+ * // to stop the minimization,
+ * // set *info to a negative integer.
+ * }
+ *
+ * printout is the name of the subroutine which nforms about fit progress.
+ * a default implementation lm_print_default is provided in lm_eval.c;
+ * alternatively, printout can be provided by a user calling program.
+ * it should be written as follows:
+ *
+ * void printout ( int n_par, double* par, int m_dat, double* fvec,
+ * void *data, int iflag, int iter, int nfev )
+ * {
+ * // iflag : 0 (init) 1 (outer loop) 2(inner loop) -1(terminated)
+ * // iter : outer loop counter
+ * // nfev : number of calls to *evaluate
+ * }
+ *
+ * data is an input pointer to an arbitrary structure that is passed to
+ * evaluate. typically, it contains experimental data to be fitted.
+ *
+ */
+ int i, iter, j;
+ double actred, delta, dirder, eps, fnorm, fnorm1, gnorm, par, pnorm,
+ prered, ratio, step, sum, temp, temp1, temp2, temp3, xnorm;
+ static double p1 = 0.1;
+ static double p5 = 0.5;
+ static double p25 = 0.25;
+ static double p75 = 0.75;
+ static double p0001 = 1.0e-4;
+
+ *nfev = 0; // function evaluation counter
+ iter = 1; // outer loop counter
+ par = 0; // levenberg-marquardt parameter
+ delta = 0; // just to prevent a warning (initialization within if-clause)
+ xnorm = 0; // dito
+
+ temp = MAX(epsfcn,LM_MACHEP);
+ eps = sqrt(temp); // used in calculating the Jacobian by forward differences
+
+// *** check the input parameters for errors.
+
+ if ( (n <= 0) || (m < n) || (ftol < 0.)
+ || (xtol < 0.) || (gtol < 0.) || (maxfev <= 0) || (factor <= 0.) )
+ {
+ *info = 0; // invalid parameter
+ return;
+ }
+ if ( mode == 2 ) /* scaling by diag[] */
+ {
+ for ( j=0; j<n; j++ ) /* check for nonpositive elements */
+ {
+ if ( diag[j] <= 0.0 )
+ {
+ *info = 0; // invalid parameter
+ return;
+ }
+ }
+ }
+#if BUG
+ printf( "lmdif\n" );
+#endif
+
+// *** evaluate the function at the starting point and calculate its norm.
+
+ *info = 0;
+ (*evaluate)( x, m, fvec, data, info );
+ (*printout)( n, x, m, fvec, data, 0, 0, ++(*nfev) );
+ if ( *info < 0 ) return;
+ fnorm = lm_enorm(m,fvec);
+
+// *** the outer loop.
+
+ do {
+#if BUG
+ printf( "lmdif/ outer loop iter=%d nfev=%d fnorm=%.10e\n",
+ iter, *nfev, fnorm );
+#endif
+
+// O** calculate the jacobian matrix.
+
+ for ( j=0; j<n; j++ )
+ {
+ temp = x[j];
+ step = eps * fabs(temp);
+ if (step == 0.) step = eps;
+ x[j] = temp + step;
+ *info = 0;
+ (*evaluate)( x, m, wa4, data, info );
+ (*printout)( n, x, m, wa4, data, 1, iter, ++(*nfev) );
+ if ( *info < 0 ) return; // user requested break
+ x[j] = temp;
+ for ( i=0; i<m; i++ )
+ fjac[j*m+i] = (wa4[i] - fvec[i]) / step;
+ }
+#if BUG>1
+ // DEBUG: print the entire matrix
+ for ( i=0; i<m; i++ )
+ {
+ for ( j=0; j<n; j++ )
+ printf( "%.5e ", y[j*m+i] );
+ printf( "\n" );
+ }
+#endif
+
+// O** compute the qr factorization of the jacobian.
+
+ lm_qrfac( m, n, fjac, 1, ipvt, wa1, wa2, wa3);
+
+// O** on the first iteration ...
+
+ if (iter == 1)
+ {
+ if (mode != 2)
+// ... scale according to the norms of the columns of the initial jacobian.
+ {
+ for ( j=0; j<n; j++ )
+ {
+ diag[j] = wa2[j];
+ if ( wa2[j] == 0. )
+ diag[j] = 1.;
+ }
+ }
+
+// ... calculate the norm of the scaled x and
+// initialize the step bound delta.
+
+ for ( j=0; j<n; j++ )
+ wa3[j] = diag[j] * x[j];
+
+ xnorm = lm_enorm( n, wa3 );
+ delta = factor*xnorm;
+ if (delta == 0.)
+ delta = factor;
+ }
+
+// O** form (q transpose)*fvec and store the first n components in qtf.
+
+ for ( i=0; i<m; i++ )
+ wa4[i] = fvec[i];
+
+ for ( j=0; j<n; j++ )
+ {
+ temp3 = fjac[j*m+j];
+ if (temp3 != 0.)
+ {
+ sum = 0;
+ for ( i=j; i<m; i++ )
+ sum += fjac[j*m+i] * wa4[i];
+ temp = -sum / temp3;
+ for ( i=j; i<m; i++ )
+ wa4[i] += fjac[j*m+i] * temp;
+ }
+ fjac[j*m+j] = wa1[j];
+ qtf[j] = wa4[j];
+ }
+
+// O** compute the norm of the scaled gradient and test for convergence.
+
+ gnorm = 0;
+ if ( fnorm != 0 )
+ {
+ for ( j=0; j<n; j++ )
+ {
+ if ( wa2[ ipvt[j] ] == 0 ) continue;
+
+ sum = 0.;
+ for ( i=0; i<=j; i++ )
+ sum += fjac[j*m+i] * qtf[i] / fnorm;
+ gnorm = MAX( gnorm, fabs(sum/wa2[ ipvt[j] ]) );
+ }
+ }
+
+ if ( gnorm <= gtol )
+ {
+ *info = 4;
+ return;
+ }
+
+// O** rescale if necessary.
+
+ if ( mode != 2 )
+ {
+ for ( j=0; j<n; j++ )
+ diag[j] = MAX(diag[j],wa2[j]);
+ }
+
+// O** the inner loop.
+
+ do {
+#if BUG
+ printf( "lmdif/ inner loop iter=%d nfev=%d\n", iter, *nfev );
+#endif
+
+// OI* determine the levenberg-marquardt parameter.
+
+ lm_lmpar( n,fjac,m,ipvt,diag,qtf,delta,&par,wa1,wa2,wa3,wa4 );
+
+// OI* store the direction p and x + p. calculate the norm of p.
+
+ for ( j=0; j<n; j++ )
+ {
+ wa1[j] = -wa1[j];
+ wa2[j] = x[j] + wa1[j];
+ wa3[j] = diag[j]*wa1[j];
+ }
+ pnorm = lm_enorm(n,wa3);
+
+// OI* on the first iteration, adjust the initial step bound.
+
+ if ( *nfev<= 1+n ) // bug corrected by J. Wuttke in 2004
+ delta = MIN(delta,pnorm);
+
+// OI* evaluate the function at x + p and calculate its norm.
+
+ *info = 0;
+ (*evaluate)( wa2, m, wa4, data, info );
+ (*printout)( n, x, m, wa4, data, 2, iter, ++(*nfev) );
+ if ( *info < 0 ) return; // user requested break
+
+ fnorm1 = lm_enorm(m,wa4);
+#if BUG
+ printf( "lmdif/ pnorm %.10e fnorm1 %.10e fnorm %.10e"
+ " delta=%.10e par=%.10e\n",
+ pnorm, fnorm1, fnorm, delta, par );
+#endif
+
+// OI* compute the scaled actual reduction.
+
+ if ( p1*fnorm1 < fnorm )
+ actred = 1 - SQR( fnorm1/fnorm );
+ else
+ actred = -1;
+
+// OI* compute the scaled predicted reduction and
+// the scaled directional derivative.
+
+ for ( j=0; j<n; j++ )
+ {
+ wa3[j] = 0;
+ for ( i=0; i<=j; i++ )
+ wa3[i] += fjac[j*m+i]*wa1[ ipvt[j] ];
+ }
+ temp1 = lm_enorm(n,wa3) / fnorm;
+ temp2 = sqrt(par) * pnorm / fnorm;
+ prered = SQR(temp1) + 2 * SQR(temp2);
+ dirder = - ( SQR(temp1) + SQR(temp2) );
+
+// OI* compute the ratio of the actual to the predicted reduction.
+
+ ratio = prered!=0 ? actred/prered : 0;
+#if BUG
+ printf( "lmdif/ actred=%.10e prered=%.10e ratio=%.10e"
+ " sq(1)=%.10e sq(2)=%.10e dd=%.10e\n",
+ actred, prered, prered!=0 ? ratio : 0.,
+ SQR(temp1), SQR(temp2), dirder );
+#endif
+
+// OI* update the step bound.
+
+ if (ratio <= p25)
+ {
+ if (actred >= 0.)
+ temp = p5;
+ else
+ temp = p5*dirder/(dirder + p5*actred);
+ if ( p1*fnorm1 >= fnorm || temp < p1 )
+ temp = p1;
+ delta = temp * MIN(delta,pnorm/p1);
+ par /= temp;
+ }
+ else if ( par == 0. || ratio >= p75 )
+ {
+ delta = pnorm/p5;
+ par *= p5;
+ }
+
+// OI* test for successful iteration...
+
+ if (ratio >= p0001)
+ {
+
+// ... successful iteration. update x, fvec, and their norms.
+
+ for ( j=0; j<n; j++ )
+ {
+ x[j] = wa2[j];
+ wa2[j] = diag[j]*x[j];
+ }
+ for ( i=0; i<m; i++ )
+ fvec[i] = wa4[i];
+ xnorm = lm_enorm(n,wa2);
+ fnorm = fnorm1;
+ iter++;
+ }
+#if BUG
+ else {
+ printf( "ATTN: iteration considered unsuccessful\n" );
+ }
+#endif
+
+// OI* tests for convergence ( otherwise *info = 1, 2, or 3 )
+
+ *info = 0; // do not terminate (unless overwritten by nonzero value)
+ if ( fabs(actred) <= ftol && prered <= ftol && p5*ratio <= 1 )
+ *info = 1;
+ if (delta <= xtol*xnorm)
+ *info += 2;
+ if ( *info != 0)
+ return;
+
+// OI* tests for termination and stringent tolerances.
+
+ if ( *nfev >= maxfev)
+ *info = 5;
+ if ( fabs(actred) <= LM_MACHEP &&
+ prered <= LM_MACHEP && p5*ratio <= 1 )
+ *info = 6;
+ if (delta <= LM_MACHEP*xnorm)
+ *info = 7;
+ if (gnorm <= LM_MACHEP)
+ *info = 8;
+ if ( *info != 0)
+ return;
+
+// OI* end of the inner loop. repeat if iteration unsuccessful.
+
+ } while (ratio < p0001);
+
+// O** end of the outer loop.
+
+ } while (1);
+
+}
+
+
+
+void lm_lmpar(int n, double* r, int ldr, int* ipvt, double* diag, double* qtb,
+ double delta, double* par, double* x, double* sdiag,
+ double* wa1, double* wa2)
+{
+/* given an m by n matrix a, an n by n nonsingular diagonal
+ * matrix d, an m-vector b, and a positive number delta,
+ * the problem is to determine a value for the parameter
+ * par such that if x solves the system
+ *
+ * a*x = b , sqrt(par)*d*x = 0 ,
+ *
+ * in the least squares sense, and dxnorm is the euclidean
+ * norm of d*x, then either par is 0. and
+ *
+ * (dxnorm-delta) .le. 0.1*delta ,
+ *
+ * or par is positive and
+ *
+ * abs(dxnorm-delta) .le. 0.1*delta .
+ *
+ * this subroutine completes the solution of the problem
+ * if it is provided with the necessary information from the
+ * qr factorization, with column pivoting, of a. that is, if
+ * a*p = q*r, where p is a permutation matrix, q has orthogonal
+ * columns, and r is an upper triangular matrix with diagonal
+ * elements of nonincreasing magnitude, then lmpar expects
+ * the full upper triangle of r, the permutation matrix p,
+ * and the first n components of (q transpose)*b. on output
+ * lmpar also provides an upper triangular matrix s such that
+ *
+ * t t t
+ * p *(a *a + par*d*d)*p = s *s .
+ *
+ * s is employed within lmpar and may be of separate interest.
+ *
+ * only a few iterations are generally needed for convergence
+ * of the algorithm. if, however, the limit of 10 iterations
+ * is reached, then the output par will contain the best
+ * value obtained so far.
+ *
+ * parameters:
+ *
+ * n is a positive integer input variable set to the order of r.
+ *
+ * r is an n by n array. on input the full upper triangle
+ * must contain the full upper triangle of the matrix r.
+ * on output the full upper triangle is unaltered, and the
+ * strict lower triangle contains the strict upper triangle
+ * (transposed) of the upper triangular matrix s.
+ *
+ * ldr is a positive integer input variable not less than n
+ * which specifies the leading dimension of the array r.
+ *
+ * ipvt is an integer input array of length n which defines the
+ * permutation matrix p such that a*p = q*r. column j of p
+ * is column ipvt(j) of the identity matrix.
+ *
+ * diag is an input array of length n which must contain the
+ * diagonal elements of the matrix d.
+ *
+ * qtb is an input array of length n which must contain the first
+ * n elements of the vector (q transpose)*b.
+ *
+ * delta is a positive input variable which specifies an upper
+ * bound on the euclidean norm of d*x.
+ *
+ * par is a nonnegative variable. on input par contains an
+ * initial estimate of the levenberg-marquardt parameter.
+ * on output par contains the final estimate.
+ *
+ * x is an output array of length n which contains the least
+ * squares solution of the system a*x = b, sqrt(par)*d*x = 0,
+ * for the output par.
+ *
+ * sdiag is an output array of length n which contains the
+ * diagonal elements of the upper triangular matrix s.
+ *
+ * wa1 and wa2 are work arrays of length n.
+ *
+ */
+ int i, iter, j, nsing;
+ double dxnorm, fp, fp_old, gnorm, parc, parl, paru;
+ double sum, temp;
+ static double p1 = 0.1;
+ static double p001 = 0.001;
+
+#if BUG
+ printf( "lmpar\n" );
+#endif
+
+// *** compute and store in x the gauss-newton direction. if the
+// jacobian is rank-deficient, obtain a least squares solution.
+
+ nsing = n;
+ for ( j=0; j<n; j++ )
+ {
+ wa1[j] = qtb[j];
+ if ( r[j*ldr+j] == 0 && nsing == n )
+ nsing = j;
+ if (nsing < n)
+ wa1[j] = 0;
+ }
+#if BUG
+ printf( "nsing %d ", nsing );
+#endif
+ for ( j=nsing-1; j>=0; j-- )
+ {
+ wa1[j] = wa1[j]/r[j+ldr*j];
+ temp = wa1[j];
+ for ( i=0; i<j; i++ )
+ wa1[i] -= r[j*ldr+i]*temp;
+ }
+
+ for ( j=0; j<n; j++ )
+ x[ ipvt[j] ] = wa1[j];
+
+// *** initialize the iteration counter.
+// evaluate the function at the origin, and test
+// for acceptance of the gauss-newton direction.
+
+ iter = 0;
+ for ( j=0; j<n; j++ )
+ wa2[j] = diag[j]*x[j];
+ dxnorm = lm_enorm(n,wa2);
+ fp = dxnorm - delta;
+ if (fp <= p1*delta)
+ {
+#if BUG
+ printf( "lmpar/ terminate (fp<delta/10\n" );
+#endif
+ *par = 0;
+ return;
+ }
+
+// *** if the jacobian is not rank deficient, the newton
+// step provides a lower bound, parl, for the 0. of
+// the function. otherwise set this bound to 0..
+
+ parl = 0;
+ if (nsing >= n)
+ {
+ for ( j=0; j<n; j++ )
+ wa1[j] = diag[ ipvt[j] ] * wa2[ ipvt[j] ] / dxnorm;
+
+ for ( j=0; j<n; j++ )
+ {
+ sum = 0.;
+ for ( i=0; i<j; i++ )
+ sum += r[j*ldr+i]*wa1[i];
+ wa1[j] = (wa1[j] - sum)/r[j+ldr*j];
+ }
+ temp = lm_enorm(n,wa1);
+ parl = fp/delta/temp/temp;
+ }
+
+// *** calculate an upper bound, paru, for the 0. of the function.
+
+ for ( j=0; j<n; j++ )
+ {
+ sum = 0;
+ for ( i=0; i<=j; i++ )
+ sum += r[j*ldr+i]*qtb[i];
+ wa1[j] = sum/diag[ ipvt[j] ];
+ }
+ gnorm = lm_enorm(n,wa1);
+ paru = gnorm/delta;
+ if (paru == 0.)
+ paru = LM_DWARF/MIN(delta,p1);
+
+// *** if the input par lies outside of the interval (parl,paru),
+// set par to the closer endpoint.
+
+ *par = MAX( *par,parl);
+ *par = MIN( *par,paru);
+ if ( *par == 0.)
+ *par = gnorm/dxnorm;
+#if BUG
+ printf( "lmpar/ parl %.4e par %.4e paru %.4e\n", parl, *par, paru );
+#endif
+
+// *** iterate.
+
+ for ( ; ; iter++ ) {
+
+// *** evaluate the function at the current value of par.
+
+ if ( *par == 0.)
+ *par = MAX(LM_DWARF,p001*paru);
+ temp = sqrt( *par );
+ for ( j=0; j<n; j++ )
+ wa1[j] = temp*diag[j];
+ lm_qrsolv( n, r, ldr, ipvt, wa1, qtb, x, sdiag, wa2);
+ for ( j=0; j<n; j++ )
+ wa2[j] = diag[j]*x[j];
+ dxnorm = lm_enorm(n,wa2);
+ fp_old = fp;
+ fp = dxnorm - delta;
+
+// *** if the function is small enough, accept the current value
+// of par. also test for the exceptional cases where parl
+// is 0. or the number of iterations has reached 10.
+
+ if ( fabs(fp) <= p1*delta
+ || (parl == 0. && fp <= fp_old && fp_old < 0.)
+ || iter == 10 )
+ break; // the only exit from this loop
+
+// *** compute the Newton correction.
+
+ for ( j=0; j<n; j++ )
+ wa1[j] = diag[ ipvt[j] ] * wa2[ ipvt[j] ] / dxnorm;
+
+ for ( j=0; j<n; j++ )
+ {
+ wa1[j] = wa1[j]/sdiag[j];
+ for ( i=j+1; i<n; i++ )
+ wa1[i] -= r[j*ldr+i]*wa1[j];
+ }
+ temp = lm_enorm( n, wa1);
+ parc = fp/delta/temp/temp;
+
+// *** depending on the sign of the function, update parl or paru.
+
+ if (fp > 0)
+ parl = MAX(parl, *par);
+ else if (fp < 0)
+ paru = MIN(paru, *par);
+ // the case fp==0 is precluded by the break condition
+
+// *** compute an improved estimate for par.
+
+ *par = MAX(parl, *par + parc);
+
+ }
+
+}
+
+
+
+void lm_qrfac(int m, int n, double* a, int pivot, int* ipvt,
+ double* rdiag, double* acnorm, double* wa)
+{
+/*
+ * this subroutine uses householder transformations with column
+ * pivoting (optional) to compute a qr factorization of the
+ * m by n matrix a. that is, qrfac determines an orthogonal
+ * matrix q, a permutation matrix p, and an upper trapezoidal
+ * matrix r with diagonal elements of nonincreasing magnitude,
+ * such that a*p = q*r. the householder transformation for
+ * column k, k = 1,2,...,min(m,n), is of the form
+ *
+ * t
+ * i - (1/u(k))*u*u
+ *
+ * where u has 0.s in the first k-1 positions. the form of
+ * this transformation and the method of pivoting first
+ * appeared in the corresponding linpack subroutine.
+ *
+ * parameters:
+ *
+ * m is a positive integer input variable set to the number
+ * of rows of a.
+ *
+ * n is a positive integer input variable set to the number
+ * of columns of a.
+ *
+ * a is an m by n array. on input a contains the matrix for
+ * which the qr factorization is to be computed. on output
+ * the strict upper trapezoidal part of a contains the strict
+ * upper trapezoidal part of r, and the lower trapezoidal
+ * part of a contains a factored form of q (the non-trivial
+ * elements of the u vectors described above).
+ *
+ * pivot is a logical input variable. if pivot is set true,
+ * then column pivoting is enforced. if pivot is set false,
+ * then no column pivoting is done.
+ *
+ * ipvt is an integer output array of length lipvt. ipvt
+ * defines the permutation matrix p such that a*p = q*r.
+ * column j of p is column ipvt(j) of the identity matrix.
+ * if pivot is false, ipvt is not referenced.
+ *
+ * rdiag is an output array of length n which contains the
+ * diagonal elements of r.
+ *
+ * acnorm is an output array of length n which contains the
+ * norms of the corresponding columns of the input matrix a.
+ * if this information is not needed, then acnorm can coincide
+ * with rdiag.
+ *
+ * wa is a work array of length n. if pivot is false, then wa
+ * can coincide with rdiag.
+ *
+ */
+ int i, j, k, kmax, minmn;
+ double ajnorm, sum, temp;
+ static double p05 = 0.05;
+
+// *** compute the initial column norms and initialize several arrays.
+
+ for ( j=0; j<n; j++ )
+ {
+ acnorm[j] = lm_enorm(m, &a[j*m]);
+ rdiag[j] = acnorm[j];
+ wa[j] = rdiag[j];
+ if ( pivot )
+ ipvt[j] = j;
+ }
+#if BUG
+ printf( "qrfac\n" );
+#endif
+
+// *** reduce a to r with householder transformations.
+
+ minmn = MIN(m,n);
+ for ( j=0; j<minmn; j++ )
+ {
+ if ( !pivot ) goto pivot_ok;
+
+// *** bring the column of largest norm into the pivot position.
+
+ kmax = j;
+ for ( k=j+1; k<n; k++ )
+ if (rdiag[k] > rdiag[kmax])
+ kmax = k;
+ if (kmax == j) goto pivot_ok; // bug fixed in rel 2.1
+
+ for ( i=0; i<m; i++ )
+ {
+ temp = a[j*m+i];
+ a[j*m+i] = a[kmax*m+i];
+ a[kmax*m+i] = temp;
+ }
+ rdiag[kmax] = rdiag[j];
+ wa[kmax] = wa[j];
+ k = ipvt[j];
+ ipvt[j] = ipvt[kmax];
+ ipvt[kmax] = k;
+
+ pivot_ok:
+
+// *** compute the Householder transformation to reduce the
+// j-th column of a to a multiple of the j-th unit vector.
+
+ ajnorm = lm_enorm( m-j, &a[j*m+j] );
+ if (ajnorm == 0.)
+ {
+ rdiag[j] = 0;
+ continue;
+ }
+
+ if (a[j*m+j] < 0.)
+ ajnorm = -ajnorm;
+ for ( i=j; i<m; i++ )
+ a[j*m+i] /= ajnorm;
+ a[j*m+j] += 1;
+
+// *** apply the transformation to the remaining columns
+// and update the norms.
+
+ for ( k=j+1; k<n; k++ )
+ {
+ sum = 0;
+
+ for ( i=j; i<m; i++ )
+ sum += a[j*m+i]*a[k*m+i];
+
+ temp = sum/a[j+m*j];
+
+ for ( i=j; i<m; i++ )
+ a[k*m+i] -= temp * a[j*m+i];
+
+ if ( pivot && rdiag[k] != 0. )
+ {
+ temp = a[m*k+j]/rdiag[k];
+ temp = MAX( 0., 1-temp*temp );
+ rdiag[k] *= sqrt(temp);
+ temp = rdiag[k]/wa[k];
+ if ( p05*SQR(temp) <= LM_MACHEP )
+ {
+ rdiag[k] = lm_enorm( m-j-1, &a[m*k+j+1]);
+ wa[k] = rdiag[k];
+ }
+ }
+ }
+
+ rdiag[j] = -ajnorm;
+ }
+}
+
+
+
+void lm_qrsolv(int n, double* r, int ldr, int* ipvt, double* diag,
+ double* qtb, double* x, double* sdiag, double* wa)
+{
+/*
+ * given an m by n matrix a, an n by n diagonal matrix d,
+ * and an m-vector b, the problem is to determine an x which
+ * solves the system
+ *
+ * a*x = b , d*x = 0 ,
+ *
+ * in the least squares sense.
+ *
+ * this subroutine completes the solution of the problem
+ * if it is provided with the necessary information from the
+ * qr factorization, with column pivoting, of a. that is, if
+ * a*p = q*r, where p is a permutation matrix, q has orthogonal
+ * columns, and r is an upper triangular matrix with diagonal
+ * elements of nonincreasing magnitude, then qrsolv expects
+ * the full upper triangle of r, the permutation matrix p,
+ * and the first n components of (q transpose)*b. the system
+ * a*x = b, d*x = 0, is then equivalent to
+ *
+ * t t
+ * r*z = q *b , p *d*p*z = 0 ,
+ *
+ * where x = p*z. if this system does not have full rank,
+ * then a least squares solution is obtained. on output qrsolv
+ * also provides an upper triangular matrix s such that
+ *
+ * t t t
+ * p *(a *a + d*d)*p = s *s .
+ *
+ * s is computed within qrsolv and may be of separate interest.
+ *
+ * parameters
+ *
+ * n is a positive integer input variable set to the order of r.
+ *
+ * r is an n by n array. on input the full upper triangle
+ * must contain the full upper triangle of the matrix r.
+ * on output the full upper triangle is unaltered, and the
+ * strict lower triangle contains the strict upper triangle
+ * (transposed) of the upper triangular matrix s.
+ *
+ * ldr is a positive integer input variable not less than n
+ * which specifies the leading dimension of the array r.
+ *
+ * ipvt is an integer input array of length n which defines the
+ * permutation matrix p such that a*p = q*r. column j of p
+ * is column ipvt(j) of the identity matrix.
+ *
+ * diag is an input array of length n which must contain the
+ * diagonal elements of the matrix d.
+ *
+ * qtb is an input array of length n which must contain the first
+ * n elements of the vector (q transpose)*b.
+ *
+ * x is an output array of length n which contains the least
+ * squares solution of the system a*x = b, d*x = 0.
+ *
+ * sdiag is an output array of length n which contains the
+ * diagonal elements of the upper triangular matrix s.
+ *
+ * wa is a work array of length n.
+ *
+ */
+ int i, kk, j, k, nsing;
+ double qtbpj, sum, temp;
+ double sin, cos, tan, cotan; // these are local variables, not functions
+ static double p25 = 0.25;
+ static double p5 = 0.5;
+
+// *** copy r and (q transpose)*b to preserve input and initialize s.
+// in particular, save the diagonal elements of r in x.
+
+ for ( j=0; j<n; j++ )
+ {
+ for ( i=j; i<n; i++ )
+ r[j*ldr+i] = r[i*ldr+j];
+ x[j] = r[j*ldr+j];
+ wa[j] = qtb[j];
+ }
+#if BUG
+ printf( "qrsolv\n" );
+#endif
+
+// *** eliminate the diagonal matrix d using a givens rotation.
+
+ for ( j=0; j<n; j++ )
+ {
+
+// *** prepare the row of d to be eliminated, locating the
+// diagonal element using p from the qr factorization.
+
+ if (diag[ ipvt[j] ] == 0.)
+ goto L90;
+ for ( k=j; k<n; k++ )
+ sdiag[k] = 0.;
+ sdiag[j] = diag[ ipvt[j] ];
+
+// *** the transformations to eliminate the row of d
+// modify only a single element of (q transpose)*b
+// beyond the first n, which is initially 0..
+
+ qtbpj = 0.;
+ for ( k=j; k<n; k++ )
+ {
+
+// determine a givens rotation which eliminates the
+// appropriate element in the current row of d.
+
+ if (sdiag[k] == 0.)
+ continue;
+ kk = k + ldr * k; // <! keep this shorthand !>
+ if ( fabs(r[kk]) < fabs(sdiag[k]) )
+ {
+ cotan = r[kk]/sdiag[k];
+ sin = p5/sqrt(p25+p25*SQR(cotan));
+ cos = sin*cotan;
+ }
+ else
+ {
+ tan = sdiag[k]/r[kk];
+ cos = p5/sqrt(p25+p25*SQR(tan));
+ sin = cos*tan;
+ }
+
+// *** compute the modified diagonal element of r and
+// the modified element of ((q transpose)*b,0).
+
+ r[kk] = cos*r[kk] + sin*sdiag[k];
+ temp = cos*wa[k] + sin*qtbpj;
+ qtbpj = -sin*wa[k] + cos*qtbpj;
+ wa[k] = temp;
+
+// *** accumulate the tranformation in the row of s.
+
+ for ( i=k+1; i<n; i++ )
+ {
+ temp = cos*r[k*ldr+i] + sin*sdiag[i];
+ sdiag[i] = -sin*r[k*ldr+i] + cos*sdiag[i];
+ r[k*ldr+i] = temp;
+ }
+ }
+ L90:
+
+// *** store the diagonal element of s and restore
+// the corresponding diagonal element of r.
+
+ sdiag[j] = r[j*ldr+j];
+ r[j*ldr+j] = x[j];
+ }
+
+// *** solve the triangular system for z. if the system is
+// singular, then obtain a least squares solution.
+
+ nsing = n;
+ for ( j=0; j<n; j++ )
+ {
+ if ( sdiag[j] == 0. && nsing == n )
+ nsing = j;
+ if (nsing < n)
+ wa[j] = 0;
+ }
+
+ for ( j=nsing-1; j>=0; j-- )
+ {
+ sum = 0;
+ for ( i=j+1; i<nsing; i++ )
+ sum += r[j*ldr+i]*wa[i];
+ wa[j] = (wa[j] - sum)/sdiag[j];
+ }
+
+// *** permute the components of z back to components of x.
+
+ for ( j=0; j<n; j++ )
+ x[ ipvt[j] ] = wa[j];
+}
+
+
+
+double lm_enorm( int n, double *x )
+{
+/* given an n-vector x, this function calculates the
+ * euclidean norm of x.
+ *
+ * the euclidean norm is computed by accumulating the sum of
+ * squares in three different sums. the sums of squares for the
+ * small and large components are scaled so that no overflows
+ * occur. non-destructive underflows are permitted. underflows
+ * and overflows do not occur in the computation of the unscaled
+ * sum of squares for the intermediate components.
+ * the definitions of small, intermediate and large components
+ * depend on two constants, LM_SQRT_DWARF and LM_SQRT_GIANT. the main
+ * restrictions on these constants are that LM_SQRT_DWARF**2 not
+ * underflow and LM_SQRT_GIANT**2 not overflow.
+ *
+ * parameters
+ *
+ * n is a positive integer input variable.
+ *
+ * x is an input array of length n.
+ */
+ int i;
+ double agiant, s1, s2, s3, xabs, x1max, x3max, temp;
+
+ s1 = 0;
+ s2 = 0;
+ s3 = 0;
+ x1max = 0;
+ x3max = 0;
+ agiant = LM_SQRT_GIANT/( (double) n);
+
+ for ( i=0; i<n; i++ )
+ {
+ xabs = fabs(x[i]);
+ if ( xabs > LM_SQRT_DWARF && xabs < agiant )
+ {
+// ** sum for intermediate components.
+ s2 += xabs*xabs;
+ continue;
+ }
+
+ if ( xabs > LM_SQRT_DWARF )
+ {
+// ** sum for large components.
+ if (xabs > x1max)
+ {
+ temp = x1max/xabs;
+ s1 = 1 + s1*SQR(temp);
+ x1max = xabs;
+ }
+ else
+ {
+ temp = xabs/x1max;
+ s1 += SQR(temp);
+ }
+ continue;
+ }
+// ** sum for small components.
+ if (xabs > x3max)
+ {
+ temp = x3max/xabs;
+ s3 = 1 + s3*SQR(temp);
+ x3max = xabs;
+ }
+ else
+ {
+ if (xabs != 0.)
+ {
+ temp = xabs/x3max;
+ s3 += SQR(temp);
+ }
+ }
+ }
+
+// *** calculation of norm.
+
+ if (s1 != 0)
+ return x1max*sqrt(s1 + (s2/x1max)/x1max);
+ if (s2 != 0)
+ {
+ if (s2 >= x3max)
+ return sqrt( s2*(1+(x3max/s2)*(x3max*s3)) );
+ else
+ return sqrt( x3max*((s2/x3max)+(x3max*s3)) );
+ }
+
+ return x3max*sqrt(s3);
+}