LSSTApplications  10.0+286,10.0+36,10.0+46,10.0-2-g4f67435,10.1+152,10.1+37,11.0,11.0+1,11.0-1-g47edd16,11.0-1-g60db491,11.0-1-g7418c06,11.0-2-g04d2804,11.0-2-g68503cd,11.0-2-g818369d,11.0-2-gb8b8ce7
LSSTDataManagementBasePackage
NLSolver.h
Go to the documentation of this file.
1 #ifndef MeasAlgoShapeletNLSolver_H
2 #define MeasAlgoShapeletNLSolver_H
3 
4 // The class NLSolver is designed as a base class. Therefore, you should
5 // define a class with your particular function as a derived class of
6 // NLSolver:
7 //
8 // class MyNLFunction : public NLSolver { ... }
9 //
10 // You need to define the virtual functions calculateF and calculateJ.
11 // J = dF/dx is a MxN matrix where M is the number of elements
12 // in the F vector and N is the number of elements in the x vector.
13 // This allows you to put any kind of global information that
14 // might be needed for evaluating the function into the class.
15 //
16 // Then:
17 //
18 // MyNLSolver nls;
19 // tmv::Vector<double> x(n);
20 // tmv::Vector<double> f(m);
21 // x = [initial guess]
22 // bool success = nls.solve(x,f);
23 // [ if success, x is now a solution to the function. ]
24 // [ f is returned as F(x), regardless of success. ]
25 //
26 // Note: for some functions, the calculation of J is
27 // partially degenerate with the calculations needed for F.
28 // Therefore, we do two things to help out.
29 // First, the function call for calculateJ includes the value for
30 // f that has previously been calculated for that x.
31 // Second, we guarantee that every call to calculateJ will be for the
32 // same x argument as the previous call to calculateF. This allows
33 // the class to potentially store useful auxiliary information
34 // which might make the J calculation easier.
35 //
36 // If m > n, then the solution sought is the
37 // least squares solution which minimizes
38 // Q = 1/2 Sum |F_i|^2
39 //
40 // If you want the covariance matrix of the variables at the
41 // solution location, you can do:
42 //
43 // tmv::Matrix<double> cov(n,n);
44 // nls.getCovariance(cov);
45 //
46 // This needs to be done _after_ the solve function returns a success.
47 //
48 // Sometimes it is more useful to have the inverse covariance matrix rather
49 // than the covariance. It turns out that it is more efficient to
50 // calculate the inverse covariance matrix directly here, rather than
51 // the covariance matrix and then invert it using a normal inversion algorithm.
52 // So we provide the option of getting this instead:
53 //
54 // tmv::Matrix<double> invCov(n,n);
55 // nls.getInverseCovariance(invCov);
56 //
57 // There are 5 methods implemented here, all of which are based on
58 // "Methods for Non-Linear Least Squares Problems", 2nd edition,
59 // April, 2004, K. Madsen, H.B. Nielsen, O. Tingleff,
60 // Informatics and Mathematical Modelling, Technical University of Denmark.
61 //
62 // The Newton method is a simple method which can be very fast for cases
63 // where the solution is at f=0, but it is also more liable to fail
64 // than the below methods.
65 //
66 // The Hybrid method is usually the best choice when the number of
67 // equations (m) > the number of variables (n).
68 //
69 // The Dogleg method is usually best when n == m and you are expecting an
70 // exact solution.
71 //
72 // The LM method is often good in both cases, but sometimes has more trouble
73 // converging than the above choices. For some situations, though, it may
74 // be faster than these.
75 //
76 // The SecantDogleg method may be best when m == n and a direct calculation of
77 // J is not possible.
78 //
79 // The SecantLM method may be best when m > n and a direct calculation of
80 // J is not possible.
81 //
82 // To set which method to use for a particular solver objects type:
83 // solver.useNewton();
84 // solver.useHybrid();
85 // solver.useDogleg();
86 // solver.useLM();
87 // solver.useSecantDogleg();
88 // solver.useSecantLM();
89 //
90 //
91 // There are two ways that the algorithms can seccessfully converge:
92 //
93 // success if NormInf(f) < ftol
94 // success if NormInf(grad(1/2 NormSq(f)) < gtol
95 //
96 // There are also a number of ways they can fail.
97 // Two important ones which you should set appropriately are:
98 //
99 // fail if Norm2(delta x) < minStep * (Norm2(x) + epsilon2)
100 // fail if number of iterations > max_iter
101 //
102 // The defaults are:
103 //
104 // fTol = 1.e-8
105 // gTol = 1.e-8
106 // minStep = 1.e-8
107 // maxIter = 200
108 //
109 // These are set with the methods:
110 // solver.setFTol(fTol)
111 // solver.setGTol(gTol
112 // solver.setTol(fTol,gTol)
113 // solver.setMinStep(minStep)
114 // solver.setMaxIter(maxIter)
115 //
116 // There are other failure modes for the various algorithms which use
117 // the above parameters for their tests, so no need to set anything else.
118 // However, there are a couple more parameters which affect how
119 // various algorithms are initialized:
120 //
121 // solver.setTau(tau)
122 // tau is a parameter used in LM, Hybrid, and SecantLM.
123 // If the initial guess is thought to be very good, use tau = 1.e-6.
124 // If it's somewhat reasonable, use tau = 1.e-3. (default)
125 // If it's bad, use tau = 1.
126 //
127 // solver.setDelta0(delta0)
128 // delta0 is a parameter used in Dogleg and SecantDogleg.
129 // It gives the initial scale size allowed for steps in x.
130 // The algorithm will increase or decrease this as appropriate.
131 // The default value is 1.
132 //
133 // solver.setOuput(os)
134 // You can have some information about the solver progress sent to
135 // an ostream, os. e.g. solver.setOutput(std::cout);
136 // This will print basic information about each iteration.
137 // If you want much more information about what's going on,
138 // you can get verbose output with:
139 // solver.useVerboseOutput();
140 //
141 // solver.useDirectH()
142 // solver.noUseDirectH()
143 // This tells the solver whether you have defined the calculateH
144 // function, which may make the solver a bit faster.
145 // This is only used for the Hybrid method.
146 // If you don't define the calculateH function, then the algorithm
147 // builds up successive approximations to H with each pass of the
148 // calculation. This is often good enough and is the default behavior.
149 // However, if the H matrix happens to be particularly easy to calculate
150 // directly, then it can be worth it to do so.
151 //
152 // solver.useCholesky()
153 // solver.noUseCholesky()
154 // This tells the solver whether you want to start out using Cholesky
155 // decomposition for the matrix solvers in LM, Hybrid and SecantLM.
156 // This is the default, and is usually faster. The algorithm automatically
157 // detects if the matrix becomes non-positive-definite, in which case,
158 // the solver switches over to Bunch-Kauffman decomposition instead.
159 // A (very) slight improvement in speed could be obtained if you know that
160 // your matrix is not going to remain positive definite very long,
161 // in which case setting startwithch=false will use Bunch-Kauffman from
162 // the beginning.
163 //
164 // solver.useSVD()
165 // solver.noUseSVD()
166 // This tells the sovler whether you want to use singular value decomposition
167 // for the division. The default is not to, since it is slow, and it doesn't
168 // usually produce much improvement in the results.
169 // But in some cases, SVD can find a valid step for a singular (or nearly
170 // singular) jacobian matrix, which moves you closer to the correct solution
171 // (and often away from singularity).
172 // So it may be worth giving it a try if the solver gets stuck at some
173 // point. Call useSVD(), and then rerun solve.
174 // Also, if the hessian is singular at the solution, then the routines may
175 // produce a good result, but the covariance matrix will be garbage. So
176 // in that case, it may be worth calling useSVD() _after_ solve is done,
177 // but before calling getCovariance.
178 
179 #include <stdexcept>
180 #include <memory>
181 
182 // There are too many changes required for this class, so here
183 // and in NLSolver.cpp, I do a full separate definition for Eigen.
184 // In part, this is because I didn't want to bother going through and
185 // making the changes for all of the methods, so I dropped down to
186 // just Hybrid and Dogleg, the ones I use most often.
187 // And second, Eigen isn't as flexible about its division method, so
188 // I removed the options of using SVD or Choleskey, and only use LU.
189 
190 #ifdef USE_TMV
191 
192 #include "TMV.h"
193 #include "TMV_Sym.h"
194 
195 namespace lsst {
196 namespace meas {
197 namespace algorithms {
198 namespace shapelet {
199 
200  class NLSolver
201  {
202  public :
203 
204  NLSolver();
205  virtual ~NLSolver() {}
206 
207  // This is the basic function that needs to be overridden.
208  virtual void calculateF(
209  const tmv::Vector<double>& x, tmv::Vector<double>& f) const =0;
210 
211  // J(i,j) = df_i/dx_j
212  // If you don't overload the J function, then a finite
213  // difference calculation will be performed.
214  virtual void calculateJ(
215  const tmv::Vector<double>& x, const tmv::Vector<double>& f,
216  tmv::Matrix<double>& j) const;
217 
218  // Try to solve for F(x) = 0.
219  // Returns whether it succeeded.
220  // On exit, x is the best solution found.
221  // Also, f is returned as the value of F(x) for the best x,
222  // regardless of whether the fit succeeded or not.
223  virtual bool solve(tmv::Vector<double>& x, tmv::Vector<double>& f) const;
224 
225  // Get the covariance matrix of the solution.
226  // This only works if solve() returns true.
227  // So it should be called after a successful solution is returned.
228  virtual void getCovariance(tmv::Matrix<double>& cov) const;
229 
230  // You can also get the inverse covariance matrix if you prefer.
231  virtual void getInverseCovariance(tmv::Matrix<double>& invcov) const;
232 
233  // H(i,j) = d^2 Q / dx_i dx_j
234  // where Q = 1/2 Sum_k |f_k|^2
235  // H = JT J + Sum_k f_k d^2f_k/(dx_i dx_j)
236  //
237  // This is only used for the Hybrid method, and if it is not
238  // overloaded, then an approximation is calculated on the fly.
239  // It's not really important to overload this, but in case
240  // the calculation is very easy, a direct calculation would
241  // be faster, so we allow for that possibility.
242  virtual void calculateH(
243  const tmv::Vector<double>& x, const tmv::Vector<double>& f,
244  const tmv::Matrix<double>& j, tmv::SymMatrix<double>& h) const;
245 
246  // This tests whether the direct calculation of J matches
247  // the numerical approximation of J calculated from finite differences.
248  // It is useful as a test that your analytic formula was coded correctly.
249  //
250  // If you pass it an ostream (e.g. os = &cout), then debugging
251  // info will be sent to that stream.
252  //
253  // The second optional parameter, relerr, is for functions which
254  // are merely good approximations of the correct J, rather than
255  // exact. Normally, the routine tests whether the J function
256  // calculates the same jacobian as a numerical approximation to
257  // within the numerical precision possible for doubles.
258  // If J is only approximate, you can set relerr as the relative
259  // error in J to be tested for.
260  // If relerr == 0 then sqrt(numeric_limits<double>::epsilon())
261  // = 1.56e-7 is used. This is the default.
262  //
263  // Parameters are x, f, os, relerr.
264  virtual bool testJ(
265  const tmv::Vector<double>& , tmv::Vector<double>& ,
266  std::ostream* os=0, double relerr=0.) const;
267 
268  // H(i,j) = d^2 Q / dx_i dx_j
269  // where Q = 1/2 Sum_k |f_k|^2
270  // H = JT J + Sum_k f_k d^2f_k/(dx_i dx_j)
271  virtual void calculateNumericH(
272  const tmv::Vector<double>& x, const tmv::Vector<double>& f,
273  tmv::SymMatrix<double>& h) const;
274 
275  virtual void useNewton() { _method = NEWTON; }
276  virtual void useHybrid() { _method = HYBRID; }
277  virtual void useLM() { _method = LM; }
278  virtual void useDogleg() { _method = DOGLEG; }
279  virtual void useSecantLM() { _method = SECANT_LM; }
280  virtual void useSecantDogleg() { _method = SECANT_DOGLEG; }
281 
282  virtual void setFTol(double fTol) { _fTol = fTol; }
283  virtual void setGTol(double gTol) { _gTol = gTol; }
284  virtual void setTol(double fTol, double gTol)
285  { _fTol = fTol; _gTol = gTol; }
286 
287  virtual void setMinStep(double minStep) { _minStep = minStep; }
288  virtual void setMaxIter(int maxIter) { _maxIter = maxIter; }
289  virtual void setTau(double tau) { _tau = tau; }
290  virtual void setDelta0(double delta0) { _delta0 = delta0; }
291 
292  virtual double getFTol() { return _fTol; }
293  virtual double getGTol() { return _gTol; }
294  virtual double getMinStep() { return _minStep; }
295  virtual int getMaxIter() { return _maxIter; }
296  virtual double getTau() { return _tau; }
297  virtual double getDelta0() { return _delta0; }
298 
299  virtual void setOutput(std::ostream& os) { _nlOut = &os; }
300  virtual void useVerboseOutput() { _verbose = 1; }
301  virtual void useExtraVerboseOutput() { _verbose = 2; }
302  virtual void noUseVerboseOutput() { _verbose = 0; }
303 
304  virtual void useDirectH() { _hasDirectH = true; }
305  virtual void useSVD() { _shouldUseSvd = true; }
306  virtual void useCholesky() { _shouldUseCh = true; }
307  virtual void noUseDirectH() { _hasDirectH = false; }
308  virtual void noUseSVD() { _shouldUseSvd = false; }
309  virtual void noUseCholesky() { _shouldUseCh = false; }
310 
311  private :
312 
313  enum Method { NEWTON, HYBRID, DOGLEG, LM, SECANT_DOGLEG, SECANT_LM };
314 
315  Method _method;
316  double _fTol;
317  double _gTol;
318  double _minStep;
319  int _maxIter;
320  double _tau;
321  double _delta0;
322  std::ostream* _nlOut;
323  int _verbose;
324  bool _hasDirectH;
325  bool _shouldUseCh;
326  bool _shouldUseSvd;
327 
328  bool solveNewton(
329  tmv::Vector<double>& x, tmv::Vector<double>& f) const;
330  bool solveLM(
331  tmv::Vector<double>& x, tmv::Vector<double>& f) const;
332  bool solveDogleg(
333  tmv::Vector<double>& x, tmv::Vector<double>& f) const;
334  bool solveHybrid(
335  tmv::Vector<double>& x, tmv::Vector<double>& f) const;
336  bool solveSecantLM(
337  tmv::Vector<double>& x, tmv::Vector<double>& f) const;
338  bool solveSecantDogleg(
339  tmv::Vector<double>& x, tmv::Vector<double>& f) const;
340 
341  mutable std::auto_ptr<tmv::Matrix<double> > _pJ;
342 
343  };
344 
345 }}}}
346 
347 #else
348 
350 
351 namespace lsst {
352 namespace meas {
353 namespace algorithms {
354 namespace shapelet {
355 
356  class NLSolver
357  {
358  public :
359 
360  NLSolver();
361  virtual ~NLSolver() {}
362 
363  // This is the basic function that needs to be overridden.
364  virtual void calculateF(const DVector& x, DVector& f) const =0;
365 
366  // J(i,j) = df_i/dx_j
367  // If you don't overload the J function, then a finite
368  // difference calculation will be performed.
369  virtual void calculateJ(
370  const DVector& x, const DVector& f, DMatrix& j) const;
371 
372  // Try to solve for F(x) = 0.
373  // Returns whether it succeeded.
374  // On exit, x is the best solution found.
375  // Also, f is returned as the value of F(x) for the best x,
376  // regardless of whether the fit succeeded or not.
377  virtual bool solve(DVector& x, DVector& f) const;
378 
379  // Get the covariance matrix of the solution.
380  // This only works if solve() returns true.
381  // So it should be called after a successful solution is returned.
382  virtual void getCovariance(DMatrix& cov) const;
383 
384  // You can also get the inverse covariance matrix if you prefer.
385  virtual void getInverseCovariance(DMatrix& invcov) const;
386 
387  virtual bool testJ(const DVector& , DVector& ,
388  std::ostream* os=0, double relerr=0.) const;
389 
390  virtual void useHybrid() { _method = HYBRID; }
391  virtual void useDogleg() { _method = DOGLEG; }
392 
393  virtual void setFTol(double fTol) { _fTol = fTol; }
394  virtual void setGTol(double gTol) { _gTol = gTol; }
395  virtual void setTol(double fTol, double gTol)
396  { _fTol = fTol; _gTol = gTol; }
397 
398  virtual void setMinStep(double minStep) { _minStep = minStep; }
399  virtual void setMaxIter(int maxIter) { _maxIter = maxIter; }
400  virtual void setTau(double tau) { _tau = tau; }
401  virtual void setDelta0(double delta0) { _delta0 = delta0; }
402 
403  virtual double getFTol() { return _fTol; }
404  virtual double getGTol() { return _gTol; }
405  virtual double getMinStep() { return _minStep; }
406  virtual int getMaxIter() { return _maxIter; }
407  virtual double getTau() { return _tau; }
408  virtual double getDelta0() { return _delta0; }
409 
410  virtual void setOutput(std::ostream& os) { _nlOut = &os; }
411  virtual void useVerboseOutput() { _verbose = 1; }
412  virtual void useExtraVerboseOutput() { _verbose = 2; }
413  virtual void noUseVerboseOutput() { _verbose = 0; }
414 
415  virtual void useDirectH() {}
416  virtual void useSVD() { _shouldUseSvd = true; }
417  virtual void useCholesky() {}
418  virtual void noUseDirectH() {}
419  virtual void noUseSVD() { _shouldUseSvd = false; }
420  virtual void noUseCholesky() {}
421 
422  private :
423 
424  enum Method { HYBRID, DOGLEG };
425 
427  double _fTol;
428  double _gTol;
429  double _minStep;
430  int _maxIter;
431  double _tau;
432  double _delta0;
433  std::ostream* _nlOut;
434  int _verbose;
438 
439  bool solveDogleg(DVector& x, DVector& f) const;
440  bool solveHybrid(DVector& x, DVector& f) const;
441 
442  mutable std::auto_ptr<DMatrix> _pJ;
443 
444  };
445 
446 }}}}
447 
448 #endif
449 
450 #endif
virtual void getCovariance(DMatrix &cov) const
Definition: NLSolver.cc:1638
virtual void calculateF(const DVector &x, DVector &f) const =0
bool solveDogleg(DVector &x, DVector &f) const
Definition: NLSolver.cc:1307
virtual void setTau(double tau)
Definition: NLSolver.h:400
virtual bool solve(DVector &x, DVector &f) const
Definition: NLSolver.cc:1620
bool solveHybrid(DVector &x, DVector &f) const
Definition: NLSolver.cc:1426
virtual void setOutput(std::ostream &os)
Definition: NLSolver.h:410
virtual void calculateJ(const DVector &x, const DVector &f, DMatrix &j) const
Definition: NLSolver.cc:1201
virtual void setTol(double fTol, double gTol)
Definition: NLSolver.h:395
double x
virtual void setDelta0(double delta0)
Definition: NLSolver.h:401
virtual void setFTol(double fTol)
Definition: NLSolver.h:393
virtual void setMinStep(double minStep)
Definition: NLSolver.h:398
virtual bool testJ(const DVector &, DVector &, std::ostream *os=0, double relerr=0.) const
Definition: NLSolver.cc:1224
virtual void getInverseCovariance(DMatrix &invcov) const
Definition: NLSolver.cc:1675
virtual void setMaxIter(int maxIter)
Definition: NLSolver.h:399
virtual void setGTol(double gTol)
Definition: NLSolver.h:394