LSSTApplications  18.0.0+106,18.0.0+50,19.0.0,19.0.0+1,19.0.0+10,19.0.0+11,19.0.0+13,19.0.0+17,19.0.0+2,19.0.0-1-g20d9b18+6,19.0.0-1-g425ff20,19.0.0-1-g5549ca4,19.0.0-1-g580fafe+6,19.0.0-1-g6fe20d0+1,19.0.0-1-g7011481+9,19.0.0-1-g8c57eb9+6,19.0.0-1-gb5175dc+11,19.0.0-1-gdc0e4a7+9,19.0.0-1-ge272bc4+6,19.0.0-1-ge3aa853,19.0.0-10-g448f008b,19.0.0-12-g6990b2c,19.0.0-2-g0d9f9cd+11,19.0.0-2-g3d9e4fb2+11,19.0.0-2-g5037de4,19.0.0-2-gb96a1c4+3,19.0.0-2-gd955cfd+15,19.0.0-3-g2d13df8,19.0.0-3-g6f3c7dc,19.0.0-4-g725f80e+11,19.0.0-4-ga671dab3b+1,19.0.0-4-gad373c5+3,19.0.0-5-ga2acb9c+2,19.0.0-5-gfe96e6c+2,w.2020.01
LSSTDataManagementBasePackage
FitterBase.cc
Go to the documentation of this file.
1 // -*- LSST-C++ -*-
2 /*
3  * This file is part of jointcal.
4  *
5  * Developed for the LSST Data Management System.
6  * This product includes software developed by the LSST Project
7  * (https://www.lsst.org).
8  * See the COPYRIGHT file at the top-level directory of this distribution
9  * for details of code ownership.
10  *
11  * This program is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation, either version 3 of the License, or
14  * (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program. If not, see <https://www.gnu.org/licenses/>.
23  */
24 
25 #include <vector>
26 #include "Eigen/Core"
27 
28 #include <boost/math/tools/minima.hpp>
29 
30 #include "lsst/log/Log.h"
31 
32 #include "lsst/jointcal/Chi2.h"
33 #include "lsst/jointcal/CcdImage.h"
38 
39 namespace lsst {
40 namespace jointcal {
41 
43  Chi2Statistic chi2;
44  accumulateStatImageList(_associations->getCcdImageList(), chi2);
46  // chi2.ndof contains the number of squares.
47  // So subtract the number of parameters.
48  chi2.ndof -= _nParTot;
49  return chi2;
50 }
51 
53  FittedStarList &fsOutliers) const {
54  // collect chi2 contributions
55  Chi2List chi2List;
56  chi2List.reserve(_nMeasuredStars + _associations->refStarList.size());
57  // contributions from measurement terms:
58  accumulateStatImageList(_associations->ccdImageList, chi2List);
59  // and from reference terms
60  accumulateStatRefStars(chi2List);
61 
62  // compute some statistics
63  size_t nval = chi2List.size();
64  if (nval == 0) return 0;
65  sort(chi2List.begin(), chi2List.end());
66  double median = (nval & 1) ? chi2List[nval / 2].chi2
67  : 0.5 * (chi2List[nval / 2 - 1].chi2 + chi2List[nval / 2].chi2);
68  auto averageAndSigma = chi2List.computeAverageAndSigma();
69  LOGLS_DEBUG(_log, "findOutliers chi2 stat: mean/median/sigma " << averageAndSigma.first << '/' << median
70  << '/' << averageAndSigma.second);
71  double cut = averageAndSigma.first + nSigmaCut * averageAndSigma.second;
72  /* For each of the parameters, we will not remove more than 1
73  measurement that contributes to constraining it. Keep track using
74  of what we are touching using an integer vector. This is the
75  trick that Marc Betoule came up to for outlier removals in "star
76  flats" fits. */
77  Eigen::VectorXi affectedParams(_nParTot);
78  affectedParams.setZero();
79 
80  std::size_t nOutliers = 0; // returned to the caller
81  // start from the strongest outliers.
82  for (auto chi2 = chi2List.rbegin(); chi2 != chi2List.rend(); ++chi2) {
83  if (chi2->chi2 < cut) break; // because the array is sorted.
84  IndexVector indices;
85  /* now, we want to get the indices of the parameters this chi2
86  term depends on. We have to figure out which kind of term it
87  is; we use for that the type of the star attached to the Chi2Star. */
88  auto measuredStar = std::dynamic_pointer_cast<MeasuredStar>(chi2->star);
89  std::shared_ptr<FittedStar> fittedStar; // To add to fsOutliers if it is a reference outlier.
90  if (measuredStar == nullptr) {
91  // it is a reference outlier
92  fittedStar = std::dynamic_pointer_cast<FittedStar>(chi2->star);
93  if (fittedStar->getMeasurementCount() == 0) {
94  LOGLS_WARN(_log, "FittedStar with no measuredStars found as an outlier: " << *fittedStar);
95  continue;
96  }
97  // NOTE: Stars contribute twice to astrometry (x,y), but once to photometry (flux),
98  // NOTE: but we only need to mark one index here because both will be removed with that star.
99  indices.push_back(fittedStar->getIndexInMatrix());
100  LOGLS_TRACE(_log, "Removing refStar " << *(fittedStar->getRefStar()) << " chi2: " << chi2->chi2);
101  /* One might think it would be useful to account for PM
102  parameters here, but it is just useless */
103  } else {
104  // it is a measurement outlier
105  auto tempFittedStar = measuredStar->getFittedStar();
106  if (tempFittedStar->getMeasurementCount() == 1 && tempFittedStar->getRefStar() == nullptr) {
107  LOGLS_WARN(_log, "FittedStar with 1 measuredStar and no refStar found as an outlier: "
108  << *tempFittedStar);
109  continue;
110  }
111  getIndicesOfMeasuredStar(*measuredStar, indices);
112  LOGLS_TRACE(_log, "Removing measStar " << *measuredStar << " chi2: " << chi2->chi2);
113  }
114 
115  /* Find out if we already discarded a stronger outlier
116  constraining some parameter this one constrains as well. If
117  yes, we keep this one, because this stronger outlier could be
118  causing the large chi2 we have in hand. */
119  bool drop_it = true;
120  for (auto const &i : indices) {
121  if (affectedParams(i) != 0) {
122  drop_it = false;
123  }
124  }
125 
126  if (drop_it) // store the outlier in one of the lists:
127  {
128  if (measuredStar == nullptr) {
129  // reference term
130  fsOutliers.push_back(fittedStar);
131  } else {
132  // measurement term
133  msOutliers.push_back(measuredStar);
134  }
135  // mark the parameters as directly changed when we discard this chi2 term.
136  for (auto const &i : indices) {
137  affectedParams(i)++;
138  }
139  nOutliers++;
140  }
141  } // end loop on measurements/references
142  LOGLS_INFO(_log, "findOutliers: found " << msOutliers.size() << " meas outliers and " << fsOutliers.size()
143  << " ref outliers ");
144 
145  return nOutliers;
146 }
147 
148 namespace {
150 SparseMatrixD createHessian(std::size_t nParTot, TripletList const &tripletList) {
151  SparseMatrixD jacobian(nParTot, tripletList.getNextFreeIndex());
152  jacobian.setFromTriplets(tripletList.begin(), tripletList.end());
153  return jacobian * jacobian.transpose();
154 }
155 
157 void dumpMatrixAndGradient(SparseMatrixD const &matrix, Eigen::VectorXd const &grad,
158  std::string const &dumpFile, LOG_LOGGER _log) {
159  std::string ext = ".txt";
160  Eigen::MatrixXd matrixDense(matrix);
161  std::string dumpMatrixPath = dumpFile + "-mat" + ext;
162  std::ofstream matfile(dumpMatrixPath);
163  matfile << matrixDense << std::endl;
164  std::string dumpGradPath = dumpFile + "-grad" + ext;
165  std::ofstream gradfile(dumpGradPath);
166  gradfile << grad << std::endl;
167  LOGLS_INFO(_log, "Dumped Hessian, gradient to: '" << dumpMatrixPath << "', '" << dumpGradPath << "'");
168 }
169 } // namespace
170 
171 MinimizeResult FitterBase::minimize(std::string const &whatToFit, double nSigmaCut, bool doRankUpdate,
172  bool const doLineSearch, std::string const &dumpMatrixFile) {
173  assignIndices(whatToFit);
174 
176 
177  // TODO : write a guesser for the number of triplets
178  std::size_t nTrip = (_lastNTrip) ? _lastNTrip : 1e6;
179  TripletList tripletList(nTrip);
180  Eigen::VectorXd grad(_nParTot);
181  grad.setZero();
182  double scale = 1.0;
183 
184  // Fill the triplets
185  leastSquareDerivatives(tripletList, grad);
186  _lastNTrip = tripletList.size();
187 
188  LOGLS_DEBUG(_log, "End of triplet filling, ntrip = " << tripletList.size());
189 
190  SparseMatrixD hessian = createHessian(_nParTot, tripletList);
191  tripletList.clear(); // we don't need it any more after we have the hessian.
192 
193  LOGLS_DEBUG(_log, "Starting factorization, hessian: dim="
194  << hessian.rows() << " non-zeros=" << hessian.nonZeros()
195  << " filling-frac = " << hessian.nonZeros() / std::pow(hessian.rows(), 2));
196 
197  if (dumpMatrixFile != "") {
198  if (hessian.rows() * hessian.cols() > 2e8) {
199  LOGLS_WARN(_log, "Hessian matrix is too big to dump to file, with rows, columns: "
200  << hessian.rows() << ", " << hessian.cols());
201  } else {
202  dumpMatrixAndGradient(hessian, grad, dumpMatrixFile, _log);
203  }
204  }
205 
207  if (chol.info() != Eigen::Success) {
208  LOGLS_ERROR(_log, "minimize: factorization failed ");
209  return MinimizeResult::Failed;
210  }
211 
212  std::size_t totalMeasOutliers = 0;
213  std::size_t totalRefOutliers = 0;
214  double oldChi2 = computeChi2().chi2;
215 
216  while (true) {
217  Eigen::VectorXd delta = chol.solve(grad);
218  if (doLineSearch) {
219  scale = _lineSearch(delta);
220  }
221  offsetParams(scale * delta);
222  Chi2Statistic currentChi2(computeChi2());
223  LOGLS_DEBUG(_log, currentChi2);
224  if (!isfinite(currentChi2.chi2)) {
225  LOGL_ERROR(_log, "chi2 is not finite. Aborting outlier rejection.");
226  returnCode = MinimizeResult::NonFinite;
227  break;
228  }
229  if (currentChi2.chi2 > oldChi2 && totalMeasOutliers + totalRefOutliers != 0) {
230  LOGL_WARN(_log, "chi2 went up, skipping outlier rejection loop");
231  returnCode = MinimizeResult::Chi2Increased;
232  break;
233  }
234  oldChi2 = currentChi2.chi2;
235 
236  if (nSigmaCut == 0) break; // no rejection step to perform
237  MeasuredStarList msOutliers;
238  FittedStarList fsOutliers;
239  // keep nOutliers so we don't have to sum msOutliers.size()+fsOutliers.size() twice below.
240  std::size_t nOutliers = findOutliers(nSigmaCut, msOutliers, fsOutliers);
241  totalMeasOutliers += msOutliers.size();
242  totalRefOutliers += fsOutliers.size();
243  if (nOutliers == 0) break;
244  TripletList outlierTriplets(nOutliers);
245  grad.setZero(); // recycle the gradient
246  // compute the contributions of outliers to derivatives
247  outliersContributions(msOutliers, fsOutliers, outlierTriplets, grad);
248  // Remove significant outliers
249  removeMeasOutliers(msOutliers);
250  removeRefOutliers(fsOutliers);
251  if (doRankUpdate) {
252  // convert triplet list to eigen internal format
253  SparseMatrixD H(_nParTot, outlierTriplets.getNextFreeIndex());
254  H.setFromTriplets(outlierTriplets.begin(), outlierTriplets.end());
255  chol.update(H, false /* means downdate */);
256  // The contribution of outliers to the gradient is the opposite
257  // of the contribution of all other terms, because they add up to 0
258  grad *= -1;
259  } else {
260  // don't reuse tripletList because we want a new nextFreeIndex.
261  TripletList nextTripletList(_lastNTrip);
262  grad.setZero();
263  // Rebuild the matrix and gradient
264  leastSquareDerivatives(nextTripletList, grad);
265  _lastNTrip = nextTripletList.size();
266  LOGLS_DEBUG(_log, "Triplets recomputed, ntrip = " << nextTripletList.size());
267 
268  hessian = createHessian(_nParTot, nextTripletList);
269  nextTripletList.clear(); // we don't need it any more after we have the hessian.
270 
272  "Restarting factorization, hessian: dim="
273  << hessian.rows() << " non-zeros=" << hessian.nonZeros()
274  << " filling-frac = " << hessian.nonZeros() / std::pow(hessian.rows(), 2));
275  chol.compute(hessian);
276  if (chol.info() != Eigen::Success) {
277  LOGLS_ERROR(_log, "minimize: factorization failed ");
278  return MinimizeResult::Failed;
279  }
280  }
281  }
282 
283  // only print the outlier summary if outlier rejection was turned on.
284  if (nSigmaCut != 0) {
285  LOGLS_INFO(_log, "Number of outliers (Measured + Reference = Total): "
286  << totalMeasOutliers << " + " << totalRefOutliers << " = "
287  << totalMeasOutliers + totalRefOutliers);
288  }
289  return returnCode;
290 }
291 
293  TripletList &tripletList, Eigen::VectorXd &grad) {
294  for (auto &outlier : msOutliers) {
295  MeasuredStarList tmp;
296  tmp.push_back(outlier);
297  const CcdImage &ccdImage = outlier->getCcdImage();
298  leastSquareDerivativesMeasurement(ccdImage, tripletList, grad, &tmp);
299  }
300  leastSquareDerivativesReference(fsOutliers, tripletList, grad);
301 }
302 
304  for (auto &measuredStar : outliers) {
305  auto fittedStar = measuredStar->getFittedStar();
306  measuredStar->setValid(false);
307  fittedStar->getMeasurementCount()--; // could be put in setValid
308  }
309 }
310 
312  for (auto &fittedStar : outliers) {
313  fittedStar->setRefStar(nullptr);
314  }
315 }
316 
317 void FitterBase::leastSquareDerivatives(TripletList &tripletList, Eigen::VectorXd &grad) const {
318  auto ccdImageList = _associations->getCcdImageList();
319  for (auto const &ccdImage : ccdImageList) {
320  leastSquareDerivativesMeasurement(*ccdImage, tripletList, grad);
321  }
322  leastSquareDerivativesReference(_associations->fittedStarList, tripletList, grad);
323 }
324 
325 void FitterBase::saveChi2Contributions(std::string const &baseName) const {
326  std::string replaceStr = "{type}";
327  auto pos = baseName.find(replaceStr);
328  std::string measFilename(baseName);
329  measFilename.replace(pos, replaceStr.size(), "-meas.csv");
330  std::string refFilename(baseName);
331  refFilename.replace(pos, replaceStr.size(), "-ref.csv");
332  saveChi2MeasContributions(measFilename);
333  saveChi2RefContributions(refFilename);
334 }
335 
336 double FitterBase::_lineSearch(Eigen::VectorXd const &delta) {
337  auto func = [this, &delta](double scale) {
338  auto offset = scale * delta;
339  offsetParams(offset);
340  auto chi2 = computeChi2();
341  // reset the system to where it was before offsetting.
342  offsetParams(-offset);
343  return chi2.chi2;
344  };
345  // The maximum theoretical precision is half the number of bits in the mantissa (see boost docs).
346  auto bits = std::numeric_limits<double>::digits / 2;
347  auto result = boost::math::tools::brent_find_minima(func, -1.0, 2.0, bits);
348  LOGLS_DEBUG(_log, "Line search scale factor: " << result.first);
349  return result.first;
350 }
351 
352 } // namespace jointcal
353 } // namespace lsst
#define LOGLS_WARN(logger, message)
Log a warn-level message using an iostream-based interface.
Definition: Log.h:648
virtual void accumulateStatRefStars(Chi2Accumulator &accum) const =0
Compute the chi2 (per star or total, depending on which Chi2Accumulator is used) for RefStars...
virtual void leastSquareDerivativesReference(FittedStarList const &fittedStarList, TripletList &tripletList, Eigen::VectorXd &grad) const =0
Compute the derivatives of the reference terms.
Simple structure to accumulate chi2 and ndof.
Definition: Chi2.h:52
#define LOG_LOGGER
Definition: Log.h:703
Eigen::Index getNextFreeIndex() const
Definition: Tripletlist.h:47
virtual void accumulateStatImageList(CcdImageList const &ccdImageList, Chi2Accumulator &accum) const =0
Compute the chi2 (per star or total, depending on which Chi2Accumulator is used) for measurements...
std::size_t findOutliers(double nSigmaCut, MeasuredStarList &msOutliers, FittedStarList &fsOutliers) const
Find Measurements and references contributing more than a cut, computed as The outliers are NOT remo...
Definition: FitterBase.cc:52
T rend(T... args)
virtual void saveChi2RefContributions(std::string const &filename) const =0
Save a CSV file containing residuals of reference terms.
def scale(algorithm, min, max=None, frame=None)
Definition: ds9.py:109
T endl(T... args)
T end(T... args)
#define LOGL_ERROR(logger, message...)
Log a error-level message using a varargs/printf style interface.
Definition: Log.h:552
void removeMeasOutliers(MeasuredStarList &outliers)
Remove measuredStar outliers from the fit. No Refit done.
Definition: FitterBase.cc:303
void update(SparseMatrixD const &H, bool UpOrDown)
Definition: Eigenstuff.h:68
A list of MeasuredStar. They are usually filled in Associations::createCcdImage.
Definition: MeasuredStar.h:146
virtual void saveChi2Contributions(std::string const &baseName) const
Save the full chi2 term per star that was used in the minimization, for debugging.
Definition: FitterBase.cc:325
MinimizeResult
Return value of minimize()
Definition: FitterBase.h:40
STL class.
MinimizeResult minimize(std::string const &whatToFit, double const nSigmaCut=0, bool const doRankUpdate=true, bool const doLineSearch=false, std::string const &dumpMatrixFile="")
Does a 1 step minimization, assuming a linear model.
Definition: FitterBase.cc:171
LSST DM logging module built on log4cxx.
Structure to accumulate the chi2 contributions per each star (to help find outliers).
Definition: Chi2.h:100
T push_back(T... args)
STL class.
Eigen::SparseMatrix< double, 0, Eigen::Index > SparseMatrixD
Definition: Eigenstuff.h:35
#define LOGLS_DEBUG(logger, message)
Log a debug-level message using an iostream-based interface.
Definition: Log.h:608
A base class for image defects.
#define LOGLS_TRACE(logger, message)
Log a trace-level message using an iostream-based interface.
Definition: Log.h:588
T replace(T... args)
A list of FittedStar s. Such a list is typically constructed by Associations.
Definition: FittedStar.h:123
void removeRefOutliers(FittedStarList &outliers)
Remove refStar outliers from the fit. No Refit done.
Definition: FitterBase.cc:311
T isfinite(T... args)
objects measured on actual images.
Definition: MeasuredStar.h:46
T dynamic_pointer_cast(T... args)
#define LOGL_WARN(logger, message...)
Log a warn-level message using a varargs/printf style interface.
Definition: Log.h:536
T clear(T... args)
std::shared_ptr< Associations > _associations
Definition: FitterBase.h:153
virtual void saveChi2MeasContributions(std::string const &filename) const =0
Save a CSV file containing residuals of measurement terms.
virtual void assignIndices(std::string const &whatToFit)=0
Set parameters to fit and assign indices in the big matrix.
#define LOGLS_INFO(logger, message)
Log a info-level message using an iostream-based interface.
Definition: Log.h:628
T find(T... args)
T size(T... args)
STL class.
Chi2Statistic computeChi2() const
Returns the chi2 for the current state.
Definition: FitterBase.cc:42
T begin(T... args)
T pow(T... args)
void leastSquareDerivatives(TripletList &tripletList, Eigen::VectorXd &grad) const
Evaluates the chI^2 derivatives (Jacobian and gradient) for the current whatToFit setting...
Definition: FitterBase.cc:317
T sort(T... args)
Handler of an actual image from a single CCD.
Definition: CcdImage.h:64
virtual void offsetParams(Eigen::VectorXd const &delta)=0
Offset the parameters by the requested quantities.
#define LOGLS_ERROR(logger, message)
Log a error-level message using an iostream-based interface.
Definition: Log.h:668
py::object result
Definition: _schema.cc:429
void outliersContributions(MeasuredStarList &msOutliers, FittedStarList &fsOutliers, TripletList &tripletList, Eigen::VectorXd &grad)
Contributions to derivatives from (presumably) outlier terms.
Definition: FitterBase.cc:292
The objects which have been measured several times.
Definition: FittedStar.h:61
std::pair< double, double > computeAverageAndSigma()
Compute the average and std-deviation of these chisq values.
Definition: Chi2.cc:33
T reserve(T... args)
Eigen::Index _nMeasuredStars
Definition: FitterBase.h:158
virtual void leastSquareDerivativesMeasurement(CcdImage const &ccdImage, TripletList &tripletList, Eigen::VectorXd &grad, MeasuredStarList const *measuredStarList=nullptr) const =0
Compute the derivatives of the measured stars and model for one CcdImage.
virtual void getIndicesOfMeasuredStar(MeasuredStar const &measuredStar, IndexVector &indices) const =0
Set the indices of a measured star from the full matrix, for outlier removal.
T rbegin(T... args)