LSSTApplications  10.0-2-g4f67435,11.0.rc2+1,11.0.rc2+12,11.0.rc2+3,11.0.rc2+4,11.0.rc2+5,11.0.rc2+6,11.0.rc2+7,11.0.rc2+8
LSSTDataManagementBasePackage
Histogram.cc
Go to the documentation of this file.
1 // -*- LSST-C++ -*-
2 
3 /*
4  * LSST Data Management System
5  * Copyright 2008, 2009, 2010 LSST Corporation.
6  *
7  * This product includes software developed by the
8  * LSST Project (http://www.lsst.org/).
9  *
10  * This program is free software: you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation, either version 3 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the LSST License Statement and
21  * the GNU General Public License along with this program. If not,
22  * see <http://www.lsstcorp.org/LegalNotices/>.
23  */
24 
25 #include <algorithm>
26 
31 
32 namespace lsst {
33 namespace meas {
34 namespace algorithms {
35 namespace shapelet {
36 
37  template <typename T>
38  Histogram<T>::Histogram(double binSize, double minValue, double maxValue) :
39  _binSize(binSize)
40  {
41  // Add an extra bin to prevent rounding errors from giving you a
42  // value outsize the allowed range.
43  int nBins = int(ceil((maxValue-minValue)/_binSize))+1;
44  _minValue = minValue - binSize/2.;
45  _maxValue = maxValue + binSize/2.;
46  _refs = std::vector<std::vector<T> >(nBins);
47  _values = std::vector<std::vector<double> >(nBins);
48  if (value(nBins-1) > _maxValue) _maxValue = value(nBins-1)+binSize/4.;
49 
50  dbg<<"made histogram:\n";
51  dbg<<"minvalue="<<_minValue<<
52  " index(minvalue)="<<index(_minValue)<<std::endl;
53  dbg<<"maxvalue="<<_maxValue<<
54  " index(maxvalue)="<<index(_maxValue)<<std::endl;
55  dbg<<"mini=0 value(mini)="<<value(0)<<std::endl;
56  dbg<<"maxi="<<_refs.size()-1;
57  dbg<<" value(maxi)="<<value(_refs.size()-1)<<std::endl;
58  }
59 
60  template <typename T>
61  void Histogram<T>::add(double value,const T& ref)
62  {
63  _refs[index(value)].push_back(ref);
64  _values[index(value)].push_back(value);
65  }
66 
67  template <typename T>
68  int Histogram<T>::getCount(int i) const
69  {
70  Assert(i<int(_refs.size()));
71  Assert(_values[i].size() == _refs[i].size());
72  return _refs[i].size();
73  }
74 
75  template <typename T>
76  double Histogram<T>::findPeak(double val1, double val2) const
77  {
78  if (val1<_minValue) val1 = _minValue;
79  if (val2>_maxValue) val2 = _maxValue;
80  int i1=index(val1), i2=index(val2);
81  Assert(i1<int(_refs.size()));
82  Assert(i2<int(_refs.size()));
83  int iPeak=i1;
84  int maxCount = getCount(i1);
85 
86  for(int i=i1+1;i<=i2;++i) {
87  if (getCount(i) > maxCount) {
88  maxCount = getCount(i);
89  iPeak = i;
90  }
91  }
92  return value(iPeak);
93  }
94 
95  template <typename T>
96  bool Histogram<T>::hasSinglePeak(double val1, double val2) const
97  // Finds the first peak from the left... set to iPeak1
98  // and the first peak from the right... set to iPeak2
99  // Returns whether they are the same peak
100  {
101  if (val1<_minValue) val1 = _minValue;
102  if (val2>_maxValue) val2 = _maxValue;
103  int i1=index(val1), i2=index(val2);
104  Assert(i1<int(_refs.size()));
105  Assert(i2<int(_refs.size()));
106  int iPeak1=i1,iPeak2=i2;
107  while(iPeak1+1<=i2 && getCount(iPeak1+1)>=getCount(iPeak1)) ++iPeak1;
108  while(iPeak2-1>=i1 && getCount(iPeak2-1)>=getCount(iPeak2)) --iPeak2;
109  return iPeak1 >= iPeak2; // Usually ==, but if plateau, they swap
110  }
111 
112  template <typename T>
113  double Histogram<T>::findValley(double val1, double val2) const
114  {
115  if (val1<_minValue) val1 = _minValue;
116  if (val2>_maxValue) val2 = _maxValue;
117  int i1=index(val1), i2=index(val2);
118  Assert(i1<int(_refs.size()));
119  Assert(i2<int(_refs.size()));
120  int iValley=i1;
121  int minCount = getCount(i1);
122 
123  for(int i=i1+1;i<=i2;++i) {
124  if (getCount(i) < minCount) {
125  minCount = getCount(i);
126  iValley = i;
127  }
128  }
129  // Make sure you're not still sloping down. If so, keep going.
130  if (iValley==i2) {
131  int nRefs = _refs.size();
132  while(iValley+1 < nRefs && getCount(iValley+1) < getCount(iValley))
133  ++iValley;
134  }
135 
136  return value(iValley);
137  }
138 
139  template <typename T>
140  double Histogram<T>::findFirstValleyAfter(double val1,bool hasPoissonNoise) const
141  {
142  dbg<<"Start FindFirstValleyAfter "<<val1<<std::endl;
143  if (val1<_minValue) val1 = _minValue;
144  int i1=index(val1);
145  Assert(i1<int(_refs.size()));
146  // Keep going until it stops dropping
147  // The sqrt bit allows for Poisson noise in the counts
148  // Otherwise you can get caught early by ledges.
149  // eg. 15 9 10 4 1 3 3 ... would stop at the 9 instead of at the 1
150  int iValley=i1;
151  double minCount = getCount(i1);
152  int iCheck=i1+1;
153  const int nRefs = _refs.size();
154  while(iCheck < nRefs && getCount(iCheck) <= minCount +
155  (hasPoissonNoise ? int(sqrt(float(getCount(iCheck)))) : 0)) {
156  if (getCount(iCheck) < minCount) {
157  minCount = getCount(iCheck);
158  iValley = iCheck;
159  }
160  if (++iCheck == nRefs) break;
161  }
162  dbg<<"valley = "<<value(iValley)<<std::endl;
163  return value(iValley);
164  }
165 
166  template <typename T>
167  double Histogram<T>::findFirstValleyBefore(double val1,bool hasPoissonNoise) const
168  {
169  if (val1>_maxValue) val1 = _maxValue;
170  int i1=index(val1);
171  Assert(i1<int(_refs.size()));
172  // Keep going until it stops dropping
173  // The sqrt bit allows for Poisson noise in the counts
174  // Otherwise you can get caught early by ledges.
175  // eg. 15 9 10 4 1 3 3 ... would stop at the 9 instead of at the 1
176  int iValley=i1;
177  double minCount = getCount(i1);
178  if (i1 > 0) {
179  int iCheck=i1-1;
180  while(getCount(iCheck) <= minCount +
181  (hasPoissonNoise ? int(sqrt(float(getCount(iCheck)))) : 0)) {
182  if (getCount(iCheck) < minCount) {
183  minCount = getCount(iCheck);
184  iValley = iCheck;
185  }
186  if (iCheck == 0) break;
187  --iCheck;
188  }
189  }
190  return value(iValley);
191  }
192 
193  template <typename T>
194  double Histogram<T>::findFirstValueAfter(double start) const
195  {
196  int i = (start<_minValue) ? index(_minValue) : index(start)+1;
197  const int nValues = _values.size();
198  while (i<nValues && _values[i].size() == 0) ++i;
199  if (i == nValues) return _maxValue;
200  else return *std::min_element(_values[i].begin(),_values[i].end());
201  }
202 
203  template <typename T>
204  double Histogram<T>::findFirstPeakAfter(double val1,bool hasPoissonNoise) const
205  {
206  if (val1<_minValue) val1 = _minValue;
207  int i1=index(val1);
208  const int nRefs = _refs.size();
209  Assert(i1 < nRefs);
210  // Keep going until it stops rising
211  int iPeak=i1;
212  double maxCount = getCount(i1);
213  int iCheck=i1+1;
214  while(iCheck < nRefs && maxCount <= getCount(iCheck) +
215  (hasPoissonNoise ? int(sqrt(float(getCount(iCheck)))) : 0)) {
216  if (getCount(iCheck) > maxCount) {
217  maxCount = getCount(iCheck);
218  iPeak = iCheck;
219  }
220  if (++iCheck == nRefs) break;
221  }
222  return value(iPeak);
223  }
224 
225  template <typename T>
226  double Histogram<T>::findFirstPeakBefore(double val1,bool hasPoissonNoise) const
227  {
228  if (val1>_maxValue) val1 = _maxValue;
229  int i1=index(val1);
230  Assert(i1 < int(_refs.size()));
231  // Keep going until it stops rising
232  int iPeak=i1;
233  double maxCount = getCount(i1);
234  if (i1 > 0) {
235  int iCheck=i1-1;
236  while(maxCount <= getCount(iCheck) +
237  (hasPoissonNoise ? int(sqrt(float(getCount(iCheck)))) : 0)) {
238  if (getCount(iCheck) > maxCount) {
239  maxCount = getCount(iCheck);
240  iPeak = iCheck;
241  }
242  if (iCheck == 0) break;
243  --iCheck;
244  }
245  }
246  return value(iPeak);
247  }
248 
249  template <typename T>
250  int Histogram<T>::getTotalCountBefore(double val1) const
251  {
252  int count=0;
253  int i1 = index(val1);
254  Assert(i1 < int(_refs.size()));
255  for(int i=0;i<i1;++i) count += getCount(i);
256  return count;
257  }
258 
259  template <typename T>
260  int Histogram<T>::getTotalCountAfter(double val1) const
261  {
262  int count=0;
263  int i1 = index(val1);
264  const int nRefs = _refs.size();
265  Assert(i1 < nRefs);
266  for(int i=i1+1;i<nRefs;++i) count += getCount(i);
267  return count;
268  }
269 
270  template <typename T>
271  int Histogram<T>::getRefinedPeakCount(double* peak) const
272  // Takes the three bins around the peak and find the maximum number of
273  // objects which fit into a bin width.
274  // It does this by sliding a window of width binSize along a sorted list
275  // of values and counting how many objects fit into the window.
276  {
277  std::vector<double> vals =
278  getValuesInRange(*peak-1.5*_binSize,
279  *peak+1.5*_binSize);
280  Assert(vals.size()>=1);
281  std::sort(vals.begin(),vals.end());
282  int bestCount=0;
283  double bestVal=*peak;
284  // i is the object at the left side of the virtual bin
285  // j is the first object after the end of the virtual bin
286  const int nVals = vals.size();
287  for(int i=0,j=0;j<nVals;++i) {
288  // Determine j for this value of i
289  while (j<nVals && vals[j] < vals[i]+_binSize) ++j;
290  // If the corresponding count is better than bestCount, update it
291  Assert(j>i);
292  if (j-i > bestCount) {
293  bestCount = j-i;
294  bestVal = (vals[i]+vals[j-1])/2.;
295  }
296  }
297  *peak = bestVal;
298  return bestCount;
299  }
300 
301  template <typename T>
302  int Histogram<T>::getRefinedValleyCount(double* valley) const
303  // Just like getRefinedPeakCount, but for a valley.
304  // One slight difference is that for the best peak, you want to
305  // include as many objects as possible, so the "i" value is included
306  // For the best valley, we imagine that the "i" value is just slightly
307  // to the left of the start of the bin. So the count is j-i-1 rather
308  // than j-i.
309  {
310  std::vector<double> vals =
311  getValuesInRange(*valley-1.5*_binSize,
312  *valley+1.5*_binSize);
313  const int nVals = vals.size();
314  if (nVals == 0) return 0;
315  std::sort(vals.begin(),vals.end());
316  // start high since want to find the lowest count
317  int bestCount=nVals;
318  double bestVal=*valley;
319  // i is the last object before the start of the virtual bin
320  // j is the first object after the end of the virtual bin
321  for(int i=0,j=0; i<nVals && vals[i]<*valley+0.5*_binSize; ++i) {
322  // Determine j for this value of i
323  while (j<nVals && vals[j] < vals[i]+_binSize) ++j;
324  // If we can get rid of any i values, given this j, do so.
325  double nextBinStart = j<nVals ? vals[j] : *valley+1.5*_binSize;
326  while (i+1<nVals && vals[i+1]+_binSize < nextBinStart) ++i;
327  // If the corresponding count is better than bestCount, update it
328  Assert(j>i);
329  if (j-i-1 < bestCount) {
330  bestCount = j-i-1;
331  bestVal = vals[i]+_binSize/2.;
332  }
333  }
334  *valley = bestVal;
335  return bestCount;
336  }
337 
338  template <typename T>
339  int Histogram<T>::operator[](double value) const
340  {
341  if (index(value) == int(_refs.size())) return 0;
342  Assert(index(value) < int(_refs.size()));
343  return getCount(index(value));
344  }
345 
346  template <typename T>
347  double Histogram<T>::findThresh(double minVal, double maxVal) const
348  // This function finds the threshold point which maximizes the
349  // "Normalized Between Group Variance" (var).
350  // For a given threshold point, we have two groups, the left side and
351  // the right side.
352  // Define fL and fR to be the fraction of points in each group.
353  // xL and xR are the centroids of each group (the mean value)
354  // vL and vR are the variances for each group.
355  // Then the Between Group Variance is just:
356  // bgv = fL fR (xR - xL)^2
357  // Maximizing this works well for distributions which may have different
358  // numbers of elements so long as the widths of the two distributions
359  // are similar.
360  // When the widths are not similar, it is better to normalize this by the
361  // effective sigmas, or sqrt(variance).
362  // var = fL fR (xR-xL)^2 / sqrt(vL vR)
363  // Maximizing this seems to work well for separating stars from galaxies.
364  //
365  // To calculate this efficiently, we want to be able to keep track of all
366  // the values as we go along the histogram to make it order N, rather than
367  // N^2.
368  //
369  // nTot = Sumtot(N(i))
370  // meanTot = Sumtot(N(i)*i)/Ntot
371  // meanSqTot = Sumtot(N(i)*i*i)/Ntot
372  //
373  // (All sums below are from i=0..i1, "sumtot" above means from i=0..maxi)
374  //
375  // fL = Sum(N(i))/nTot
376  // fR = 1-fL
377  // xL = Sum(N(i)*i)/nTot/fL
378  // xR = (meanTot - fL*xL)/fR
379  // vL = Sum(N(i)*i*i)/nTot/fL - xL^2
380  // vR = (meanSqTot - fL*(vL+xL^2))/fR
381  {
382  const double EPSILON = 1.e-6;
383 
384  dbg<<"starting FindThresh\n";
385  if (dbgout) print(*dbgout);
386 
387  double nTot=0.,meanTot=0.,meanSqTot=0.;
388 
389  if (minVal < _minValue) minVal = _minValue;
390  if (maxVal > _maxValue) maxVal = _maxValue;
391  int i1 = index(minVal);
392  int i2 = index(maxVal);
393  Assert(i1 < int(_refs.size()));
394  Assert(i2 < int(_refs.size()));
395 
396  // First calculate the "tot" values:
397  for(int i=i1;i<=i2;++i) {
398  double dcount = getCount(i);
399  nTot += dcount;
400  meanTot += dcount*i;
401  meanSqTot += dcount*i*i;
402  }
403  meanTot /= nTot;
404  meanSqTot /= nTot;
405  dbg<<"ntot="<<nTot<<", meantot="<<meanTot<<
406  ", meansqtot="<<meanSqTot<<std::endl;
407 
408  double sumN=0.,sumNi=0.,sumNii=0.,varBest=-1.;
409  int iBest=-1;
410 
411  const double minVariance = 0.25;
412 
413  for(int i=i1;i<=i2;++i) {
414  double dcount = getCount(i);
415  sumN += dcount;
416  sumNi += dcount*i;
417  sumNii += dcount*i*i;
418  double fL = sumN/nTot;
419  double fR = 1.-fL;
420  Assert(fL >=0. && fR >=0.);
421  if (fL == 0. || fR == 0.) continue; // var = 0
422  double xL = sumNi/nTot; // actuall fL*xL now
423  double xR = (meanTot-xL)/fR;
424  xL /= fL;
425  double vL = sumNii/nTot; // actually fL*(vL+xL^2)
426  double vR = (meanSqTot-vL)/fR - xR*xR;
427  vL = vL/fL - xL*xL;
428  if (vL<minVariance) vL = minVariance;
429  if (vR<minVariance) vR = minVariance;
430  double var = fL*fR*std::pow(xR-xL,2)/sqrt(vL*vR);
431  Assert(var >= 0.);
432  if (var > varBest+EPSILON) {
433  iBest = i;
434  varBest = var;
435  }
436  }
437 
438  dbg<<"besti = "<<iBest<<", bestvar = "<<varBest<<std::endl;
439  Assert(iBest >= 0);
440  Assert(varBest > 0.);
441  // +1 below forces thresh to be at least 1 bin away from first peak.
442  // otherwise if peak is only 1 bin, can get iBest = that bin.
443  dbg<<"returning thresh = "<<value(iBest)<<std::endl;
444  return value(iBest);
445  }
446 
447  template <typename T>
448  std::vector<T> Histogram<T>::getRefsInRange(double min, double max) const
449  {
450  if (min < _minValue) min = _minValue;
451  if (max > _maxValue) max = _maxValue;
452  int i1=index(min);
453  int i2=index(max);
454 
455  dbg<<"in getrefs: min,max = "<<min<<','<<max<<std::endl;
456 
457  std::vector<T> temp;
458  const int nRefs1 = _refs[i1].size();
459  for(int k=0; k<nRefs1; ++k) {
460  if(_values[i1][k]>=min) {
461  dbg<<"i1 - add ref @ "<<_values[i1][k]<<std::endl;
462  temp.push_back(_refs[i1][k]);
463  }
464  }
465  for(int i=i1+1;i<i2;++i) {
466  if (_refs[i].size() > 0)
467  dbg<<"i - add all ("<<_refs[i].size()<<") refs near "<<
468  _values[i].front()<<std::endl;
469  temp.insert(temp.end(),_refs[i].begin(),_refs[i].end());
470  }
471  const int nRefs2 = _refs[i2].size();
472  for(int k=0; k<nRefs2; ++k) {
473  if(_values[i2][k] <= max) {
474  dbg<<"i2 - add ref @ "<<_values[i2][k]<<std::endl;
475  temp.push_back(_refs[i2][k]);
476  }
477  }
478  return temp;
479  }
480 
481  template <typename T>
482  std::vector<double> Histogram<T>::getValuesInRange(
483  double min, double max) const
484  {
485  if (min < _minValue) min = _minValue;
486  if (max > _maxValue) max = _maxValue;
487  int i1=index(min);
488  int i2=index(max);
489 
490  std::vector<double> temp;
491  const int nVals1 = _values[i1].size();
492  for(int k=0;k<nVals1;++k)
493  if(_values[i1][k]>=min) temp.push_back(_values[i1][k]);
494  for(int i=i1+1;i<i2;++i)
495  temp.insert(temp.end(),_values[i].begin(),_values[i].end());
496  const int nVals2 = _values[i2].size();
497  for(int k=0;k<nVals2;++k)
498  if(_values[i2][k]<=max) temp.push_back(_values[i2][k]);
499  return temp;
500  }
501 
502  template <typename T>
503  int Histogram<T>::index(double value) const
504  {
505  Assert (value >= _minValue && value <= _maxValue);
506  return int(floor((value - _minValue)/_binSize));
507  }
508 
509  template <typename T>
510  double Histogram<T>::value(int i) const
511  {
512  Assert(i<int(_refs.size()));
513  return _binSize*(i+0.5)+_minValue;
514  }
515 
516  template <typename T>
517  void Histogram<T>::print(std::ostream& fout,double val1,double val2) const
518  {
519  if (val1 < _minValue) val1 = _minValue;
520  if (val2 > _maxValue) val2 = _maxValue;
521  int i1=index(val1);
522  int i2=index(val2);
523  Assert(i1 < int(_refs.size()));
524  Assert(i2 < int(_refs.size()));
525  Form sci2; sci2.sci().width(10).prec(2);
526 
527  for(int i=i1;i<=i2;++i) {
528  //if ((i-i1)%5 == 0)
529  fout << sci2(value(i));
530  //else fout << " ";
531  for(int j=0;j<getCount(i);++j) fout<<'*';
532  fout<<std::endl;
533  }
534  }
535 
536  template class Histogram<PotentialStar*>;
537 
538 }}}}
int getRefinedPeakCount(double *peak) const
Definition: Histogram.cc:271
bool hasSinglePeak(double minVal, double maxVal) const
Definition: Histogram.cc:96
double findFirstPeakBefore(double val1, bool hasPoissonNoise=false) const
Definition: Histogram.cc:226
int getTotalCountBefore(double val1) const
Definition: Histogram.cc:250
Extent< int, N > ceil(Extent< double, N > const &input)
double findFirstValleyAfter(double val1, bool hasPoissonNoise=false) const
Definition: Histogram.cc:140
std::vector< std::vector< double > > _values
Definition: Histogram.h:47
double findFirstValleyBefore(double val1, bool hasPoissonNoise=false) const
Definition: Histogram.cc:167
Histogram(double binSize, double minValue, double maxValue)
Definition: Histogram.cc:38
double findPeak(double minVal, double maxVal) const
Definition: Histogram.cc:76
double findFirstPeakAfter(double val1, bool hasPoissonNoise=false) const
Definition: Histogram.cc:204
std::vector< T > getRefsInRange(double min, double max) const
Definition: Histogram.cc:448
double findValley(double minVal, double maxVal) const
Definition: Histogram.cc:113
void print(std::ostream &fout, double val1=-1.e10, double val2=1.e10) const
Definition: Histogram.cc:517
double findFirstValueAfter(double start) const
Definition: Histogram.cc:194
int getTotalCountAfter(double val1) const
Definition: Histogram.cc:260
Extent< int, N > floor(Extent< double, N > const &input)
int getRefinedValleyCount(double *valley) const
Definition: Histogram.cc:302
std::vector< double > getValuesInRange(double min, double max) const
Definition: Histogram.cc:482
void add(double value, const T &ref)
Definition: Histogram.cc:61
double findThresh(double minVal, double maxVal) const
Definition: Histogram.cc:347
std::vector< std::vector< T > > _refs
Definition: Histogram.h:46
#define dbg
Definition: dbg.h:69
std::ostream *const dbgout
Definition: dbg.h:64
#define Assert(x)
Definition: dbg.h:73