dataIds.py
# This file is part of jointcal.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

# Copied from HyperSuprime-Cam/pipe_tasks
import collections

import lsst.log
import lsst.afw.geom
import lsst.afw.table
import lsst.afw.image
import lsst.meas.base

from lsst.coadd.utils import CoaddDataIdContainer

__all__ = ["PerTractCcdDataIdContainer"]

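# Illustrative sketch, not part of the original file: a Gen2 command-line task
# would typically attach this container when declaring its --id argument via
# lsst.pipe.base.ArgumentParser.add_id_argument. The dataset name and help
# text below are assumptions for illustration only:
#
#     parser = lsst.pipe.base.ArgumentParser(name="exampleTask")
#     parser.add_id_argument("--id", "calexp",
#                            help="data ID, e.g. --id visit=6789 ccd=0..9",
#                            ContainerClass=PerTractCcdDataIdContainer)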


class PerTractCcdDataIdContainer(CoaddDataIdContainer):
    """A version of lsst.pipe.base.DataIdContainer that combines raw data IDs
    (defined as whatever we use for 'src') with a tract.
    """

    def castDataIds(self, butler):
        """Validate data IDs and cast them to the correct type (modify idList in place).

        Parameters
        ----------
        butler
            data butler
        """
        try:
            idKeyTypeDict = butler.getKeys(datasetType="src", level=self.level)
        except KeyError as e:
            raise KeyError("Cannot get keys for datasetType %s at level %s: %s" % ("src", self.level, e))

        idKeyTypeDict = idKeyTypeDict.copy()
        idKeyTypeDict["tract"] = int

        for dataDict in self.idList:
            for key, strVal in dataDict.items():
                try:
                    keyType = idKeyTypeDict[key]
                except KeyError:
                    validKeys = sorted(idKeyTypeDict.keys())
                    raise KeyError("Unrecognized ID key %r; valid keys are: %s" % (key, validKeys))
                if keyType != str:
                    try:
                        castVal = keyType(strVal)
                    except Exception:
                        raise TypeError("Cannot cast value %r to %s for ID key %r" % (strVal, keyType, key,))
                    dataDict[key] = castVal
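        # Example of the cast performed above, assuming hypothetical ID keys
        # "visit" and "ccd" (actual keys depend on the camera's butler mapper):
        # the command-line strings {"visit": "903334", "ccd": "23", "tract": "0"}
        # become {"visit": 903334, "ccd": 23, "tract": 0}.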

    def _addDataRef(self, namespace, dataId, tract):
        """Construct a dataRef based on dataId, but with an added tract key."""
        forcedDataId = dataId.copy()
        forcedDataId['tract'] = tract
        dataRef = namespace.butler.dataRef(datasetType=self.datasetType, dataId=forcedDataId)
        self.refList.append(dataRef)

    def makeDataRefList(self, namespace):
        """Make self.refList from self.idList
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        skymap = None
        log = lsst.log.Log.getLogger("jointcal.dataIds")
        visitTract = {}  # Set of tracts for each visit
        visitRefs = {}  # List of data references for each visit
        for dataId in self.idList:
            if "tract" not in dataId:
                # Discover which tracts the data overlaps
                log.infof("Reading WCS to determine tracts for components of dataId={}", dict(dataId))
                if skymap is None:
                    skymap = self.getSkymap(namespace)

                for ref in namespace.butler.subset("calexp", dataId=dataId):
                    if not ref.datasetExists("calexp"):
                        log.warnf("calexp with dataId: {} not found.", dict(dataId))
                        continue

                    # XXX fancier mechanism to select an individual exposure than just pulling out "visit"?
                    if "visit" in ref.dataId.keys():
                        visit = ref.dataId["visit"]
                    else:
                        # Fallback if visit is not in the dataId
                        visit = namespace.butler.queryMetadata("calexp", ("visit"), ref.dataId)[0]
                    if visit not in visitRefs:
                        visitRefs[visit] = list()
                    visitRefs[visit].append(ref)

                    wcs = ref.get("calexp_wcs", immediate=True)
                    box = lsst.afw.geom.Box2D(ref.get("calexp_bbox"))
                    # Going with just the nearest tract. Since we're throwing all tracts for the visit
                    # together, this shouldn't be a problem unless the tracts are much smaller than a CCD.
                    tract = skymap.findTract(wcs.pixelToSky(box.getCenter()))
                    if lsst.meas.base.imageOverlapsTract(tract, wcs, box):
                        if visit not in visitTract:
                            visitTract[visit] = set()
                        visitTract[visit].add(tract.getId())
            else:
                tract = dataId.pop("tract")
                # making a DataRef for src fills out any missing keys and allows us to iterate
                for ref in namespace.butler.subset("src", dataId=dataId):
                    if ref.datasetExists():
                        self._addDataRef(namespace, ref.dataId, tract)

        # Ensure all components of a visit are kept together by putting them all in the same set of tracts
        # NOTE: sorted() here is to keep py2 and py3 dataRefs in the same order.
        # NOTE: see DM-9393 for details.
        for visit, tractSet in sorted(visitTract.items()):
            for ref in visitRefs[visit]:
                for tract in sorted(tractSet):
                    self._addDataRef(namespace, ref.dataId, tract)
        if visitTract:
            tractCounter = collections.Counter()
            for tractSet in visitTract.values():
                tractCounter.update(tractSet)
            log.infof("Number of visits per tract: {}", dict(tractCounter))
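
# Illustrative command-line data IDs, not part of the original file; the
# "visit" and "ccd" keys are hypothetical and depend on the camera's butler
# mapper:
#
#     --id visit=903334 ccd=23 tract=0   # tract given explicitly
#     --id visit=903334 ccd=23           # overlapping tracts discovered from the calexp WCS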