mapping.py
# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from collections import OrderedDict
import os
import re
from lsst.daf.base import PropertySet
from lsst.daf.persistence import ButlerLocation, NoResults
from lsst.utils import doImport
from lsst.afw.image import Exposure, MaskedImage, Image, DecoratedImage

__all__ = ["Mapping", "ImageMapping", "ExposureMapping", "CalibrationMapping", "DatasetMapping"]


class Mapping(object):

    """Mapping is a base class for all mappings. Mappings are used by
    the Mapper to map (determine a path to some data given some
    identifiers) and standardize (convert data into some standard
    format or type) data, and to query the associated registry to see
    what data is available.

    Subclasses must specify self.storage or else override self.map().

    Public methods: lookup, have, need, keys, map

    Mappings are specified mainly by policy. A Mapping policy should
    consist of:

    template (string): a Python string providing the filename for that
    particular dataset type based on some data identifiers. In the
    case of redundancy in the path (e.g., a file uniquely specified by
    the exposure number, but with the filter in the path), the
    redundant/dependent identifiers can be looked up in the registry.

    python (string): the Python type for the retrieved data (e.g.
    lsst.afw.image.ExposureF)

    persistable (string): the Persistable registration for the on-disk data
    (e.g. ImageU)

    storage (string, optional): Storage type for this dataset type (e.g.
    "FitsStorage")

    level (string, optional): the level in the camera hierarchy at which the
    data is stored (Amp, Ccd or skyTile), if relevant

    tables (string, optional): a whitespace-delimited list of tables in the
    registry that can be NATURAL JOIN-ed to look up additional
    information.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    rootStorage : Storage subclass instance
        Interface to persisted repository data.
    provided : `list` of `str`
        Keys provided by the mapper.
    """
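
    # Illustration only (hypothetical policy, not from the original source):
    # a minimal Mapping policy for an exposure-like dataset type might be
    #
    #     template: "postISR/v%(visit)d_c%(ccd)02d.fits"
    #     python: lsst.afw.image.ExposureF
    #     persistable: ExposureF
    #     storage: FitsStorage
    #     tables: raw raw_visit
    #
    # whose template fields become the required data-ID keys
    # (visit -> int, ccd -> int).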

    def __init__(self, datasetType, policy, registry, rootStorage, provided=None):

        if policy is None:
            raise RuntimeError("No policy provided for mapping")

        self.datasetType = datasetType
        self.registry = registry
        self.rootStorage = rootStorage

        self._template = policy['template']  # Template path
        # In most cases the template cannot be used if it is empty, and it is
        # accessed via a property that raises if it is used while
        # `not self._template`. Here we *do* allow it to be empty, for the
        # purpose of fetching the key dict so that the mapping can be
        # constructed and can raise later if the template is invalid. This is
        # a little odd, but it allows the template check to be introduced
        # without a major refactor.
        if self._template:
            self.keyDict = dict([
                (k, _formatMap(v, k, datasetType))
                for k, v in
                re.findall(r'\%\((\w+)\).*?([diouxXeEfFgGcrs])', self.template)
            ])
        else:
            self.keyDict = {}
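        # Illustrative example (hypothetical, not from the original source):
        # for a template like "raw/v%(visit)d_f%(filter)s.fits" the findall
        # above yields [("visit", "d"), ("filter", "s")], so keyDict becomes
        # {"visit": int, "filter": str} via _formatMap.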
        if provided is not None:
            for p in provided:
                if p in self.keyDict:
                    del self.keyDict[p]
        self.python = policy['python']  # Python type
        self.persistable = policy['persistable']  # Persistable type
        self.storage = policy['storage']
        if 'level' in policy:
            self.level = policy['level']  # Level in camera hierarchy
        if 'tables' in policy:
            self.tables = policy.asArray('tables')
        else:
            self.tables = None
        self.range = None
        self.columns = None
        self.obsTimeName = policy['obsTimeName'] if 'obsTimeName' in policy else None
        self.recipe = policy['recipe'] if 'recipe' in policy else 'default'

    @property
    def template(self):
        if self._template:  # template must not be an empty string or None
            return self._template
        else:
            raise RuntimeError(f"Template is not defined for the {self.datasetType} dataset type, "
                               "it must be set before it can be used.")

    def keys(self):
        """Return the dict of keys and value types required for this mapping.
        """
        return self.keyDict

    def map(self, mapper, dataId, write=False):
        """Standard implementation of map function.

        Parameters
        ----------
        mapper : `lsst.daf.persistence.Mapper`
            Object to be mapped.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `lsst.daf.persistence.ButlerLocation`
            Location of object that was mapped.
        """
        actualId = self.need(iter(self.keyDict.keys()), dataId)
        usedDataId = {key: actualId[key] for key in self.keyDict.keys()}
        path = mapper._mapActualToPath(self.template, actualId)
        if os.path.isabs(path):
            raise RuntimeError("Mapped path should not be absolute.")
        if not write:
            # This allows mapped files to be compressed, ending in .gz or .fz,
            # without any indication from the policy that the file should be
            # compressed, easily allowing repositories to contain a mixture of
            # compressed and uncompressed files.
            # If needed we can add a policy flag to allow compressed files or
            # not, and perhaps a list of allowed extensions that may exist
            # at the end of the template.
            for ext in (None, '.gz', '.fz'):
                if ext and path.endswith(ext):
                    continue  # if the path already ends with the extension
                extPath = path + ext if ext else path
                newPath = self.rootStorage.instanceSearch(extPath)
                if newPath:
                    path = newPath
                    break
        assert path, "Fully-qualified filename is empty."

        addFunc = "add_" + self.datasetType  # Name of method for additionalData
        if hasattr(mapper, addFunc):
            addFunc = getattr(mapper, addFunc)
            additionalData = addFunc(self.datasetType, actualId)
            assert isinstance(additionalData, PropertySet), \
                "Bad type for returned data: %s" % (type(additionalData),)
        else:
            additionalData = None

        return ButlerLocation(pythonType=self.python, cppType=self.persistable, storageName=self.storage,
                              locationList=path, dataId=actualId.copy(), mapper=mapper,
                              storage=self.rootStorage, usedDataId=usedDataId, datasetType=self.datasetType,
                              additionalData=additionalData)
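
    # Hypothetical usage (illustration only): a CameraMapper subclass would
    # typically call
    #     loc = mapping.map(mapper, {"visit": 123, "ccd": 4})
    # and read the resolved file path from loc.locationList.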

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.

        Parameters
        ----------
        properties : `list` of `str`
            What to look up.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `list` of `tuple`
            Values of properties.
        """
        if self.registry is None:
            raise RuntimeError("No registry for lookup")

        skyMapKeys = ("tract", "patch")

        where = []
        values = []

        # Prepare to remove skymap entries from the properties list. These
        # must be in the data ID, so we store which ones we're removing and
        # create an OrderedDict that tells us where to re-insert them. That
        # maps the name of the property to either its index in the properties
        # list *after* the skymap ones have been removed (for entries that
        # aren't skymap ones) or the value from the data ID (for those that
        # are).
        removed = set()
        substitutions = OrderedDict()
        index = 0
        properties = list(properties)  # don't modify the original list
        for p in properties:
            if p in skyMapKeys:
                try:
                    substitutions[p] = dataId[p]
                    removed.add(p)
                except KeyError:
                    raise RuntimeError(
                        "Cannot look up skymap key '%s'; it must be explicitly included in the data ID" % p
                    )
            else:
                substitutions[p] = index
                index += 1
        # Can't actually remove while iterating above, so we do it here.
        for p in removed:
            properties.remove(p)

        fastPath = True
        for p in properties:
            if p not in ('filter', 'expTime', 'taiObs'):
                fastPath = False
                break
        if fastPath and 'visit' in dataId and "raw" in self.tables:
            lookupDataId = {'visit': dataId['visit']}
            result = self.registry.lookup(properties, 'raw_visit', lookupDataId, template=self.template)
        else:
            if dataId is not None:
                for k, v in dataId.items():
                    if self.columns and k not in self.columns:
                        continue
                    if k == self.obsTimeName:
                        continue
                    if k in skyMapKeys:
                        continue
                    where.append((k, '?'))
                    values.append(v)
            lookupDataId = {k[0]: v for k, v in zip(where, values)}
            if self.range:
                # format of self.range is
                # ('?', isBetween-lowKey, isBetween-highKey)
                # here we transform that to {(lowKey, highKey): value}
                lookupDataId[(self.range[1], self.range[2])] = dataId[self.obsTimeName]
            result = self.registry.lookup(properties, self.tables, lookupDataId, template=self.template)
        if not removed:
            return result
        # Iterate over the query results, re-inserting the skymap entries.
        result = [tuple(v if k in removed else item[v] for k, v in substitutions.items())
                  for item in result]
        return result
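
    # Illustration only (hypothetical values): with properties
    # ["tract", "expTime"] and a data ID containing tract=42, "tract" is
    # removed before the registry query and each returned row is rebuilt as
    # (42, <expTime from the registry row>).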

    def have(self, properties, dataId):
        """Returns whether the provided data identifier has all
        the properties in the provided list.

        Parameters
        ----------
        properties : `list` of `str`
            Properties required.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        bool
            True if all properties are present.
        """
        for prop in properties:
            if prop not in dataId:
                return False
        return True

    def need(self, properties, dataId):
        """Ensures all properties in the provided list are present in
        the data identifier, looking them up as needed. This is only
        possible for the case where the data identifies a single
        exposure.

        Parameters
        ----------
        properties : `list` of `str`
            Properties required.
        dataId : `dict`
            Partial dataset identifier.

        Returns
        -------
        `dict`
            Copy of dataset identifier with enhanced values.
        """
        newId = dataId.copy()
        newProps = []  # Properties we don't already have
        for prop in properties:
            if prop not in newId:
                newProps.append(prop)
        if len(newProps) == 0:
            return newId

        lookups = self.lookup(newProps, newId)
        if len(lookups) != 1:
            raise NoResults("No unique lookup for %s from %s: %d matches" %
                            (newProps, newId, len(lookups)),
                            self.datasetType, dataId)
        for i, prop in enumerate(newProps):
            newId[prop] = lookups[0][i]
        return newId
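
    # Illustration only (hypothetical values): need(["filter"], {"visit": 1})
    # queries the registry for the visit's filter and returns
    # {"visit": 1, "filter": "r"}; anything other than exactly one registry
    # match raises NoResults.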


def _formatMap(ch, k, datasetType):
    """Convert a format character into a Python type."""
    if ch in "diouxX":
        return int
    elif ch in "eEfFgG":
        return float
    elif ch in "crs":
        return str
    else:
        raise RuntimeError("Unexpected format specifier %s"
                           " for field %s in template for dataset %s" %
                           (ch, k, datasetType))
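
# For example, _formatMap("d", "visit", "raw") returns int and
# _formatMap("s", "filter", "raw") returns str; an unsupported specifier
# raises RuntimeError.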


class ImageMapping(Mapping):
    """ImageMapping is a Mapping subclass for non-camera images.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None


class ExposureMapping(Mapping):
    """ExposureMapping is a Mapping subclass for normal exposures.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None

    def standardize(self, mapper, item, dataId):
        return mapper._standardizeExposure(self, item, dataId)


class CalibrationMapping(Mapping):
    """CalibrationMapping is a Mapping subclass for calibration-type products.

    The difference is that data properties in the query or template
    can be looked up using a reference Mapping in addition to this one.

    CalibrationMapping Policies can contain the following:

    reference (string, optional)
        a list of tables for finding missing dataset
        identifier components (including the observation time, if a validity
        range is required) in the exposure registry; note that the "tables"
        entry refers to the calibration registry

    refCols (string, optional)
        a list of dataset properties required from the
        reference tables for lookups in the calibration registry

    validRange (bool)
        true if the calibration dataset has a validity range
        specified by a column in the tables of the reference dataset in the
        exposure registry and two columns in the tables of this calibration
        dataset in the calibration registry

    obsTimeName (string, optional)
        the name of the column in the reference
        dataset tables containing the observation time (default "taiObs")

    validStartName (string, optional)
        the name of the column in the
        calibration dataset tables containing the start of the validity range
        (default "validStart")

    validEndName (string, optional)
        the name of the column in the
        calibration dataset tables containing the end of the validity range
        (default "validEnd")

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    calibRegistry : `lsst.obs.base.Registry`
        Registry for calibration metadata lookups.
    calibRoot : `str`
        Path of calibration root directory.
    dataRoot : `str`
        Path of data root directory; used for outputs only.
    """
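
    # Illustration only (hypothetical policy, not from the original source):
    # a "bias" calibration might be configured with
    #
    #     reference: raw_visit
    #     refCols: visit
    #     validRange: true
    #     obsTimeName: taiObs
    #
    # so that taiObs is fetched from the exposure registry and matched against
    # the validStart/validEnd columns in the calibration registry.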

    def __init__(self, datasetType, policy, registry, calibRegistry, calibRoot, dataRoot=None, **kwargs):
        Mapping.__init__(self, datasetType, policy, calibRegistry, calibRoot, **kwargs)
        self.reference = policy.asArray("reference") if "reference" in policy else None
        self.refCols = policy.asArray("refCols") if "refCols" in policy else None
        self.refRegistry = registry
        self.dataRoot = dataRoot
        if "validRange" in policy and policy["validRange"]:
            self.range = ("?", policy["validStartName"], policy["validEndName"])
        if "columns" in policy:
            self.columns = policy.asArray("columns")
        if "filter" in policy:
            self.setFilter = policy["filter"]
        self.metadataKeys = None
        if "metadataKey" in policy:
            self.metadataKeys = policy.asArray("metadataKey")

    def map(self, mapper, dataId, write=False):
        location = Mapping.map(self, mapper, dataId, write=write)
        # Want outputs to be in the output directory
        if write and self.dataRoot:
            location.storage = self.dataRoot
        return location

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.

        Parameters
        ----------
        properties : `list` of `str`
            Properties to look up.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `list` of `tuple`
            Values of properties.
        """

        # Either look up taiObs in the reference tables and then everything
        # else in the calibration registry, or look up everything in the
        # registry directly.
        newId = dataId.copy()
        if self.reference is not None:
            where = []
            values = []
            for k, v in dataId.items():
                if self.refCols and k not in self.refCols:
                    continue
                where.append(k)
                values.append(v)

            # Columns we need from the regular registry
            if self.columns is not None:
                columns = set(self.columns)
                for k in dataId.keys():
                    columns.discard(k)
            else:
                columns = set(properties)

            if not columns:
                # Nothing to look up in the reference registry; continue with
                # the calibration registry
                return Mapping.lookup(self, properties, newId)

            lookupDataId = dict(zip(where, values))
            lookups = self.refRegistry.lookup(columns, self.reference, lookupDataId)
            if len(lookups) != 1:
                raise RuntimeError("No unique lookup for %s from %s: %d matches" %
                                   (columns, dataId, len(lookups)))
            if columns == set(properties):
                # Have everything we need
                return lookups
            for i, prop in enumerate(columns):
                newId[prop] = lookups[0][i]
        return Mapping.lookup(self, properties, newId)
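
    # Hypothetical flow (illustration only): with reference=["raw_visit"] and
    # dataId={"visit": 123}, taiObs is first resolved from the exposure
    # registry, then Mapping.lookup matches it against the calibration
    # registry's validity range.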

    def standardize(self, mapper, item, dataId):
        """Default standardization function for calibration datasets.

        If the item is of a type that should be standardized, the base class
        ``standardizeExposure`` method is called, otherwise the item is
        returned unmodified.

        Parameters
        ----------
        mapper : `lsst.daf.persistence.Mapper`
            Mapper object to pass through.
        item : object
            Will be standardized if of type lsst.afw.image.Exposure,
            lsst.afw.image.DecoratedImage, lsst.afw.image.Image
            or lsst.afw.image.MaskedImage.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `lsst.afw.image.Exposure` or item
            The standardized object.
        """
        if issubclass(doImport(self.python), (Exposure, MaskedImage, Image, DecoratedImage)):
            return mapper._standardizeExposure(self, item, dataId, filter=self.setFilter)
        return item


class DatasetMapping(Mapping):
    """DatasetMapping is a Mapping subclass for non-Exposure datasets that can
    be retrieved by the standard daf_persistence mechanism.

    The differences are that the Storage type must be specified and no
    Exposure standardization is performed.

    The "storage" entry in the Policy is mandatory; the "tables" entry is
    optional; no "level" entry is allowed.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.storage = policy["storage"]  # Storage type