#
# LSST Data Management System
# Copyright 2008, 2009, 2010 LSST Corporation.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <http://www.lsstcorp.org/LegalNotices/>.
#

from collections import OrderedDict
import os
import re
from lsst.daf.base import PropertySet
from lsst.daf.persistence import ButlerLocation, NoResults
from lsst.utils import doImport
from lsst.afw.image import Exposure, MaskedImage, Image, DecoratedImage

__all__ = ["Mapping", "ImageMapping", "ExposureMapping", "CalibrationMapping", "DatasetMapping"]


class Mapping(object):

    """Mapping is a base class for all mappings.  Mappings are used by
    the Mapper to map (determine a path to some data given some
    identifiers) and standardize (convert data into some standard
    format or type) data, and to query the associated registry to see
    what data is available.

    Subclasses must specify self.storage or else override self.map().

    Public methods: lookup, have, need, keys, map

    Mappings are specified mainly by policy.  A Mapping policy should
    consist of:

    template (string): a Python string providing the filename for that
    particular dataset type based on some data identifiers.  In the
    case of redundancy in the path (e.g., a file uniquely specified by
    the exposure number, but with the filter in the path), the
    redundant/dependent identifiers can be looked up in the registry.

    python (string): the Python type for the retrieved data (e.g.
    lsst.afw.image.ExposureF)

    persistable (string): the Persistable registration for the on-disk data
    (e.g. ImageU)

    storage (string, optional): Storage type for this dataset type (e.g.
    "FitsStorage")

    level (string, optional): the level in the camera hierarchy at which the
    data is stored (Amp, Ccd or skyTile), if relevant

    tables (string, optional): a whitespace-delimited list of tables in the
    registry that can be NATURAL JOIN-ed to look up additional
    information.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    rootStorage : Storage subclass instance
        Interface to persisted repository data.
    provided : `list` of `str`
        Keys provided by the mapper.
    """
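
    # A policy sketch matching the fields described above (hypothetical
    # dataset type, template, and tables; real policies are supplied by the
    # obs_* camera packages):
    #
    #     calexp: {
    #         template: "calexp/v%(visit)d_f%(filter)s.fits"
    #         python: "lsst.afw.image.ExposureF"
    #         persistable: "ExposureF"
    #         storage: "FitsStorage"
    #         level: "Ccd"
    #         tables: "raw raw_visit"
    #     }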

    def __init__(self, datasetType, policy, registry, rootStorage, provided=None):

        if policy is None:
            raise RuntimeError("No policy provided for mapping")

        self.datasetType = datasetType
        self.registry = registry
        self.rootStorage = rootStorage

        self._template = policy['template']  # Template path
        # In most cases the template cannot be empty: it is normally accessed
        # through the `template` property, which raises if `self._template` is
        # falsy.  Here we *do* allow it to be empty, so that the key dict can
        # still be built and the Mapping constructed; the property then raises
        # only if an undefined template is actually used.  This is a little
        # odd, but it introduces the template check without a major refactor.
        if self._template:
            self.keyDict = dict([
                (k, _formatMap(v, k, datasetType))
                for k, v in
                re.findall(r'\%\((\w+)\).*?([diouxXeEfFgGcrs])', self.template)
            ])
        else:
            self.keyDict = {}
        if provided is not None:
            for p in provided:
                if p in self.keyDict:
                    del self.keyDict[p]
        self.python = policy['python']  # Python type
        self.persistable = policy['persistable']  # Persistable type
        self.storage = policy['storage']
        if 'level' in policy:
            self.level = policy['level']  # Level in camera hierarchy
        if 'tables' in policy:
            self.tables = policy.asArray('tables')
        else:
            self.tables = None
        self.range = None
        self.columns = None
        self.obsTimeName = policy['obsTimeName'] if 'obsTimeName' in policy else None
        self.recipe = policy['recipe'] if 'recipe' in policy else 'default'
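
    # Illustration of the key extraction above (a sketch using a hypothetical
    # template; not executed as part of the class):
    #
    #     >>> re.findall(r'\%\((\w+)\).*?([diouxXeEfFgGcrs])',
    #     ...            "raw/v%(visit)d_f%(filter)s.fits")
    #     [('visit', 'd'), ('filter', 's')]
    #
    # which _formatMap then turns into keyDict == {'visit': int, 'filter': str}.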

    @property
    def template(self):
        if self._template:  # template must not be an empty string or None
            return self._template
        else:
            raise RuntimeError("Template is not defined for the {} dataset type; ".format(self.datasetType) +
                               "it must be set before it can be used.")

    def keys(self):
        """Return the dict of keys and value types required for this mapping."""
        return self.keyDict

    def map(self, mapper, dataId, write=False):
        """Standard implementation of map function.

        Parameters
        ----------
        mapper : `lsst.daf.persistence.Mapper`
            Mapper to use for the mapping.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `lsst.daf.persistence.ButlerLocation`
            Location of the object that was mapped.
        """
        actualId = self.need(iter(self.keyDict.keys()), dataId)
        usedDataId = {key: actualId[key] for key in self.keyDict.keys()}
        path = mapper._mapActualToPath(self.template, actualId)
        if os.path.isabs(path):
            raise RuntimeError("Mapped path should not be absolute.")
        if not write:
            # This allows mapped files to be compressed, ending in .gz or .fz,
            # without any indication from the policy that the file should be
            # compressed, easily allowing repositories to contain a mixture of
            # compressed and uncompressed files.
            # If needed we can add a policy flag to allow compressed files or
            # not, and perhaps a list of allowed extensions that may exist at
            # the end of the template.
            for ext in (None, '.gz', '.fz'):
                if ext and path.endswith(ext):
                    continue  # the path already ends with the extension
                extPath = path + ext if ext else path
                newPath = self.rootStorage.instanceSearch(extPath)
                if newPath:
                    path = newPath
                    break
        assert path, "Fully-qualified filename is empty."

        addFunc = "add_" + self.datasetType  # Name of method for additionalData
        if hasattr(mapper, addFunc):
            addFunc = getattr(mapper, addFunc)
            additionalData = addFunc(self.datasetType, actualId)
            assert isinstance(additionalData, PropertySet), \
                "Bad type for returned data: %s" % (type(additionalData),)
        else:
            additionalData = None

        return ButlerLocation(pythonType=self.python, cppType=self.persistable, storageName=self.storage,
                              locationList=path, dataId=actualId.copy(), mapper=mapper,
                              storage=self.rootStorage, usedDataId=usedDataId, datasetType=self.datasetType,
                              additionalData=additionalData)
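
    # Usage sketch (hypothetical mapper, template, and data ID; illustrative
    # only):
    #
    #     >>> loc = mapping.map(mapper, {"visit": 123, "filter": "r"})
    #     >>> loc.getLocations()
    #     ['raw/v123_fr.fits']
    #
    # need() first fills in any template keys missing from the data ID via a
    # registry lookup; the template is then expanded into a repository-relative
    # path.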

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.

        Parameters
        ----------
        properties : `list` of `str`
            What to look up.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `list` of `tuple`
            Values of properties.
        """
        if self.registry is None:
            raise RuntimeError("No registry for lookup")

        skyMapKeys = ("tract", "patch")

        where = []
        values = []

        # Prepare to remove skymap entries from the properties list.  These
        # must be in the data ID, so we store which ones we're removing and
        # create an OrderedDict that tells us where to re-insert them.  That
        # maps the name of the property to either its index in the properties
        # list *after* the skymap ones have been removed (for entries that
        # aren't skymap ones) or the value from the data ID (for those that
        # are).
        removed = set()
        substitutions = OrderedDict()
        index = 0
        properties = list(properties)  # don't modify the original list
        for p in properties:
            if p in skyMapKeys:
                try:
                    substitutions[p] = dataId[p]
                    removed.add(p)
                except KeyError:
                    raise RuntimeError(
                        "Cannot look up skymap key '%s'; it must be explicitly included in the data ID" % p
                    )
            else:
                substitutions[p] = index
                index += 1
        # Can't actually remove while iterating above, so we do it here.
        for p in removed:
            properties.remove(p)

        fastPath = True
        for p in properties:
            if p not in ('filter', 'expTime', 'taiObs'):
                fastPath = False
                break
        if fastPath and 'visit' in dataId and "raw" in self.tables:
            lookupDataId = {'visit': dataId['visit']}
            result = self.registry.lookup(properties, 'raw_visit', lookupDataId, template=self.template)
        else:
            if dataId is not None:
                for k, v in dataId.items():
                    if self.columns and k not in self.columns:
                        continue
                    if k == self.obsTimeName:
                        continue
                    if k in skyMapKeys:
                        continue
                    where.append((k, '?'))
                    values.append(v)
            lookupDataId = {k[0]: v for k, v in zip(where, values)}
            if self.range:
                # format of self.range is ('?', isBetween-lowKey, isBetween-highKey)
                # here we transform that to {(lowKey, highKey): value}
                lookupDataId[(self.range[1], self.range[2])] = dataId[self.obsTimeName]
            result = self.registry.lookup(properties, self.tables, lookupDataId, template=self.template)
        if not removed:
            return result
        # Iterate over the query results, re-inserting the skymap entries.
        result = [tuple(v if k in removed else item[v] for k, v in substitutions.items())
                  for item in result]
        return result
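
    # Illustration of the skymap re-insertion above (hypothetical values):
    # with properties = ["tract", "visit"] and dataId = {"tract": 8766},
    # "tract" is taken from the data ID rather than queried, leaving
    # substitutions == OrderedDict([("tract", 8766), ("visit", 0)]); a
    # registry row such as (123,) is then rebuilt as (8766, 123).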

    def have(self, properties, dataId):
        """Returns whether the provided data identifier has all
        the properties in the provided list.

        Parameters
        ----------
        properties : `list` of `str`
            Properties required.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `bool`
            `True` if all properties are present.
        """
        for prop in properties:
            if prop not in dataId:
                return False
        return True

    def need(self, properties, dataId):
        """Ensures all properties in the provided list are present in
        the data identifier, looking them up as needed.  This is only
        possible for the case where the data identifies a single
        exposure.

        Parameters
        ----------
        properties : `list` of `str`
            Properties required.
        dataId : `dict`
            Partial dataset identifier.

        Returns
        -------
        `dict`
            Copy of the dataset identifier with the missing values filled in.
        """
        newId = dataId.copy()
        newProps = []  # Properties we don't already have
        for prop in properties:
            if prop not in newId:
                newProps.append(prop)
        if len(newProps) == 0:
            return newId

        lookups = self.lookup(newProps, newId)
        if len(lookups) != 1:
            raise NoResults("No unique lookup for %s from %s: %d matches" %
                            (newProps, newId, len(lookups)),
                            self.datasetType, dataId)
        for i, prop in enumerate(newProps):
            newId[prop] = lookups[0][i]
        return newId
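
    # Usage sketch for have()/need() (hypothetical registry contents;
    # illustrative only):
    #
    #     >>> mapping.have(["visit", "filter"], {"visit": 123})
    #     False
    #     >>> mapping.need(["filter"], {"visit": 123})
    #     {'visit': 123, 'filter': 'r'}
    #
    # need() raises NoResults unless the registry lookup is unique.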


def _formatMap(ch, k, datasetType):
    """Convert a format character into a Python type."""
    if ch in "diouxX":
        return int
    elif ch in "eEfFgG":
        return float
    elif ch in "crs":
        return str
    else:
        raise RuntimeError("Unexpected format specifier %s"
                           " for field %s in template for dataset %s" %
                           (ch, k, datasetType))
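
# For example, _formatMap('d', 'visit', 'raw') returns int, 'f' returns float,
# and 's' returns str; any other conversion character raises RuntimeError
# (the template regex above should only ever pass matching characters).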


class ImageMapping(Mapping):
    """ImageMapping is a Mapping subclass for non-camera images.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None


class ExposureMapping(Mapping):
    """ExposureMapping is a Mapping subclass for normal exposures.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None

    def standardize(self, mapper, item, dataId):
        return mapper._standardizeExposure(self, item, dataId)


class CalibrationMapping(Mapping):
    """CalibrationMapping is a Mapping subclass for calibration-type products.

    The difference is that data properties in the query or template
    can be looked up using a reference Mapping in addition to this one.

    CalibrationMapping policies can contain the following:

    reference (string, optional)
        a list of tables for finding missing dataset identifier components
        (including the observation time, if a validity range is required) in
        the exposure registry; note that the "tables" entry refers to the
        calibration registry

    refCols (string, optional)
        a list of dataset properties required from the reference tables for
        lookups in the calibration registry

    validRange (bool)
        true if the calibration dataset has a validity range specified by a
        column in the tables of the reference dataset in the exposure registry
        and two columns in the tables of this calibration dataset in the
        calibration registry

    obsTimeName (string, optional)
        the name of the column in the reference dataset tables containing the
        observation time (default "taiObs")

    validStartName (string, optional)
        the name of the column in the calibration dataset tables containing
        the start of the validity range (default "validStart")

    validEndName (string, optional)
        the name of the column in the calibration dataset tables containing
        the end of the validity range (default "validEnd")

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    calibRegistry : `lsst.obs.base.Registry`
        Registry for calibration metadata lookups.
    calibRoot : `str`
        Path of calibration root directory.
    dataRoot : `str`
        Path of data root directory; used for outputs only.
    """
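
    # A policy sketch for a calibration dataset with a validity range
    # (hypothetical names; real policies are supplied by the obs_* camera
    # packages):
    #
    #     bias: {
    #         template: "bias/bias-%(ccd)02d.fits"
    #         python: "lsst.afw.image.ExposureF"
    #         persistable: "ExposureF"
    #         storage: "FitsStorage"
    #         tables: "bias"
    #         reference: "raw_visit"
    #         validRange: true
    #         obsTimeName: "taiObs"
    #         validStartName: "validStart"
    #         validEndName: "validEnd"
    #     }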

    def __init__(self, datasetType, policy, registry, calibRegistry, calibRoot, dataRoot=None, **kwargs):
        Mapping.__init__(self, datasetType, policy, calibRegistry, calibRoot, **kwargs)
        self.reference = policy.asArray("reference") if "reference" in policy else None
        self.refCols = policy.asArray("refCols") if "refCols" in policy else None
        self.refRegistry = registry
        self.dataRoot = dataRoot
        if "validRange" in policy and policy["validRange"]:
            self.range = ("?", policy["validStartName"], policy["validEndName"])
        if "columns" in policy:
            self.columns = policy.asArray("columns")
        if "filter" in policy:
            self.setFilter = policy["filter"]
        self.metadataKeys = None
        if "metadataKey" in policy:
            self.metadataKeys = policy.asArray("metadataKey")

    def map(self, mapper, dataId, write=False):
        location = Mapping.map(self, mapper, dataId, write=write)
        # Want outputs to be in the output directory
        if write and self.dataRoot:
            location.storage = self.dataRoot
        return location

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.

        Parameters
        ----------
        properties : `list` of `str`
            Properties to look up.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `list` of `tuple`
            Values of properties.
        """
        # Either look up the observation time in the reference registry and
        # then everything else in the calibration registry, or look up
        # everything directly in the calibration registry.
        newId = dataId.copy()
        if self.reference is not None:
            where = []
            values = []
            for k, v in dataId.items():
                if self.refCols and k not in self.refCols:
                    continue
                where.append(k)
                values.append(v)

            # Columns we need from the regular registry
            if self.columns is not None:
                columns = set(self.columns)
                for k in dataId.keys():
                    columns.discard(k)
            else:
                columns = set(properties)

            if not columns:
                # Nothing to look up in the reference registry; continue with
                # the calibration registry
                return Mapping.lookup(self, properties, newId)

            lookupDataId = dict(zip(where, values))
            lookups = self.refRegistry.lookup(columns, self.reference, lookupDataId)
            if len(lookups) != 1:
                raise RuntimeError("No unique lookup for %s from %s: %d matches" %
                                   (columns, dataId, len(lookups)))
            if columns == set(properties):
                # Have everything we need
                return lookups
            for i, prop in enumerate(columns):
                newId[prop] = lookups[0][i]
        return Mapping.lookup(self, properties, newId)

    def standardize(self, mapper, item, dataId):
        """Default standardization function for calibration datasets.

        If the item is of a type that should be standardized, the base class
        ``standardizeExposure`` method is called, otherwise the item is
        returned unmodified.

        Parameters
        ----------
        mapper : `lsst.daf.persistence.Mapper`
            Mapper object to pass through.
        item : object
            Will be standardized if of type lsst.afw.image.Exposure,
            lsst.afw.image.DecoratedImage, lsst.afw.image.Image
            or lsst.afw.image.MaskedImage.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `lsst.afw.image.Exposure` or item
            The standardized object.
        """
        if issubclass(doImport(self.python), (Exposure, MaskedImage, Image, DecoratedImage)):
            return mapper._standardizeExposure(self, item, dataId, filter=self.setFilter)
        return item
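
    # Dispatch sketch: for a policy with python="lsst.afw.image.ExposureF",
    # doImport resolves the class, which is an Exposure subclass, so the item
    # is standardized; for a non-image type (e.g. a catalog class) the item is
    # returned unchanged.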


class DatasetMapping(Mapping):
    """DatasetMapping is a Mapping subclass for non-Exposure datasets that can
    be retrieved by the standard daf_persistence mechanism.

    The differences are that the Storage type must be specified and no
    Exposure standardization is performed.

    The "storage" entry in the Policy is mandatory; the "tables" entry is
    optional; no "level" entry is allowed.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.storage = policy["storage"]  # Storage type
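
# End-to-end sketch (hypothetical policy fragment, registry, mapper, and
# storage; illustrative only, not executed):
#
#     from lsst.daf.persistence import Policy
#     policy = Policy({"template": "cat/v%(visit)d.fits",
#                      "python": "lsst.afw.table.SourceCatalog",
#                      "persistable": "SourceCatalog",
#                      "storage": "FitsCatalogStorage"})
#     mapping = DatasetMapping("someCatalog", policy, registry, rootStorage)
#     mapping.keys()                     # {'visit': <class 'int'>}
#     loc = mapping.map(mapper, {"visit": 123})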