LSSTApplications  10.0+286,10.0+36,10.0+46,10.0-2-g4f67435,10.1+152,10.1+37,11.0,11.0+1,11.0-1-g47edd16,11.0-1-g60db491,11.0-1-g7418c06,11.0-2-g04d2804,11.0-2-g68503cd,11.0-2-g818369d,11.0-2-gb8b8ce7
LSSTDataManagementBasePackage
mapping.py
Go to the documentation of this file.
1 #!/bin/env python
2 #
3 # LSST Data Management System
4 # Copyright 2008, 2009, 2010 LSST Corporation.
5 #
6 # This product includes software developed by the
7 # LSST Project (http://www.lsst.org/).
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the LSST License Statement and
20 # the GNU General Public License along with this program. If not,
21 # see <http://www.lsstcorp.org/LegalNotices/>.
22 #
23 
24 import os
25 import re
26 from lsst.daf.persistence import ButlerLocation
27 import lsst.pex.policy as pexPolicy
28 
29 """This module defines the Mapping base class."""
30 
class Mapping(object):

    """Mapping is a base class for all mappings.  Mappings are used by
    the Mapper to map (determine a path to some data given some
    identifiers) and standardize (convert data into some standard
    format or type) data, and to query the associated registry to see
    what data is available.

    Subclasses must specify self.storage or else override self.map().

    Public methods: lookup, have, need, getKeys, map

    Mappings are specified mainly by policy.  A Mapping policy should
    consist of:

    template (string): a Python string providing the filename for that
    particular dataset type based on some data identifiers.  In the
    case of redundancy in the path (e.g., file uniquely specified by
    the exposure number, but filter in the path), the
    redundant/dependent identifiers can be looked up in the registry.

    python (string): the Python type for the retrieved data (e.g.
    lsst.afw.image.ExposureF)

    persistable (string): the Persistable registration for the on-disk data
    (e.g. ImageU)

    storage (string, optional): Storage type for this dataset type (e.g.
    "BoostStorage")

    level (string, optional): the level in the camera hierarchy at which the
    data is stored (Amp, Ccd or skyTile), if relevant

    tables (string, optional): a whitespace-delimited list of tables in the
    registry that can be NATURAL JOIN-ed to look up additional
    information.  """

    def __init__(self, datasetType, policy, registry, root, provided=None):
        """Constructor for Mapping class.
        @param datasetType (string)
        @param policy   (lsst.pex.policy.Policy) Mapping policy
        @param registry (lsst.daf.butlerUtils.Registry) Registry for metadata lookups
        @param root     (string) Path of root directory
        @param provided (list of strings) Keys provided by the mapper
        """

        if policy is None:
            raise RuntimeError("No policy provided for mapping")

        self.datasetType = datasetType
        self.registry = registry
        self.root = root

        # Template path; %(key)<format> fields name the data-id keys needed.
        self.template = policy.getString("template")
        # Map each template key to the Python type implied by its format
        # character (e.g. %(visit)d -> int).
        self.keyDict = dict([
            (k, _formatMap(v, k, datasetType))
            for k, v in
            re.findall(r'\%\((\w+)\).*?([diouxXeEfFgGcrs])', self.template)
        ])
        if provided is not None:
            # Keys the mapper supplies itself need not come from the data id.
            for p in provided:
                if p in self.keyDict:
                    del self.keyDict[p]
        self.python = policy.getString("python")            # Python type
        self.persistable = policy.getString("persistable")  # Persistable type
        self.storage = policy.getString("storage")
        if policy.exists("level"):
            self.level = policy.getString("level")  # Level in camera hierarchy
        if policy.exists("tables"):
            self.tables = policy.getStringArray("tables")
        else:
            self.tables = None
        self.range = None       # Validity range spec; set by CalibrationMapping
        self.columns = None     # Registry columns usable for lookups, if restricted
        # Name of the observation-time column, used for validity-range lookups.
        self.obsTimeName = policy.getString("obsTimeName") \
                if policy.exists("obsTimeName") else None

    def keys(self):
        """Return the dict of keys and value types required for this mapping."""
        return self.keyDict

    def map(self, mapper, dataId, write=False):
        """Standard implementation of map function.
        @param mapper (lsst.daf.persistence.Mapper)
        @param dataId (dict) Dataset identifier
        @param write  (bool) True if mapping for an output (skips parent search)
        @return (lsst.daf.persistence.ButlerLocation)"""

        actualId = self.need(self.keyDict.keys(), dataId)
        path = mapper._mapActualToPath(self.template, actualId)
        if not os.path.isabs(path):
            path = os.path.join(self.root, path)
        if not write:
            # For reads, the file may live in a parent repository instead.
            newPath = mapper._parentSearch(path)
            if newPath is not None:
                path = newPath

        addFunc = "add_" + self.datasetType  # Name of method for additionalData
        if hasattr(mapper, addFunc):
            addFunc = getattr(mapper, addFunc)
            additionalData = addFunc(actualId)
            assert isinstance(additionalData, dict), "Bad type for returned data"
        else:
            additionalData = actualId.copy()

        return ButlerLocation(self.python, self.persistable, self.storage, path, additionalData)

    def lookup(self, properties, dataId):
        """Look up properties for in a metadata registry given a partial
        dataset identifier.
        @param properties (list of strings)
        @param dataId     (dict) Dataset identifier
        @return (list of tuples) values of properties"""

        if self.registry is None:
            raise RuntimeError("No registry for lookup")

        # Fast path: all requested properties live in the raw_visit table and
        # the data id pins down the visit, so a single indexed query suffices.
        # (Guards on dataId/self.tables fix crashes when either is None.)
        fastPath = all(p in ('filter', 'expTime', 'taiObs') for p in properties)
        if fastPath and dataId is not None and 'visit' in dataId \
                and self.tables and "raw" in self.tables:
            return self.registry.executeQuery(properties, ('raw_visit',),
                                              [('visit', '?')], None, (dataId['visit'],))

        where = []
        values = []
        if dataId is not None:
            for k, v in dataId.items():
                if self.columns and k not in self.columns:
                    continue
                if k == self.obsTimeName:
                    # Observation time is bound separately via self.range below.
                    continue
                where.append((k, '?'))
                values.append(v)
        if self.range is not None:
            values.append(dataId[self.obsTimeName])
        return self.registry.executeQuery(properties, self.tables,
                                          where, self.range, values)

    def have(self, properties, dataId):
        """Returns whether the provided data identifier has all
        the properties in the provided list.
        @param properties (list of strings) Properties required
        @param dataId     (dict) Dataset identifier
        @return (bool) True if all properties are present"""
        for prop in properties:
            if prop not in dataId:
                return False
        return True

    def need(self, properties, dataId):
        """Ensures all properties in the provided list are present in
        the data identifier, looking them up as needed.  This is only
        possible for the case where the data identifies a single
        exposure.
        @param properties (list of strings) Properties required
        @param dataId     (dict) Partial dataset identifier
        @return (dict) copy of dataset identifier with enhanced values
        """

        newId = dataId.copy()
        # Properties we don't already have
        newProps = [prop for prop in properties if prop not in newId]
        if not newProps:
            return newId

        lookups = self.lookup(newProps, newId)
        if len(lookups) != 1:
            raise RuntimeError("No unique lookup for %s from %s: %d matches" %
                               (newProps, newId, len(lookups)))
        for i, prop in enumerate(newProps):
            newId[prop] = lookups[0][i]
        return newId
205 
206 def _formatMap(ch, k, datasetType):
207  """Convert a format character into a Python type."""
208  if ch in "diouxX":
209  return int
210  elif ch in "eEfFgG":
211  return float
212  elif ch in "crs":
213  return str
214  else:
215  raise RuntimeError("Unexpected format specifier %s"
216  " for field %s in template for dataset %s" %
217  (ch, k, datasetType))
218 
219 
221  """ImageMapping is a Mapping subclass for non-camera images."""
222 
223  def __init__(self, datasetType, policy, registry, root, **kwargs):
224  """Constructor for Mapping class.
225  @param datasetType (string)
226  @param policy (lsst.pex.policy.Policy) Mapping policy
227  @param registry (lsst.daf.butlerUtils.Registry) Registry for metadata lookups
228  @param root (string) Path of root directory"""
229  Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
230  self.columns = policy.getStringArray("columns") if policy.exists("columns") else None
231 
232 
234  """ExposureMapping is a Mapping subclass for normal exposures."""
235 
236  def __init__(self, datasetType, policy, registry, root, **kwargs):
237  """Constructor for Mapping class.
238  @param datasetType (string)
239  @param policy (lsst.pex.policy.Policy) Mapping policy
240  @param registry (lsst.daf.butlerUtils.Registry) Registry for metadata lookups
241  @param root (string) Path of root directory"""
242  Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
243  self.columns = policy.getStringArray("columns") if policy.exists("columns") else None
244 
    def standardize(self, mapper, item, dataId):
        """Standardize a retrieved item by delegating to the mapper's
        exposure-standardization hook.
        @param mapper (lsst.daf.persistence.Mapper) Mapper object
        @param item   Item retrieved from storage
        @param dataId (dict) Dataset identifier
        @return the standardized item, as produced by the mapper"""
        return mapper._standardizeExposure(self, item, dataId)
247 
249  """CalibrationMapping is a Mapping subclass for calibration-type products.
250 
251  The difference is that data properties in the query or template
252  can be looked up using a reference Mapping in addition to this one.
253 
254  CalibrationMapping Policies can contain the following:
255 
256  reference (string, optional): a list of tables for finding missing dataset
257  identifier components (including the observation time, if a validity range
258  is required) in the exposure registry; note that the "tables" entry refers
259  to the calibration registry
260 
261  refCols (string, optional): a list of dataset properties required from the
262  reference tables for lookups in the calibration registry
263 
264  validRange (bool): true if the calibration dataset has a validity range
265  specified by a column in the tables of the reference dataset in the
266  exposure registry) and two columns in the tables of this calibration
267  dataset in the calibration registry)
268 
269  obsTimeName (string, optional): the name of the column in the reference
270  dataset tables containing the observation time (default "taiObs")
271 
272  validStartName (string, optional): the name of the column in the
273  calibration dataset tables containing the start of the validity range
274  (default "validStart")
275 
276  validEndName (string, optional): the name of the column in the
277  calibration dataset tables containing the end of the validity range
278  (default "validEnd") """
279 
280  def __init__(self, datasetType, policy, registry, calibRegistry, calibRoot, **kwargs):
281  """Constructor for Mapping class.
282  @param datasetType (string)
283  @param policy (lsst.pex.policy.Policy) Mapping policy
284  @param registry (lsst.daf.butlerUtils.Registry) Registry for metadata lookups
285  @param calibRegistry (lsst.daf.butlerUtils.Registry) Registry for calibration metadata lookups
286  @param calibRoot (string) Path of calibration root directory"""
287 
288  Mapping.__init__(self, datasetType, policy, calibRegistry, calibRoot, **kwargs)
289  self.reference = policy.getStringArray("reference") \
290  if policy.exists("reference") else None
291  self.refCols = policy.getStringArray("refCols") \
292  if policy.exists("refCols") else None
293  self.refRegistry = registry
294  if policy.exists("validRange") and policy.getBool("validRange"):
295  self.range = ("?", policy.getString("validStartName"),
296  policy.getString("validEndName"))
297  if policy.exists("columns"):
298  self.columns = policy.getStringArray("columns")
299  if policy.exists("filter"):
300  self.setFilter = policy.getBool("filter")
301 
302  def lookup(self, properties, dataId):
303  """Look up properties for in a metadata registry given a partial
304  dataset identifier.
305  @param properties (list of strings)
306  @param dataId (dict) Dataset identifier
307  @return (list of tuples) values of properties"""
308 
309 # Either look up taiObs in reference and then all in calibRegistry
310 # Or look up all in registry
311 
312  newId = dataId.copy()
313  if self.reference is not None:
314  where = []
315  values = []
316  for k, v in dataId.iteritems():
317  if self.refCols and k not in self.refCols:
318  continue
319  where.append((k, '?'))
320  values.append(v)
321 
322  # Columns we need from the regular registry
323  if self.columns is not None:
324  columns = set(self.columns)
325  for k in dataId.iterkeys():
326  columns.discard(k)
327  else:
328  columns = set(properties)
329 
330  if not columns:
331  # Nothing to lookup in reference registry; continue with calib
332  # registry
333  return Mapping.lookup(self, properties, newId)
334 
335  lookups = self.refRegistry.executeQuery(columns, self.reference,
336  where, None, values)
337  if len(lookups) != 1:
338  raise RuntimeError("No unique lookup for %s from %s: %d matches" %
339  (columns, dataId, len(lookups)))
340  if columns == set(properties):
341  # Have everything we need
342  return lookups
343  for i, prop in enumerate(columns):
344  newId[prop] = lookups[0][i]
345  return Mapping.lookup(self, properties, newId)
346 
    def standardize(self, mapper, item, dataId):
        """Standardize a retrieved calibration item by delegating to the
        mapper's exposure-standardization hook, forwarding this mapping's
        filter-setting flag.
        @param mapper (lsst.daf.persistence.Mapper) Mapper object
        @param item   Item retrieved from storage
        @param dataId (dict) Dataset identifier
        @return the standardized item, as produced by the mapper"""
        return mapper._standardizeExposure(self, item, dataId, filter=self.setFilter)
349 
351  """DatasetMapping is a Mapping subclass for non-Exposure datasets that can
352  be retrieved by the standard daf_persistence mechanism.
353 
354  The differences are that the Storage type must be specified and no
355  Exposure standardization is performed.
356 
357  The "storage" entry in the Policy is mandatory; the "tables" entry is
358  optional; no "level" entry is allowed. """
359 
360  def __init__(self, datasetType, policy, registry, root, **kwargs):
361  """Constructor for DatasetMapping class.
362  @param[in,out] mapper (lsst.daf.persistence.Mapper) Mapper object
363  @param policy (lsst.pex.policy.Policy) Mapping policy
364  @param datasetType (string)
365  @param registry (lsst.daf.butlerUtils.Registry) Registry for metadata lookups
366  @param root (string) Path of root directory"""
367  Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
368  self.storage = policy.getString("storage") # Storage type