LSSTApplications  10.0-2-g4f67435,11.0.rc2+1,11.0.rc2+12,11.0.rc2+3,11.0.rc2+4,11.0.rc2+5,11.0.rc2+6,11.0.rc2+7,11.0.rc2+8
LSSTDataManagementBasePackage
datasetScanner.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2012 LSST Corporation.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <http://www.lsstcorp.org/LegalNotices/>.
21 #
22 import os
23 import os.path
24 import re
26 
# Names exported by ``from <this module> import *``.
__all__ = ['getMapperClass', 'parseDataIdRules',
           'HfsScanner', 'DatasetScanner']
32 
33 
# Lower-case camera name -> fully qualified name of the mapper class for
# that camera. Looked up by getMapperClass().
_mapperClassName = dict(
    lsstsim='lsst.obs.lsstSim.LsstSimMapper',
    sdss='lsst.obs.sdss.SdssMapper',
    cfht='lsst.obs.cfht.CfhtMapper',
)
39 
40 
def getMapperClass(camera):
    """Return the subclass of lsst.daf.persistence.Mapper
    to use for the camera with the given name (case-insensitive).

    camera:
        Camera name; it is lower-cased and looked up in _mapperClassName.

    Raises RuntimeError if the camera name is not in _mapperClassName, or
    if the module or class named there cannot be imported.
    """
    # importlib is available on Python 2.7 and 3; it replaces the
    # reduce()/__import__ idiom, which relied on the Python 2 only
    # builtin reduce().
    import importlib

    camera = camera.lower()
    if camera not in _mapperClassName:
        raise RuntimeError(str.format("{} is not a valid camera name", camera))
    name = _mapperClassName[camera]
    # 'pkg.mod.Class' -> module 'pkg.mod', attribute 'Class'
    moduleName, _, className = name.rpartition('.')
    try:
        return getattr(importlib.import_module(moduleName), className)
    except Exception as e:
        # Any failure while importing is reported as RuntimeError, as
        # before, but KeyboardInterrupt/SystemExit are no longer swallowed
        # and the underlying cause is kept in the message.
        raise RuntimeError(str.format("Failed to import {}: {}", name, e))
55 
56 
# For each supported camera (lower-case name): the dataId keys that are
# accepted for it, mapped to the Python type (int or str) of their values.
# parseDataIdRules() and the camera specific mungers consult this table.
_keyTypes = {
    'lsstsim': dict(
        visit=int, filter=str,
        sensorName=str, ccdName=str, channelName=str, ampName=str,
        raft=str, snap=int, exposure=int,
        ccd=str, sensor=str, amp=str, channel=str,
        raftId=int, ccdId=int, sensorId=int, ampId=int, channelId=int,
        skyTile=int, tract=int, patch=str,
    ),
    'sdss': dict(
        run=int, camcol=int, field=int, filter=str,
        skyTile=int, tract=int, patch=str,
    ),
    'cfht': dict(
        visit=int, filter=str,
        ccdName=str, ampName=str,
        ccd=int, amp=int,
        skyTile=int, tract=int, patch=str,
    ),
}
102 
103 
def parseDataIdRules(ruleList, camera):
    """A rule is a string in the following format:

    'key=value1[^value2[^value3...]]'

    The values may either be strings, or of the form 'int..int'
    (e.g. '1..3') which is interpreted as '1^2^3' (inclusive, unlike a python
    range). So '0^2..4^7..9' is equivalent to '0^2^3^4^7^8^9'. The range
    syntax is only recognized for keys whose type in _keyTypes is int;
    for str keys every '^'-separated piece is taken literally.

    This function parses a list of such strings, and returns a dict mapping
    keys to sets of legal values. Rules naming the same key are merged;
    a rule with nothing after the '=' (or without '=') is ignored.

    ruleList:
        List of rule strings
    camera:
        Camera the rule list applies to (e.g. 'lsstSim' or 'sdss');
        matched case-insensitively

    Raises RuntimeError for an unknown camera or a key that is not valid
    for the camera, and ValueError if a value of an int key is neither an
    integer nor an integer range.
    """
    camera = camera.lower()
    if camera not in _keyTypes:
        raise RuntimeError('{} is not a recognized camera name'.format(camera))
    keyTypes = _keyTypes[camera]
    # compiled once; matches the 'int..int' range syntax
    rangeRe = re.compile(r'^(\d+)\.\.(\d+)$')
    kvs = {}
    for rule in ruleList:
        # process rule for a single key
        key, _, pattern = rule.partition('=')
        if key not in keyTypes:
            raise RuntimeError('{} is not a valid dataId key for camera {}'.format(key, camera))
        if len(pattern) == 0:
            continue
        isInt = keyTypes[key] == int
        # values from several rules for the same key accumulate in one set
        values = kvs.setdefault(key, set())
        for p in pattern.split('^'):
            if not isInt:
                values.add(p)
                continue
            m = rangeRe.search(p)
            if m:
                # range() rather than the Python 2 only xrange(); the
                # upper bound is inclusive
                values.update(range(int(m.group(1)), int(m.group(2)) + 1))
            else:
                values.add(int(p))
    return kvs
149 
150 
class _FormatKey(object):
    """Description of one substitution key of a path template.

    Instances carry three plain attributes:

    spec
        The formatting spec text of the key, e.g. '%(filter)s'.

    typ
        The type of the key's value; int or str.

    munge
        A callable taking a key name, a key value string and a dataId
        dictionary. It should return a fresh dictionary that includes
        new entries derived from the given key, value, and existing
        entries; _mungeStr and _mungeInt are examples.
    """
    def __init__(self, spec, typ, munge):
        self.spec, self.typ, self.munge = spec, typ, munge
170 
def _mungeStr(k, v, dataId):
    """Munger for string-formatted keys: return a copy of dataId in which
    entry k holds str(v). The dictionary passed in is not modified.
    """
    updated = dataId.copy()
    updated.update({k: str(v)})
    return updated
176 
def _mungeInt(k, v, dataId):
    """Munger for integer-formatted keys: return a copy of dataId in which
    entry k holds int(v). The dictionary passed in is not modified; int()
    raises if v is not a valid integer.
    """
    updated = dataId.copy()
    updated.update({k: int(v)})
    return updated
182 
183 
class _PathComponent(object):
    """One component (a directory or the file name) of a path template.

    Instances carry three plain attributes:

    keys
        List of the key names that occur for the first time in this
        path component.

    regex
        When simple is False: a compiled regular expression identifying
        matches to this path component. When simple is True: just the
        literal component string.

    simple
        True if regex is a plain string literal rather than a pattern;
        keys is then always None or [].
    """
    def __init__(self, keys, regex, simple):
        self.keys, self.regex, self.simple = keys, regex, simple
204 
205 
class HfsScanner(object):
    """A hierarchical scanner for paths matching a template, optionally
    also restricting visited paths to those matching a list of dataId rules.
    """
    def __init__(self, template):
        """Build an HfsScanner for a given path template. The path template
        should be a Python string with named format substitution
        specifications, as used in mapper policy files. For example:

        deepCoadd-results/%(filter)s/%(tract)d/%(patch)s/calexp-%(filter)s-%(tract)d-%(patch)s.fits

        Note that a key may appear multiple times. If it does,
        the value for each occurrence should be identical (the formatting
        type-codes must be identical; this is the only thing checked here -
        repeated occurrences are matched but not captured, so their values
        are not compared with the first occurrence). Octal, binary,
        hexadecimal, and floating point formats are not supported.

        template:
            Relative path template naming a file.

        Raises RuntimeError if the template is empty, absolute, or
        identifies a directory, or if a key is repeated with a different
        format type-code.
        """
        template = os.path.normpath(template)
        if (len(template) == 0 or
                template == os.curdir or
                template[0] == os.sep or
                template[-1] == os.sep):
            raise RuntimeError(
                'Path template is empty, absolute, or identifies a directory')
        # key name -> _FormatKey
        self._formatKeys = {}
        # one _PathComponent per path component, in path order
        self._pathComponents = []
        fmt = re.compile(r'%\((\w+)\).*?([diucrs])')

        # split path into components
        for component in template.split(os.sep):
            # search for all occurrences of a format spec
            simple = True
            last = 0
            regex = ''
            newKeys = []
            for m in fmt.finditer(component):
                simple = False
                spec = m.group(0)
                k = m.group(1)
                # 'in' rather than dict.has_key(), which Python 3 removed
                seenBefore = k in self._formatKeys
                # transform format spec into a regular expression
                regex += re.escape(component[last:m.start(0)])
                last = m.end(0)
                regex += '('
                if seenBefore:
                    # repeated key: match it, but do not capture it, so
                    # group numbers line up with newKeys
                    regex += '?:'
                if m.group(2) in 'crs':
                    munge = _mungeStr
                    typ = str
                    regex += r'.+)'
                else:
                    munge = _mungeInt
                    typ = int
                    regex += r'[+-]?\d+)'
                if seenBefore:
                    # check consistency of formatting spec across key
                    # occurrences
                    if spec[-1] != self._formatKeys[k].spec[-1]:
                        raise RuntimeError(
                            'Path template contains inconsistent format type-codes '
                            'for the same key')
                else:
                    newKeys.append(k)
                    self._formatKeys[k] = _FormatKey(spec, typ, munge)
            regex += re.escape(component[last:])
            if simple:
                regex = component  # literal match
            else:
                regex = re.compile('^' + regex + '$')
            self._pathComponents.append(_PathComponent(newKeys, regex, simple))

    def walk(self, root, rules=None):
        """Generator that descends the given root directory in top-down
        fashion, matching paths corresponding to the template and satisfying
        the given rule list. The generator yields tuples of the form
        (path, dataId), where path is a dataset file name relative to root,
        and dataId is a key value dictionary identifying the file.

        If nothing at all is found under root, the search is repeated under
        root/_parent, then root/_parent/_parent, and so on for as long as
        that directory exists; yielded paths are relative to the directory
        in which the match was found.

        root:
            Directory to scan.
        rules:
            None (or an empty list) for no filtering; otherwise a list of
            dictionaries mapping key names to collections of allowed
            values. A path is kept while at least one rule accepts every
            key value extracted so far; keys absent from a rule are not
            constrained by it.
        """
        oneFound = False
        while os.path.exists(root) and not oneFound:
            # Every root starts from the caller's full rule list. The
            # per-entry rule list is unpacked into curRules so that the
            # 'rules' argument is not overwritten by a filtered subset.
            stack = [(0, root, rules, {})]
            while stack:
                depth, path, curRules, dataId = stack.pop()
                if os.path.isfile(path):
                    continue
                pc = self._pathComponents[depth]
                if pc.simple:
                    # No need to list directory contents
                    entries = [pc.regex]
                    if not os.path.exists(os.path.join(path, pc.regex)):
                        continue
                else:
                    entries = os.listdir(path)
                depth += 1
                for e in entries:
                    subRules = curRules
                    subDataId = dataId
                    if not pc.simple:
                        # make sure e matches path component regular expression
                        m = pc.regex.match(e)
                        if not m:
                            continue
                        # got a match - update dataId with new key values (if any)
                        try:
                            for i, k in enumerate(pc.keys):
                                subDataId = self._formatKeys[k].munge(k, m.group(i + 1), subDataId)
                        except Exception:
                            # Munger raises if value is invalid for key, so
                            # not really a match
                            continue
                        if subRules and pc.keys:
                            # have dataId rules and saw new keys; filter rule list
                            for k in subDataId:
                                subRules = [r for r in subRules
                                            if k not in r or subDataId[k] in r[k]]
                            if not subRules:
                                continue  # no rules matched
                    # Have path matching template and at least one rule
                    p = os.path.join(path, e)
                    if depth < len(self._pathComponents):
                        # recurse
                        stack.append((depth, p, subRules, subDataId))
                    elif depth == len(self._pathComponents):
                        if os.path.isfile(p):
                            # found a matching file, yield it
                            yield os.path.relpath(p, root), subDataId
                            oneFound = True
            # end while stack
            root = os.path.join(root, "_parent")
336 
337 
338 # -- Camera specific dataId mungers ----
339 
def _mungeLsstSim(k, v, dataId):
    """Munger for lsstSim dataId keys; returns an updated copy of dataId.

    'raft', 'sensor'/'ccd' and 'channel'/'amp' values are unpacked into
    exactly two items (e.g. the two characters of '12'), which are stored
    joined by a comma together with a number computed from them.
    'snap'/'exposure' values are stored as an int under 'snap'. Any other
    key is stored under its own name, converted to int if
    _keyTypes['lsstsim'] says so.
    """
    # NOTE(review): the numbers are stored as 'sensorNum'/'channelNum',
    # while _keyTypes['lsstsim'] lists 'sensorId'/'channelId' - confirm
    # which names are intended.
    # key -> (key for the 'a,b' name, key for the number, multiplier of a)
    paired = {
        'raft': ('raft', 'raftId', 5),
        'sensor': ('sensor', 'sensorNum', 3),
        'ccd': ('sensor', 'sensorNum', 3),
        'channel': ('channel', 'channelNum', 8),
        'amp': ('channel', 'channelNum', 8),
    }
    result = dataId.copy()
    if k in paired:
        nameKey, numberKey, factor = paired[k]
        first, second = v
        result[nameKey] = first + ',' + second
        result[numberKey] = int(first) * factor + int(second)
    elif k in ('snap', 'exposure'):
        result['snap'] = int(v)
    elif _keyTypes['lsstsim'][k] == int:
        result[k] = int(v)
    else:
        result[k] = v
    return result
361 
def _mungeSdss(k, v, dataId):
    """Munger for sdss dataId keys: return a copy of dataId with entry k
    set to v, converted to int when _keyTypes['sdss'] gives int for k.
    """
    result = dataId.copy()
    result[k] = int(v) if _keyTypes['sdss'][k] == int else v
    return result
369 
def _mungeCfht(k, v, dataId):
    """Munger for cfht dataId keys; returns an updated copy of dataId.

    'ccd' and 'amp' values are stored twice: as an int under the key
    itself and unchanged under 'ccdName' / 'ampName'. Any other key is
    stored under its own name, converted to int when _keyTypes['cfht']
    gives int for it.

    Raises KeyError for a key missing from _keyTypes['cfht'] and
    ValueError if an integer conversion fails.
    """
    dataId = dataId.copy()
    if k == 'ccd':
        dataId['ccd'] = int(v)
        dataId['ccdName'] = v
    elif k == 'amp':
        dataId['amp'] = int(v)
        dataId['ampName'] = v
    # Use the cfht table: the sdss table has no 'visit' (or other cfht
    # only) keys, so looking there raised KeyError for valid cfht keys.
    elif _keyTypes['cfht'][k] == int:
        dataId[k] = int(v)
    else:
        dataId[k] = v
    return dataId
383 
# Lower-case camera name -> camera specific munger; DatasetScanner installs
# the selected function as the munge attribute of each of its format keys.
_mungeFunctions = dict(
    lsstsim=_mungeLsstSim,
    sdss=_mungeSdss,
    cfht=_mungeCfht,
)
389 
390 
class DatasetScanner(HfsScanner):
    """File system scanner for a dataset known to a camera mapper.
    """
    def __init__(self, dataset, camera, cameraMapper):
        """Build a scanner for the path template of the given dataset.

        dataset:
            Name of the dataset; must be present in cameraMapper.mappings.
            The template attribute of that mapping is used as the path
            template.
        camera:
            Camera name (case-insensitive); must be a key of _keyTypes.
        cameraMapper:
            An lsst.daf.butlerUtils.CameraMapper instance.

        Raises TypeError if cameraMapper has the wrong type, and
        RuntimeError if the dataset is unknown, the camera is not
        supported, or the template uses a key that is not a valid dataId
        key for the camera.
        """
        # Imported here so that the name is bound regardless of what the
        # module imports at top level.
        import lsst.daf.butlerUtils

        if not isinstance(cameraMapper, lsst.daf.butlerUtils.CameraMapper):
            raise TypeError('Expecting a lsst.daf.butlerUtils.CameraMapper!')
        if dataset not in cameraMapper.mappings:
            # RuntimeError, like the other errors of this module; the
            # previously used NotFoundError name is not defined here and
            # produced a NameError instead of the intended message.
            raise RuntimeError('Unknown dataset ' + str(dataset))
        HfsScanner.__init__(self, cameraMapper.mappings[dataset].template)
        camera = camera.lower()
        if camera not in _keyTypes:
            raise RuntimeError('{} camera not supported yet'.format(camera))
        for k in self._formatKeys:
            if k not in _keyTypes[camera]:
                raise RuntimeError('{} is not a valid dataId key for camera {}'.format(k, camera))
            # replace the generic str/int munger with the camera specific one
            self._formatKeys[k].munge = _mungeFunctions[camera]
407