LSSTApplications  11.0-24-g0a022a1,14.0+77,15.0,15.0+1
LSSTDataManagementBasePackage
datasetScanner.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2012 LSST Corporation.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <http://www.lsstcorp.org/LegalNotices/>.
21 #
22 import os
23 import os.path
24 import re
25 import lsst.daf.butlerUtils
26 from functools import reduce
27 
# Names exported by ``from <module> import *``.
__all__ = [
    'getMapperClass',
    'parseDataIdRules',
    'HfsScanner',
    'DatasetScanner',
]
33 
34 
# Fully qualified mapper class name for each supported camera, keyed by
# lower-case camera name.
_mapperClassName = {
    'lsstsim': 'lsst.obs.lsstSim.LsstSimMapper',
    'sdss': 'lsst.obs.sdss.SdssMapper',
    'cfht': 'lsst.obs.cfht.CfhtMapper',
}


def getMapperClass(camera):
    """Return the subclass of lsst.daf.persistence.Mapper
    to use for the camera with the given name (case-insensitive).

    camera:
        Camera name; it is lower-cased before being looked up in
        _mapperClassName.

    Raises RuntimeError if the camera name is not in _mapperClassName, or
    if the mapper module or class cannot be imported.
    """
    # Function-scope import keeps this block self-contained.
    import importlib

    camera = camera.lower()
    if camera not in _mapperClassName:
        raise RuntimeError("{} is not a valid camera name".format(camera))
    name = _mapperClassName[camera]
    moduleName, _, className = name.rpartition('.')
    try:
        return getattr(importlib.import_module(moduleName), className)
    except Exception:
        # Any import-time failure is reported uniformly, but
        # KeyboardInterrupt/SystemExit are allowed to propagate.
        raise RuntimeError("Failed to import {}".format(name))
56 
57 
# Value type (int or str) of every dataId key recognized for each supported
# camera, keyed by lower-case camera name.
_keyTypes = {
    'lsstsim': {
        'visit': int,
        'filter': str,
        'sensorName': str,
        'ccdName': str,
        'channelName': str,
        'ampName': str,
        'raft': str,
        'snap': int,
        'exposure': int,
        'ccd': str,
        'sensor': str,
        'amp': str,
        'channel': str,
        'raftId': int,
        'ccdId': int,
        'sensorId': int,
        'ampId': int,
        'channelId': int,
        'skyTile': int,
        'tract': int,
        'patch': str,
    },
    'sdss': {
        'run': int,
        'camcol': int,
        'field': int,
        'filter': str,
        'skyTile': int,
        'tract': int,
        'patch': str,
    },
    'cfht': {
        'visit': int,
        'filter': str,
        'ccdName': str,
        'ampName': str,
        'ccd': int,
        'amp': int,
        'skyTile': int,
        'tract': int,
        'patch': str,
    },
}


def parseDataIdRules(ruleList, camera):
    """A rule is a string in the following format:

    'key=value1[^value2[^value3...]]'

    The values may either be strings, or of the form 'int..int'
    (e.g. '1..3') which is interpreted as '1^2^3' (inclusive, unlike a python
    range). So '0^2..4^7..9' is equivalent to '0^2^3^4^7^8^9'. Range syntax
    is only recognized for keys whose type is int.

    This function parses a list of such strings, and returns a dict mapping
    keys to sets of legal values. Rules for the same key are merged; a rule
    with an empty value list is ignored.

    ruleList:
        List of rule strings
    camera:
        Camera the rule list applies to (e.g. 'lsstSim' or 'sdss');
        case-insensitive.

    Raises RuntimeError for an unrecognized camera or dataId key, and
    ValueError if a value for an integer key is not an integer or range.
    """
    camera = camera.lower()
    if camera not in _keyTypes:
        raise RuntimeError('{} is not a recognized camera name'.format(camera))
    keyTypes = _keyTypes[camera]
    rangeRe = re.compile(r'^(\d+)\.\.(\d+)$')
    kvs = {}
    for rule in ruleList:
        # process rule for a single key
        key, _, pattern = rule.partition('=')
        if key not in keyTypes:
            raise RuntimeError('{} is not a valid dataId key for camera {}'.format(key, camera))
        if not pattern:
            continue
        isInt = keyTypes[key] is int
        values = set()
        # compute union of all values or value ranges
        for p in pattern.split('^'):
            if not isInt:
                values.add(p)
                continue
            # check for range syntax
            m = rangeRe.match(p)
            if m:
                # range() rather than xrange(): the latter does not exist on
                # Python 3. The upper bound is inclusive.
                values.update(range(int(m.group(1)), int(m.group(2)) + 1))
            else:
                values.add(int(p))
        kvs.setdefault(key, set()).update(values)
    return kvs
150 
151 
class _FormatKey(object):
    """A key in a path template. Three attributes are provided:

    spec
        Formatting spec for the key, e.g. '%(filter)s'.

    typ
        key value type; int or str

    munge
        A function that takes a key name, key value string and a dictionary.
        This function should return a fresh dictionary including new entries
        derived from the given key, value, and existing entries. The
        _mungeStr and _mungeInt functions are examples.
    """

    def __init__(self, spec, typ, munge):
        """Store the formatting spec, value type and munger unchanged."""
        self.spec = spec
        self.typ = typ
        self.munge = munge
172 
173 
def _mungeStr(k, v, dataId):
    """Munger for keys with string formats.

    Returns a copy of dataId in which key k is set to str(v); the input
    dictionary is not modified.
    """
    kv = dataId.copy()
    kv[k] = str(v)
    return kv
179 
180 
def _mungeInt(k, v, dataId):
    """Munger for keys with integer formats.

    Returns a copy of dataId in which key k is set to int(v); the input
    dictionary is not modified. Raises ValueError if v cannot be converted
    to an integer.
    """
    kv = dataId.copy()
    kv[k] = int(v)
    return kv
186 
187 
class _PathComponent(object):
    """A single component (directory or file) of a path template. The
    following attributes are provided:

    keys
        List of key names first occurring in this path component.

    regex
        Compiled regular expression identifying matches to this path
        component unless simple is True; in that case, regex is just
        a string literal

    simple
        True if regex is a simple string literal rather than a pattern.
        In this case, keys will always be None or [].
    """

    def __init__(self, keys, regex, simple):
        """Store the key list, regex (or literal) and simple flag unchanged."""
        self.keys = keys
        self.regex = regex
        self.simple = simple
209 
210 
class HfsScanner(object):
    """A hierarchical scanner for paths matching a template, optionally
    also restricting visited paths to those matching a list of dataId rules.
    """

    def __init__(self, template):
        """Build an HfsScanner for a given path template. The path template
        should be a Python string with named format substitution
        specifications, as used in mapper policy files. For example:

        deepCoadd-results/%(filter)s/%(tract)d/%(patch)s/calexp-%(filter)s-%(tract)d-%(patch)s.fits

        Note that a key may appear multiple times. If it does,
        the value for each occurrence should be identical (the formatting
        specs must be identical). Octal, binary, hexadecimal, and floating
        point formats are not supported.

        Raises RuntimeError if the template is empty, absolute, or names a
        directory, or if one key is used with different format type-codes.
        """
        template = os.path.normpath(template)
        if (len(template) == 0 or
                template == os.curdir or
                template[0] == os.sep or
                template[-1] == os.sep):
            raise RuntimeError(
                'Path template is empty, absolute, or identifies a directory')
        self._formatKeys = {}
        self._pathComponents = []
        fmt = re.compile(r'%\((\w+)\).*?([diucrs])')

        # split path into components
        for component in template.split(os.sep):
            # search for all occurrences of a format spec
            simple = True
            last = 0
            parts = []
            newKeys = []
            for m in fmt.finditer(component):
                simple = False
                spec = m.group(0)
                k = m.group(1)
                seenBefore = k in self._formatKeys
                # transform format spec into a regular expression; literal
                # text between specs is escaped
                parts.append(re.escape(component[last:m.start(0)]))
                last = m.end(0)
                if m.group(2) in 'crs':
                    munge = _mungeStr
                    typ = str
                    body = r'.+'
                else:
                    munge = _mungeInt
                    typ = int
                    body = r'[+-]?\d+'
                # Only the first occurrence of a key gets a capturing group,
                # so group i + 1 always corresponds to newKeys[i].
                parts.append(('(?:' if seenBefore else '(') + body + ')')
                if seenBefore:
                    # check consistency of formatting spec across key occurrences
                    if spec[-1] != self._formatKeys[k].spec[-1]:
                        raise RuntimeError(
                            'Path template contains inconsistent format type-codes '
                            'for the same key')
                else:
                    newKeys.append(k)
                    self._formatKeys[k] = _FormatKey(spec, typ, munge)
            parts.append(re.escape(component[last:]))
            if simple:
                regex = component  # literal match
            else:
                regex = re.compile('^' + ''.join(parts) + '$')
            self._pathComponents.append(_PathComponent(newKeys, regex, simple))

    def walk(self, root, rules=None):
        """Generator that descends the given root directory in top-down
        fashion, matching paths corresponding to the template and satisfying
        the given rule list. The generator yields tuples of the form
        (path, dataId), where path is a dataset file name relative to the
        directory being searched, and dataId is a key value dictionary
        identifying the file.

        root:
            Directory to search. If nothing is found there, root/_parent
            is searched next, and so on while such a directory exists.
        rules:
            Optional list of dictionaries, each mapping dataId keys to a
            container of allowed values. A path is kept while at least one
            rule accepts every key of its dataId that the rule mentions.
            If None or empty, no filtering is done.
        """
        numComponents = len(self._pathComponents)
        oneFound = False
        while os.path.exists(root) and not oneFound:
            # Each root restarts from the caller's full rule list; the
            # filtered lists on the stack use a separate name so they never
            # overwrite it.
            stack = [(0, root, rules, {})]
            while stack:
                depth, path, curRules, dataId = stack.pop()
                if not os.path.isdir(path):
                    # only directories can be descended into
                    continue
                pc = self._pathComponents[depth]
                if pc.simple:
                    # No need to list directory contents
                    if not os.path.exists(os.path.join(path, pc.regex)):
                        continue
                    entries = [pc.regex]
                else:
                    entries = os.listdir(path)
                depth += 1
                for e in entries:
                    subRules = curRules
                    subDataId = dataId
                    if not pc.simple:
                        # make sure e matches path component regular expression
                        m = pc.regex.match(e)
                        if not m:
                            continue
                        # got a match - update dataId with new key values (if any)
                        try:
                            for i, k in enumerate(pc.keys):
                                subDataId = self._formatKeys[k].munge(k, m.group(i + 1), subDataId)
                        except Exception:
                            # Munger raises if value is invalid for key, so
                            # not really a match
                            continue
                        if subRules and pc.keys:
                            # have dataId rules and saw new keys; filter rule list
                            for k in subDataId:
                                subRules = [r for r in subRules
                                            if k not in r or subDataId[k] in r[k]]
                            if not subRules:
                                continue  # no rules matched
                    # Have path matching template and at least one rule
                    p = os.path.join(path, e)
                    if depth < numComponents:
                        # recurse
                        stack.append((depth, p, subRules, subDataId))
                    elif os.path.isfile(p):
                        # found a matching file, yield it
                        yield os.path.relpath(p, root), subDataId
                        oneFound = True
            # end while stack
            root = os.path.join(root, "_parent")
342 
343 
344 # -- Camera specific dataId mungers ----
345 
def _mungeLsstSim(k, v, dataId):
    """Munger for LsstSim dataId keys.

    Returns a copy of dataId updated from key name k and value string v:

    - 'raft': v must unpack into two items r1, r2; sets 'raft' to 'r1,r2'
      and 'raftId' to int(r1) * 5 + int(r2).
    - 'sensor' / 'ccd': sets 'sensor' to 's1,s2' and 'sensorNum' to
      int(s1) * 3 + int(s2).
    - 'channel' / 'amp': sets 'channel' to 'c1,c2' and 'channelNum' to
      int(c1) * 8 + int(c2).
    - 'snap' / 'exposure': sets 'snap' to int(v).
    - any other key: stored under k, converted to int if
      _keyTypes['lsstsim'] declares it as int.

    Raises ValueError if v does not have the expected form, and KeyError
    for a key missing from _keyTypes['lsstsim'].
    """
    dataId = dataId.copy()
    if k == 'raft':
        r1, r2 = v
        dataId['raft'] = r1 + ',' + r2
        dataId['raftId'] = int(r1) * 5 + int(r2)
    elif k in ('sensor', 'ccd'):
        s1, s2 = v
        dataId['sensor'] = s1 + ',' + s2
        # NOTE(review): _keyTypes lists 'sensorId', not 'sensorNum'; left
        # unchanged because callers may read this key - confirm intent.
        dataId['sensorNum'] = int(s1) * 3 + int(s2)
    elif k in ('channel', 'amp'):
        c1, c2 = v
        dataId['channel'] = c1 + ',' + c2
        # NOTE(review): _keyTypes lists 'channelId', not 'channelNum';
        # left unchanged - confirm intent.
        dataId['channelNum'] = int(c1) * 8 + int(c2)
    elif k in ('snap', 'exposure'):
        dataId['snap'] = int(v)
    elif _keyTypes['lsstsim'][k] is int:
        dataId[k] = int(v)
    else:
        dataId[k] = v
    return dataId
367 
368 
def _mungeSdss(k, v, dataId):
    """Munger for SDSS dataId keys.

    Returns a copy of dataId in which key k is set to v, converted to int
    when _keyTypes['sdss'] declares the key as int. Raises KeyError for a
    key missing from _keyTypes['sdss'].
    """
    dataId = dataId.copy()
    dataId[k] = int(v) if _keyTypes['sdss'][k] is int else v
    return dataId
376 
377 
def _mungeCfht(k, v, dataId):
    """Munger for CFHT dataId keys.

    Returns a copy of dataId updated from key name k and value string v:

    - 'ccd': sets 'ccd' to int(v) and 'ccdName' to v.
    - 'amp': sets 'amp' to int(v) and 'ampName' to v.
    - any other key: stored under k, converted to int if
      _keyTypes['cfht'] declares it as int.

    Raises ValueError if an integer value cannot be parsed, and KeyError
    for a key missing from _keyTypes['cfht'].
    """
    dataId = dataId.copy()
    if k == 'ccd':
        dataId['ccd'] = int(v)
        dataId['ccdName'] = v
    elif k == 'amp':
        dataId['amp'] = int(v)
        dataId['ampName'] = v
    # Look the type up in the CFHT table; the SDSS table has no entry for
    # CFHT-only keys such as 'visit'.
    elif _keyTypes['cfht'][k] is int:
        dataId[k] = int(v)
    else:
        dataId[k] = v
    return dataId
391 
# Camera specific dataId munger for each supported camera, keyed by
# lower-case camera name.
_mungeFunctions = {
    'lsstsim': _mungeLsstSim,
    'sdss': _mungeSdss,
    'cfht': _mungeCfht,
}
397 
398 
class DatasetScanner(HfsScanner):
    """File system scanner for a dataset known to a camera mapper.
    """

    def __init__(self, dataset, camera, cameraMapper):
        """Build a scanner from the path template of a mapper dataset.

        dataset:
            Name of the dataset; must be a key of cameraMapper.mappings.
        camera:
            Camera name (case-insensitive); must be a key of _keyTypes.
        cameraMapper:
            A lsst.daf.butlerUtils.CameraMapper instance; the template is
            taken from cameraMapper.mappings[dataset].template.

        Raises TypeError if cameraMapper is not a CameraMapper, and
        RuntimeError if the dataset is unknown, the camera is unsupported,
        or the template uses a key that is not valid for the camera.
        """
        if not isinstance(cameraMapper, lsst.daf.butlerUtils.CameraMapper):
            raise TypeError('Expecting a lsst.daf.butlerUtils.CameraMapper!')
        if dataset not in cameraMapper.mappings:
            # RuntimeError matches the other errors raised in this module;
            # the name used before (NotFoundError) is not defined here.
            raise RuntimeError('Unknown dataset ' + str(dataset))
        HfsScanner.__init__(self, cameraMapper.mappings[dataset].template)
        camera = camera.lower()
        if camera not in _keyTypes:
            raise RuntimeError('{} camera not supported yet'.format(camera))
        munge = _mungeFunctions[camera]
        for k in self._formatKeys:
            if k not in _keyTypes[camera]:
                raise RuntimeError('{} is not a valid dataId key for camera {}'.format(k, camera))
            # replace the generic munger with the camera specific one
            self._formatKeys[k].munge = munge
def parseDataIdRules(ruleList, camera)
def _mungeCfht(k, v, dataId)
def _mungeInt(k, v, dataId)
def __init__(self, dataset, camera, cameraMapper)
def _mungeStr(k, v, dataId)
std::shared_ptr< FrameSet > append(FrameSet const &first, FrameSet const &second)
Construct a FrameSet that performs two transformations in series.
Definition: functional.cc:33
def format(config, name=None, writeSourceLine=True, prefix="", verbose=False)
Definition: history.py:134
def _mungeLsstSim(k, v, dataId)
def walk(self, root, rules=None)
def _mungeSdss(k, v, dataId)
def __init__(self, keys, regex, simple)
def __init__(self, spec, typ, munge)