LSSTApplications  10.0+286,10.0+36,10.0+46,10.0-2-g4f67435,10.1+152,10.1+37,11.0,11.0+1,11.0-1-g47edd16,11.0-1-g60db491,11.0-1-g7418c06,11.0-2-g04d2804,11.0-2-g68503cd,11.0-2-g818369d,11.0-2-gb8b8ce7
LSSTDataManagementBasePackage
Public Member Functions | Private Attributes | List of all members
lsst.datarel.datasetScanner.HfsScanner Class Reference
Inheritance diagram for lsst.datarel.datasetScanner.HfsScanner:
lsst.datarel.datasetScanner.DatasetScanner

Public Member Functions

def __init__
 
def walk
 

Private Attributes

 _formatKeys
 
 _pathComponents
 

Detailed Description

A hierarchical scanner for paths matching a template, optionally
also restricting visited paths to those matching a list of dataId rules.

Definition at line 206 of file datasetScanner.py.

Constructor & Destructor Documentation

def lsst.datarel.datasetScanner.HfsScanner.__init__ (   self,
  template 
)
Build an HfsScanner for a given path template. The path template
should be a Python string with named format substitution
specifications, as used in mapper policy files. For example:

deepCoadd-results/%(filter)s/%(tract)d/%(patch)s/calexp-%(filter)s-%(tract)d-%(patch)s.fits

Note that a key may appear multiple times. If it does,
the value for each occurrence should be identical (the formatting
specs must be identical). Octal, binary, hexadecimal, and floating
point formats are not supported.

Definition at line 210 of file datasetScanner.py.

def __init__(self, template):
    """Build an HfsScanner for a given path template.

    The path template should be a Python string with named format
    substitution specifications, as used in mapper policy files.
    For example:

    deepCoadd-results/%(filter)s/%(tract)d/%(patch)s/calexp-%(filter)s-%(tract)d-%(patch)s.fits

    Note that a key may appear multiple times. If it does, the value
    for each occurrence should be identical (the formatting specs must
    be identical). Octal, binary, hexadecimal, and floating point
    formats are not supported.

    The template is normalized and split on os.sep; each path component
    is turned into a _PathComponent holding either the literal component
    (no format specs) or a compiled, fully-anchored regular expression.

    Raises RuntimeError if the normalized template is empty, absolute,
    or identifies a directory, or if a key is reused with a different
    format type-code.
    """
    template = os.path.normpath(template)
    if (len(template) == 0 or
            template == os.curdir or
            template[0] == os.sep or
            template[-1] == os.sep):
        raise RuntimeError(
            'Path template is empty, absolute, or identifies a directory')
    # key name -> _FormatKey(spec, type, munger) for its first occurrence
    self._formatKeys = {}
    # one _PathComponent per path level, in top-down order
    self._pathComponents = []
    fmt = re.compile(r'%\((\w+)\).*?([diucrs])')

    # split path into components
    for component in template.split(os.sep):
        # search for all occurrences of a format spec
        simple = True
        last = 0
        regex = ''
        newKeys = []
        for m in fmt.finditer(component):
            simple = False
            spec = m.group(0)
            k = m.group(1)
            # `in` instead of dict.has_key(), which no longer exists in
            # Python 3; the result is the same on Python 2.
            seenBefore = k in self._formatKeys
            # transform format spec into a regular expression
            regex += re.escape(component[last:m.start(0)])
            last = m.end(0)
            regex += '('
            if seenBefore:
                # repeated keys use a non-capturing group so that capture
                # group indexes line up with newKeys
                regex += '?:'
            if m.group(2) in 'crs':
                munge = _mungeStr
                typ = str
                regex += r'.+)'
            else:
                munge = _mungeInt
                typ = int
                regex += r'[+-]?\d+)'
            if seenBefore:
                # check consistency of formatting spec across key occurrences
                if spec[-1] != self._formatKeys[k].spec[-1]:
                    raise RuntimeError(
                        'Path template contains inconsistent format type-codes '
                        'for the same key')
            else:
                newKeys.append(k)
                self._formatKeys[k] = _FormatKey(spec, typ, munge)
        regex += re.escape(component[last:])
        if simple:
            regex = component  # literal match
        else:
            regex = re.compile('^' + regex + '$')
        self._pathComponents.append(_PathComponent(newKeys, regex, simple))

Member Function Documentation

def lsst.datarel.datasetScanner.HfsScanner.walk (   self,
  root,
  rules = None 
)
Generator that descends the given root directory in top-down
fashion, matching paths corresponding to the template and satisfying
the given rule list. The generator yields tuples of the form
(path, dataId), where path is a dataset file name relative to root,
and dataId is a key value dictionary identifying the file.

Definition at line 275 of file datasetScanner.py.

def walk(self, root, rules=None):
    """Generator that descends the given root directory in top-down
    fashion, matching paths corresponding to the template and satisfying
    the given rule list. The generator yields tuples of the form
    (path, dataId), where path is a dataset file name relative to root,
    and dataId is a key value dictionary identifying the file.

    If no matching file is found under root, the search is repeated
    under root/_parent, then root/_parent/_parent, and so on, until
    something is found or the directory does not exist. Yielded paths
    are relative to the directory that was actually scanned.

    rules, if given, is a list of dicts mapping a key to a container of
    allowed values; a path is kept when at least one rule accepts every
    key seen so far (a rule that does not mention a key accepts it).
    """
    oneFound = False
    while os.path.exists(root) and not oneFound:
        stack = [(0, root, rules, {})]
        while stack:
            # Use a separate name for the popped rule list: rebinding
            # `rules` here would leak a narrowed list into the _parent
            # retry performed by the outer loop.
            depth, path, curRules, dataId = stack.pop()
            if os.path.isfile(path):
                continue
            pc = self._pathComponents[depth]
            if pc.simple:
                # No need to list directory contents
                entries = [pc.regex]
                if not os.path.exists(os.path.join(path, pc.regex)):
                    continue
            else:
                entries = os.listdir(path)
            depth += 1
            for e in entries:
                subRules = curRules
                subDataId = dataId
                if not pc.simple:
                    # make sure e matches path component regular expression
                    m = pc.regex.match(e)
                    if not m:
                        continue
                    # got a match - update dataId with new key values (if any)
                    try:
                        for i, k in enumerate(pc.keys):
                            subDataId = self._formatKeys[k].munge(
                                k, m.group(i + 1), subDataId)
                    except Exception:
                        # Munger raises if value is invalid for key, so
                        # not really a match. Only Exception is caught so
                        # KeyboardInterrupt/SystemExit still propagate.
                        continue
                    if subRules and pc.keys:
                        # have dataId rules and saw new keys; filter rule list
                        for k in subDataId:
                            subRules = [r for r in subRules
                                        if k not in r or subDataId[k] in r[k]]
                        if not subRules:
                            continue  # no rules matched
                # Have path matching template and at least one rule
                p = os.path.join(path, e)
                if depth < len(self._pathComponents):
                    # recurse
                    stack.append((depth, p, subRules, subDataId))
                elif depth == len(self._pathComponents):
                    if os.path.isfile(p):
                        # found a matching file, yield it
                        yield os.path.relpath(p, root), subDataId
                        oneFound = True
        # end while stack
        root = os.path.join(root, "_parent")
337 
338 
339 # -- Camera specific dataId mungers ----

Member Data Documentation

lsst.datarel.datasetScanner.HfsScanner._formatKeys
private

Definition at line 229 of file datasetScanner.py.

lsst.datarel.datasetScanner.HfsScanner._pathComponents
private

Definition at line 230 of file datasetScanner.py.


The documentation for this class was generated from the following file: