LSSTApplications  10.0+286,10.0+36,10.0+46,10.0-2-g4f67435,10.1+152,10.1+37,11.0,11.0+1,11.0-1-g47edd16,11.0-1-g60db491,11.0-1-g7418c06,11.0-2-g04d2804,11.0-2-g68503cd,11.0-2-g818369d,11.0-2-gb8b8ce7
LSSTDataManagementBasePackage
fsScanner.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 #
4 # LSST Data Management System
5 # Copyright 2008, 2009, 2010 LSST Corporation.
6 #
7 # This product includes software developed by the
8 # LSST Project (http://www.lsst.org/).
9 #
10 # This program is free software: you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation, either version 3 of the License, or
13 # (at your option) any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
19 #
20 # You should have received a copy of the LSST License Statement and
21 # the GNU General Public License along with this program. If not,
22 # see <http://www.lsstcorp.org/LegalNotices/>.
23 #
24 
25 
26 """This module provides the FsScanner class."""
27 
28 import glob
29 import os
30 import re
31 import sys
32 
class FsScanner(object):
    """Scan a filesystem location for paths matching a template.

    Each matching path is decomposed into the fields named in the template;
    the field values are converted to the type implied by their format
    specification and passed, together with the path, to a callback function.
    """

    def __init__(self, pathTemplate):
        """Build the glob pattern and regular expression for a template.

        pathTemplate is a Python %-style format string with named
        substitution specifications, i.e. a template that would be suitable
        for generating a path given a set of fields in a dictionary.  Hex
        conversions (%x or %X) are not handled.

        Example:
        %(field)s/%(visit)d/%(exposure)d/raw-%(visit)d-e%(exposure)03d-c%(ccd)03d-a%(amp)03d.fits

        Fields may appear multiple times; the second and subsequent
        appearances of such fields have "_{number}" appended to their names
        to disambiguate them, although it is typically assumed that they
        will all be identical.  The number comes from a single counter
        shared by all repeated fields, starting at 0.

        Text outside the substitution specifications is copied into the
        regular expression unescaped, so regular expression metacharacters
        in it (such as ".") keep their special meaning.

        Sets three attributes:
        globString -- the template with every substitution replaced by "*"
        reString   -- regular expression with one named group per field
        fields     -- maps field name to dict(pos=..., fieldType=...)
        """

        # Matches one named substitution: group 1 is the field name and
        # group 2 the conversion character that decides the field type.
        fmt = re.compile(r'%\((\w+)\).*?([dioueEfFgGcrs])')

        # Change template into a globbable path specification.
        self.globString = fmt.sub('*', pathTemplate)

        # Change template into a regular expression, collecting the pieces
        # in a list and joining them once at the end.
        self.fields = {}
        pieces = []
        last = 0
        n = 0
        for pos, m in enumerate(fmt.finditer(pathTemplate)):
            fieldName = m.group(1)
            if fieldName in self.fields:
                fieldName += "_%d" % (n,)
                n += 1

            # Literal text between the previous substitution and this one.
            pieces.append(pathTemplate[last:m.start(0)])
            last = m.end(0)

            conversion = m.group(2)
            if conversion in 'crs':
                fieldType = str
                valueRe = r'.+?'
            elif conversion in 'eEfFgG':
                fieldType = float
                valueRe = r'[\d.eE+-]+?'
            else:
                fieldType = int
                valueRe = r'[\d+-]+?'
            pieces.append('(?P<' + fieldName + '>' + valueRe + ')')

            self.fields[fieldName] = dict(pos=pos, fieldType=fieldType)

        # Literal text after the last substitution.
        pieces.append(pathTemplate[last:])
        self.reString = "".join(pieces)

    def getFields(self):
        """Return the list of fields that will be returned from matched
        paths, in the order in which they appear in the template."""

        return sorted(self.fields, key=lambda name: self.fields[name]['pos'])

    def isNumeric(self, name):
        """Return true if the given field contains a number."""

        return self.fields[name]['fieldType'] in (float, int)

    def isInt(self, name):
        """Return true if the given field contains an integer."""

        return self.fields[name]['fieldType'] == int

    def isFloat(self, name):
        """Return true if the given field contains a float."""

        return self.fields[name]['fieldType'] == float

    def processPath(self, location, callback):
        """Scan a given path location with the given callback function.

        The glob pattern is expanded relative to location.  For every
        resulting path that matches the template's regular expression in
        its entirety, callback(path, dataId) is called, where path is
        relative to location and dataId maps each field name to its value
        (int and float fields are converted, other fields stay strings).
        Paths that do not match are reported on standard error and skipped.

        The current working directory is changed to location while
        scanning (so it is also the working directory seen by the callback)
        and is restored afterwards, even if the callback or a field
        conversion raises.  A field whose text is not a valid number
        (e.g. "1-2" for an integer field) raises ValueError.
        """

        # Compile once, and require the whole path to match: the field
        # patterns are non-greedy, so an unanchored search would truncate a
        # field at the end of the template and could match mid-path.
        pathRe = re.compile(r'(?:' + self.reString + r')\Z')

        curdir = os.getcwd()
        os.chdir(location)
        try:
            for path in glob.glob(self.globString):
                m = pathRe.match(path)
                if m is None:
                    sys.stderr.write(
                        "Warning: unmatched path: %s\n" % (path,))
                    continue

                dataId = m.groupdict()
                for f in self.fields:
                    if self.isInt(f):
                        dataId[f] = int(dataId[f])
                    elif self.isFloat(f):
                        dataId[f] = float(dataId[f])
                callback(path, dataId)
        finally:
            os.chdir(curdir)