LSSTApplications  18.1.0
LSSTDataManagementBasePackage
fsScanner.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008, 2009, 2010 LSST Corporation.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <http://www.lsstcorp.org/LegalNotices/>.
21 #
22 
23 
24 """This module provides the FsScanner class."""
25 from __future__ import print_function
26 from builtins import range
27 from builtins import object
28 
29 import glob
30 import os
31 import re
32 import sys
33 
34 
36  """Class to scan a filesystem location for paths matching a template.
37 
38  Decomposes the resulting paths into fields and returns them in a
39  dictionary keyed by path.
40  """
41 
def __init__(self, pathTemplate):
    """Build the glob pattern and regular expression for a path template.

    The template is a Python string with named %-style format
    specifications, i.e. one that could generate a path from a dictionary
    of fields. Hex conversions (%x or %X) are not handled.

    Example:
    %(field)s/%(visit)d/%(exposure)d/raw-%(visit)d-e%(exposure)03d-c%(ccd)03d-a%(amp)03d.fits

    A field may appear more than once. The second and subsequent
    appearances are recorded with "_{number}" appended to the name, where
    the number comes from one counter shared by all repeated fields. The
    repeats are normally expected to carry the same value as the first.

    A trailing bracketed suffix (used to indicate which HDU of a file to
    read) is removed before anything else and takes no part in the search.

    Sets three attributes:
    globString -- the template with every field replaced by "*"
    reString -- a regular expression with one named group per field
    fields -- maps each field name to dict(pos=<index>, fieldType=<type>),
    where the type is str, float or int depending on the conversion
    """

    # Trim a trailing "[...]" suffix off the end of the path template.
    if pathTemplate.endswith(']'):
        bracket = pathTemplate.rfind('[')
        # rfind returns -1 when there is no opening bracket; slicing with
        # that value would silently drop the final character instead.
        if bracket != -1:
            pathTemplate = pathTemplate[:bracket]

    # Matches one format specification: the field name in parentheses,
    # any flags/width/precision, and the conversion character.
    fmt = re.compile(r'%\((\w+)\).*?([dioueEfFgGcrs])')

    # Change template into a globbable path specification.
    self.globString = fmt.sub('*', pathTemplate)

    # Change template into a regular expression.
    self.fields = {}
    pieces = []
    last = 0
    n = 0
    for pos, m in enumerate(fmt.finditer(pathTemplate)):
        fieldName = m.group(1)
        if fieldName in self.fields:
            # Group names must be unique, so repeats get a numeric suffix.
            fieldName += "_%d" % (n,)
            n += 1

        # Text between fields is literal path text. Escape it so that
        # characters such as "." or "+" match only themselves.
        pieces.append(re.escape(pathTemplate[last:m.start(0)]))
        last = m.end(0)

        conversion = m.group(2)
        if conversion in 'crs':
            fieldType = str
            pattern = r'.+'
        elif conversion in 'eEfFgG':
            fieldType = float
            pattern = r'[\d.eE+-]+'
        else:
            fieldType = int
            pattern = r'[\d+-]+'
        pieces.append('(?P<' + fieldName + '>' + pattern + ')')

        self.fields[fieldName] = dict(pos=pos, fieldType=fieldType)

    # Literal text after the last field.
    pieces.append(re.escape(pathTemplate[last:]))
    self.reString = "".join(pieces)
99 
def getFields(self):
    """Return the field names, ordered by the position stored for each."""

    # One slot per field; each name is dropped into the slot given by the
    # 'pos' entry recorded for it in self.fields.
    ordered = [""] * len(self.fields)
    for name, info in self.fields.items():
        ordered[info['pos']] = name
    return ordered
108 
def isNumeric(self, name):
    """Return True if the named field's recorded type is float or int."""

    fieldType = self.fields[name]['fieldType']
    return fieldType in (float, int)
113 
def isInt(self, name):
    """Return True if the named field's recorded type is int."""

    fieldInfo = self.fields[name]
    return fieldInfo['fieldType'] == int
118 
def isFloat(self, name):
    """Return True if the named field's recorded type is float."""

    fieldInfo = self.fields[name]
    return fieldInfo['fieldType'] == float
123 
def processPath(self, location):
    """Scan a location and decode every path that matches the template.

    The glob pattern is evaluated from inside `location`, so the paths
    used as keys in the result are relative to it. The working directory
    in effect on entry is restored before returning, including when the
    scan raises.

    :param location: directory to scan
    :return: Path info: {path: {key:value ...}, ...} e.g.:
    {'0239622/instcal0239622.fits.fz': {'visit_0': 239622, 'visit': 239622}}
    Values of integer and float fields are converted to int and float;
    other fields stay as the matched strings.
    :raises ValueError: if the text captured for a numeric field cannot
    be converted by int() or float()

    Paths found by the glob that do not match the regular expression are
    reported on stderr and left out of the result.
    """
    ret = {}
    curdir = os.getcwd()
    os.chdir(location)
    try:
        for path in glob.glob(self.globString):
            m = re.search(self.reString, path)
            if not m:
                print("Warning: unmatched path: %s" % (path,), file=sys.stderr)
                continue

            dataId = m.groupdict()
            for f in self.fields:
                if self.isInt(f):
                    dataId[f] = int(dataId[f])
                elif self.isFloat(f):
                    dataId[f] = float(dataId[f])
            ret[path] = dataId
    finally:
        # Always undo the chdir, otherwise a failed conversion would leave
        # the whole process stranded in `location`.
        os.chdir(curdir)
    return ret
def __init__(self, pathTemplate)
Definition: fsScanner.py:42
daf::base::PropertyList * list
Definition: fits.cc:885