LSSTApplications  11.0-13-gbb96280,12.1+18,12.1+7,12.1-1-g14f38d3+72,12.1-1-g16c0db7+5,12.1-1-g5961e7a+84,12.1-1-ge22e12b+23,12.1-11-g06625e2+4,12.1-11-g0d7f63b+4,12.1-19-gd507bfc,12.1-2-g7dda0ab+38,12.1-2-gc0bc6ab+81,12.1-21-g6ffe579+2,12.1-21-gbdb6c2a+4,12.1-24-g941c398+5,12.1-3-g57f6835+7,12.1-3-gf0736f3,12.1-37-g3ddd237,12.1-4-gf46015e+5,12.1-5-g06c326c+20,12.1-5-g648ee80+3,12.1-5-gc2189d7+4,12.1-6-ga608fc0+1,12.1-7-g3349e2a+5,12.1-7-gfd75620+9,12.1-9-g577b946+5,12.1-9-gc4df26a+10
LSSTDataManagementBasePackage
registries.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008, 2009, 2010 LSST Corporation.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <http://www.lsstcorp.org/LegalNotices/>.
21 #
22 
23 """This module provides registry classes for maintaining dataset metadata
24 for use by the Data Butler. Currently only a SQLite3-based registry is
25 implemented, but registries based on a text file, a policy file, a MySQL
26 (or other) relational database, and data gathered from scanning a filesystem
27 are all anticipated.
28 
29 Currently this module assumes posix access (for both PosixRegistry AND
30 SqliteRegistry). It is possible that it can be factored so that at least the
31 SqliteRegistry can be remote/not on the local filesystem. For now this module
32 is only used by CameraMapper and by PosixStorage, both of which work on the
33 local filesystem only, so this works for the time being.
34 """
35 from __future__ import absolute_import
36 from builtins import object
37 from past.builtins import basestring
38 
39 import copy
40 from . import fsScanner, sequencify
41 import os
42 import astropy.io.fits
43 import re
44 try:
45  import sqlite3
46  haveSqlite3 = True
47 except ImportError:
48  try:
49  # try external pysqlite package; deprecated
50  import sqlite as sqlite3
51  haveSqlite3 = True
52  except ImportError:
53  haveSqlite3 = False
54 
55 
class Registry(object):
    """The registry base class.

    `create` is a factory that picks a concrete registry implementation for
    a given location.
    """

    def __init__(self):
        pass

    @staticmethod
    def create(location):
        """Create a registry object of an appropriate type.

        @param location (string) Path or URL for registry, or None if
                                 unavailable
        @return a SqliteRegistry or PosixRegistry, or None when no location
                was given or the sqlite3 registry has no connection
        @throws RuntimeError if no registry type can handle the location"""
        if location is None:
            return None

        # Dispatch to other registry flavours (FileRegistry for '.registry',
        # CalibRegistry for '.paf', DbRegistry for 'mysql:' and FsRegistry as
        # a fallback) used to be sketched here in commented-out form; none of
        # them is wired in.

        # An sqlite3 registry is recognised by its file name.
        if haveSqlite3 and re.match(r'.*\.sqlite3', location) is not None:
            sqliteRegistry = SqliteRegistry(location)
            return sqliteRegistry if sqliteRegistry.conn is not None else None

        # Otherwise any existing path is served by a PosixRegistry.
        if not os.path.exists(location):
            raise RuntimeError("Unable to create registry using location: " + location)
        return PosixRegistry(root=location)
92 
93 
95  """A glob-based filesystem registry"""
96 
def __init__(self, root):
    """Constructor.

    :param root: path stored as ``self.root``; `lookup` scans beneath it
        and joins found relative paths onto it.
    """
    # The base class is initialised explicitly before any state is set.
    Registry.__init__(self)
    self.root = root
100 
@staticmethod
def getHduNumber(template, dataId):
    """Looks up the HDU number for a given template+dataId.

    :param template: template with HDU specifier (ends with brackets and an
        identifier that can be populated by a key-value pair in dataId),
        e.g. "%(visit)07d/instcal%(visit)07d.fits.fz[%(ccdnum)d]"
    :param dataId: dictionary that hopefully has a key-value pair whose key
        matches (has the same name) as the key specifier in the template.
    :return: the HDU specified by the template+dataId pair, or None if the
        HDU can not be determined (the template has no trailing bracketed
        specifier, the specifier has no "(key)" part, or the key is not in
        dataId).
    """
    # sanity check that the template at least ends with a bracket.
    if not template.endswith(']'):
        return None

    # A closing bracket without an opening one is not an HDU specifier.
    # (rfind() returns -1 on failure; using that directly as a slice bound
    # would silently select the wrong text.)
    openBracket = template.rfind('[')
    if openBracket < 0:
        return None
    # get the key (with formatting) out of the brackets
    hduSpec = template[openBracket + 1:-1]

    # extract the key name from the formatting, e.g. "%(ccdnum)d" -> "ccdnum"
    openParen = hduSpec.rfind('(')
    closeParen = hduSpec.rfind(')')
    if openParen < 0 or closeParen < openParen:
        return None
    hduKey = hduSpec[openParen + 1:closeParen]

    if hduKey in dataId:
        return dataId[hduKey]
    return None
124 
class LookupData(object):
    """Bookkeeping for a single lookup.

    Holds the requested lookupProperties, the dataId used to refine the
    search, and the key/value items found so far for the candidate currently
    being examined.
    """

    # Sentinel for "key not present"; None might be a valid found value so
    # it can not be used for that purpose.
    _NOT_FOUND = object()

    def __init__(self, lookupProperties, dataId):
        """Constructor.

        :param lookupProperties: key or keys whose values are wanted; passed
            through sequencify.
        :param dataId: mapping of key to required value.
        """
        self.dataId = copy.copy(dataId)
        lookupProperties = sequencify(lookupProperties)
        self.lookupProperties = copy.copy(lookupProperties)
        self.foundItems = {}
        self.cachedStatus = None
        self.neededKeys = set(lookupProperties).union(dataId.keys())

    def __repr__(self):
        return "LookupData lookupProperties:%s dataId:%s foundItems:%s cachedStatus:%s" % \
            (self.lookupProperties, self.dataId, self.foundItems, self.cachedStatus)

    def status(self):
        """Query the lookup status

        :return: 'match' if every key in lookupProperties is present in
                 foundItems and no found value contradicts dataId
                 'incomplete' if the found data does not contradict dataId
                 but not all keys in lookupProperties have been found
                 'notMatch' if a value in foundItems differs from the value
                 for the same key in dataId
        """
        if self.cachedStatus is not None:
            return self.cachedStatus

        notFound = self._NOT_FOUND
        status = 'match'
        for key in self.lookupProperties:
            if self.foundItems.get(key, notFound) is notFound:
                status = 'incomplete'
                break
        # A mismatch takes precedence over 'incomplete'.
        for dataIdKey, dataIdValue in self.dataId.items():
            foundValue = self.foundItems.get(dataIdKey, notFound)
            if foundValue is not notFound and foundValue != dataIdValue:
                status = 'notMatch'
                break
        # Cache only the final answer, so a comparison that raises can not
        # leave a provisional status behind.
        self.cachedStatus = status
        return status

    def setFoundItems(self, items):
        """Replace the found items and invalidate the cached status.

        :param items: mapping of key to found value. It is copied so that
            later addFoundItems calls do not modify the caller's mapping.
        """
        self.cachedStatus = None
        self.foundItems = dict(items)

    def addFoundItems(self, items):
        """Merge more found items in and invalidate the cached status.

        :param items: mapping of key to found value.
        """
        self.cachedStatus = None
        self.foundItems.update(items)

    def getMissingKeys(self):
        """Return the set of needed keys (lookupProperties plus dataId keys)
        that are not yet in foundItems."""
        return self.neededKeys - set(self.foundItems.keys())
179 
def lookup(self, lookupProperties, reference, dataId, **kwargs):
    """Perform a lookup in the registry.

    Files under self.root that fit the template are scanned; for each one
    the values of the keys in lookupProperties are collected, and the file
    is kept only if what was found agrees with dataId.
    e.g. if the template is 'raw/raw_v%(visit)d_f%(filter)s.fits.gz', and
    dataId={'visit':1}, and lookupProperties is ['filter'], and the
    filesystem under self.root has exactly one file 'raw/raw_v1_fg.fits.gz'
    then the return value will be [('g',)]

    :param lookupProperties: keys whose values will be returned.
    :param reference: other data types that may be used to search for values.
        Not used by this implementation.
    :param dataId: mapping of key to required value; found values are
        compared to it for equality.
        NOTE(review): earlier documentation also described 2-item range
        values, but the matching done here is equality only - confirm.
    :param **kwargs: keys required for the posix registry to search for items.
        'template': required. template parameter (typically from a policy)
            that can be used to look for files. If it is absent an empty
            list is returned.
        'storage': optional. Needed to look for metadata in files. Currently
            supported values: 'FitsStorage'.
    :return: a list of tuples, one per matching file, holding the values for
        the keys in lookupProperties.
    """
    # Nothing can be scanned without a template.
    if 'template' not in kwargs:
        return []
    template = kwargs['template']
    storage = kwargs.get('storage')

    lookupData = PosixRegistry.LookupData(lookupProperties, dataId)
    candidates = fsScanner.FsScanner(template).processPath(self.root)

    matches = []  # one tuple per found file that matches
    for relPath, pathProperties in candidates.items():
        lookupData.setFoundItems(pathProperties)
        # Keys that the path itself did not supply may still be available
        # as metadata inside the file.
        if lookupData.status() == 'incomplete':
            fullPath = os.path.join(self.root, relPath)
            PosixRegistry.lookupMetadata(fullPath, template, lookupData, storage)
        if lookupData.status() != 'match':
            continue
        matches.append(tuple(lookupData.foundItems[key] for key in lookupData.lookupProperties))
    return matches
226 
@staticmethod
def lookupMetadata(filepath, template, lookupData, storage):
    """Dispatcher for looking up metadata in a file of a given storage type

    :param filepath: path to the file to inspect.
    :param template: template that was used to discover the file.
    :param lookupData: LookupData instance; updated in place with whatever
        metadata is found.
    :param storage: storage type name. Only 'FitsStorage' is handled; for
        any other value nothing is looked up.
    """
    if storage == 'FitsStorage':
        # The last argument of lookupFitsMetadata is the dataId it uses to
        # resolve the HDU key in the template, so hand it the lookup's
        # dataId rather than the storage name.
        PosixRegistry.lookupFitsMetadata(filepath, template, lookupData, lookupData.dataId)
233 
@staticmethod
def lookupFitsMetadata(filepath, template, lookupData, dataId):
    """Look up metadata in a fits file.

    Will try to discover the correct HDU to look in by testing if the
    template has a value in brackets at the end.
    If the HDU is specified but the metadata key is not discovered in
    that HDU, will look in the primary HDU before giving up.

    :param filepath: path to the file
    :param template: template that was used to discover the file. This can
        be used to look up the correct HDU as needed.
    :param lookupData: an instance of LookupData that contains the
        lookupProperties, the dataId, and the data that has been found so far.
        Will be updated with new information as discovered; keys that are
        found in neither HDU are recorded with the value None.
    :param dataId: passed to getHduNumber to resolve the HDU key named in
        the template.
    :return: None. If the file can not be opened (IOError) lookupData is
        left unchanged.
    """
    try:
        hdulist = astropy.io.fits.open(filepath, memmap=True)
    except IOError:
        # Best effort: an unreadable file simply contributes no metadata.
        return

    try:
        hduNumber = PosixRegistry.getHduNumber(template=template, dataId=dataId)
        if hduNumber is not None and hduNumber < len(hdulist):
            hdu = hdulist[hduNumber]
        else:
            hdu = None
        primaryHdu = hdulist[0] if len(hdulist) > 0 else None

        for key in lookupData.getMissingKeys():
            value = None
            if hdu is not None and key in hdu.header:
                value = hdu.header[key]
            # if the value is not in the indicated HDU, try the primary HDU:
            elif primaryHdu is not None and key in primaryHdu.header:
                value = primaryHdu.header[key]
            lookupData.addFoundItems({key: value})
    finally:
        # Release the file handle even if a header access fails.
        hdulist.close()
272 
273 
275  """A SQLite3-based registry."""
276 
def __init__(self, location):
    """Constructor.

    Opens a connection to the database file if it exists; otherwise
    ``self.conn`` is set to None.

    @param location (string) Path to SQLite3 file"""
    Registry.__init__(self)
    if not os.path.exists(location):
        self.conn = None
        return
    self.conn = sqlite3.connect(location)
    # Hand text columns back as str.
    self.conn.text_factory = str
287 
def lookup(self, lookupProperties, reference, dataId, **kwargs):
    """Perform a lookup in the registry.

    Runs a SELECT DISTINCT of the lookupProperties columns over the natural
    join of the reference tables, refined by the entries in dataId.

    :param lookupProperties: column name or names whose values are returned.
    :param reference: table name or names to select from.
    :param dataId: mapping, or None/empty for no restriction.
        If a key is a string the column of that name must equal the value.
        If a key is a 2-item iterable the value must lie between the
        columns named key[0] and key[1].
    :param **kwargs: nothing needed for sqlite lookup
    :return: a list of rows holding the values for lookupProperties, or
        None if there is no database connection.
    :raises RuntimeError: if an iterable key does not have exactly 2 items.
    """
    if not self.conn:
        return None

    # input variable sanitization:
    tables = sequencify(reference)
    columns = sequencify(lookupProperties)

    query = "SELECT DISTINCT " + ", ".join(columns)
    query += " FROM " + " NATURAL JOIN ".join(tables)

    bindValues = []
    if dataId is not None and len(dataId) > 0:
        conditions = []
        for key, value in dataId.items():
            isRangeKey = hasattr(key, '__iter__') and not isinstance(key, basestring)
            if not isRangeKey:
                conditions.append("%s = ?" % key)
            elif len(key) == 2:
                conditions.append("(? BETWEEN %s AND %s)" % (key[0], key[1]))
            else:
                raise RuntimeError("Wrong number of keys for range:%s" % (key,))
            # Either way the value is bound to the single '?' placeholder.
            bindValues.append(value)
        query += " WHERE " + " AND ".join(conditions)

    return [row for row in self.conn.execute(query, bindValues)]
335 
def executeQuery(self, returnFields, joinClause, whereFields, range, values):
    """Extract metadata from the registry.

    Field names, table names, whereFields entries and range entries are
    interpolated into the SQL text verbatim, so they must come from trusted
    code; only `values` are bound as query parameters.

    @param returnFields (list of strings) Metadata fields to be extracted.
    @param joinClause   (list of strings) Tables in which metadata fields
                        are located.
    @param whereFields  (list of tuples) First tuple element is metadata
                        field to query; second is the value that field
                        must have (often '?').
    @param range        (tuple) Value, lower limit, and upper limit for a
                        range condition on the metadata. Any of these can
                        be metadata fields. Any 3-item sequence is
                        accepted; None means no range condition.
    @param values       (tuple) Tuple of values to be substituted for '?'
                        characters in the whereFields values or the range
                        values.
    @return (list of tuples) All sets of field values that meet the
            criteria, or None if there is no database connection"""
    if not self.conn:
        return None

    cmd = "SELECT DISTINCT " + ", ".join(returnFields)
    cmd += " FROM " + " NATURAL JOIN ".join(joinClause)

    whereList = []
    if whereFields:
        whereList.extend("(%s = %s)" % (k, v) for k, v in whereFields)
    if range is not None:
        # '%' only unpacks a tuple; coerce so a list works as well.
        whereList.append("(%s BETWEEN %s AND %s)" % tuple(range))
    if whereList:
        cmd += " WHERE " + " AND ".join(whereList)

    return list(self.conn.execute(cmd, values))