LSST Applications  21.0.0-147-g0e635eb1+1acddb5be5,22.0.0+052faf71bd,22.0.0+1ea9a8b2b2,22.0.0+6312710a6c,22.0.0+729191ecac,22.0.0+7589c3a021,22.0.0+9f079a9461,22.0.1-1-g7d6de66+b8044ec9de,22.0.1-1-g87000a6+536b1ee016,22.0.1-1-g8e32f31+6312710a6c,22.0.1-10-gd060f87+016f7cdc03,22.0.1-12-g9c3108e+df145f6f68,22.0.1-16-g314fa6d+c825727ab8,22.0.1-19-g93a5c75+d23f2fb6d8,22.0.1-19-gb93eaa13+aab3ef7709,22.0.1-2-g8ef0a89+b8044ec9de,22.0.1-2-g92698f7+9f079a9461,22.0.1-2-ga9b0f51+052faf71bd,22.0.1-2-gac51dbf+052faf71bd,22.0.1-2-gb66926d+6312710a6c,22.0.1-2-gcb770ba+09e3807989,22.0.1-20-g32debb5+b8044ec9de,22.0.1-23-gc2439a9a+fb0756638e,22.0.1-3-g496fd5d+09117f784f,22.0.1-3-g59f966b+1e6ba2c031,22.0.1-3-g849a1b8+f8b568069f,22.0.1-3-gaaec9c0+c5c846a8b1,22.0.1-32-g5ddfab5d3+60ce4897b0,22.0.1-4-g037fbe1+64e601228d,22.0.1-4-g8623105+b8044ec9de,22.0.1-5-g096abc9+d18c45d440,22.0.1-5-g15c806e+57f5c03693,22.0.1-7-gba73697+57f5c03693,master-g6e05de7fdc+c1283a92b8,master-g72cdda8301+729191ecac,w.2021.39
LSST Data Management Base Package
registries.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008, 2009, 2010 LSST Corporation.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <http://www.lsstcorp.org/LegalNotices/>.
21 #
22 
23 """This module provides registry classes for maintaining dataset metadata
24 for use by the Data Butler. Currently only a SQLite3-based registry is
25 implemented, but registries based on a text file, a policy file, a MySQL
26 (or other) relational database, and data gathered from scanning a filesystem
27 are all anticipated.
28 
29 Currently this module assumes posix access (for both PosixRegistry AND
30 SqliteRegistry). It is possible that it can be factored so that at least the
31 SqliteRegistry can be remote/not on the local filesystem. For now this module
32 is only used by CameraMapper and by PosixStorage, both of which work on the
33 local filesystem only, so this works for the time being.
34 """
35 import copy
36 from . import fsScanner, sequencify
37 import os
38 import astropy.io.fits
39 import re
40 import yaml
41 
42 try:
43  import sqlite3
44  haveSqlite3 = True
45 except ImportError:
46  try:
47  # try external pysqlite package; deprecated
48  import sqlite as sqlite3
49  haveSqlite3 = True
50  except ImportError:
51  haveSqlite3 = False
52 
53 # PostgreSQL support
54 try:
55  import psycopg2 as pgsql
56  havePgsql = True
57 except ImportError:
58  havePgsql = False
59 
60 
class Registry:
    """Base class for dataset-metadata registries.

    Concrete registries are normally obtained through `Registry.create`, which
    chooses an implementation from the form of the location string.
    """

    def __init__(self):
        pass

    def __del__(self):
        pass

    @staticmethod
    def create(location):
        """Create a registry object of an appropriate type.

        @param location (string) Path or URL for registry, or None if
                        unavailable
        @return a registry instance; None if location is None or if an
                sqlite3 registry was requested but has no connection
        """
        if location is None:
            return None

        # A PostgreSQL registry is selected by the ".pgsql" suffix.
        if location.endswith(".pgsql"):
            return PgsqlRegistry(location)

        # An sqlite3 registry is selected when the location contains ".sqlite3".
        isSqlite = re.match(r'.*\.sqlite3', location) is not None
        if isSqlite:
            if not haveSqlite3:
                raise RuntimeError("sqlite3 registry specified (%s), but unable to import sqlite3 module" %
                                   (location,))
            sqliteRegistry = SqliteRegistry(location)
            return sqliteRegistry if sqliteRegistry.conn is not None else None

        # Otherwise fall back to a PosixRegistry rooted at an existing directory.
        if os.path.isdir(location):
            return PosixRegistry(root=location)

        raise RuntimeError("Unable to create registry using location: " + location)
104 
105 
107  """A glob-based filesystem registry"""
108 
109  def __init__(self, root):
110  Registry.__init__(self)
111  self.rootroot = root
112 
113  @staticmethod
114  def getHduNumber(template, dataId):
115  """Looks up the HDU number for a given template+dataId.
116  :param template: template with HDU specifier (ends with brackets and an
117  identifier that can be populated by a key-value pair in dataId.
118  e.g. "%(visit)07d/instcal%(visit)07d.fits.fz[%(ccdnum)d]"
119  :param dataId: dictionary that hopefully has a key-value pair whose key
120  matches (has the same name) as the key specifier in the template.
121  :return: the HDU specified by the template+dataId pair, or None if the
122  HDU can not be determined.
123  """
124  # sanity check that the template at least ends with a brace.
125  if not template.endswith(']'):
126  return None
127 
128  # get the key (with formatting) out of the brances
129  hduKey = template[template.rfind('[') + 1:template.rfind(']')]
130  # extract the key name from the formatting
131  hduKey = hduKey[hduKey.rfind('(') + 1:hduKey.rfind(')')]
132 
133  if hduKey in dataId:
134  return dataId[hduKey]
135  return None
136 
137  class LookupData:
138 
139  def __init__(self, lookupProperties, dataId):
140  self.dataIddataId = copy.copy(dataId)
141  lookupProperties = sequencify(lookupProperties)
142  self.lookupPropertieslookupProperties = copy.copy(lookupProperties)
143  self.foundItemsfoundItems = {}
144  self.cachedStatuscachedStatus = None
145  self.neededKeysneededKeys = set(lookupProperties).union(dataId.keys())
146 
147  def __repr__(self):
148  return "LookupData lookupProperties:%s dataId:%s foundItems:%s cachedStatus:%s" % \
149  (self.lookupPropertieslookupProperties, self.dataIddataId, self.foundItemsfoundItems, self.cachedStatuscachedStatus)
150 
151  def status(self):
152  """Query the lookup status
153 
154  :return: 'match' if the key+value pairs in dataId have been satisifed and keys in
155  lookupProperties have found and their key+value added to resolvedId
156  'incomplete' if the found data matches but not all keys in lookupProperties have been matched
157  'not match' if data in foundId does not match data in dataId
158  """
159  class NotFound:
160  """Placeholder class for item not found.
161 
162  (None might be a valid value so we don't want to use that)
163  """
164  pass
165 
166  if self.cachedStatus is not None:
167  return self.cachedStatus
168  self.cachedStatus = 'match'
169  for key in self.lookupProperties:
170  val = self.foundItems.get(key, NotFound)
171  if val is NotFound:
172  self.cachedStatus = 'incomplete'
173  break
174  for dataIdKey, dataIdValue in self.dataId.items():
175  foundValue = self.foundItems.get(dataIdKey, NotFound)
176  if foundValue is not NotFound and foundValue != dataIdValue:
177  self.cachedStatus = 'notMatch'
178  break
179  return self.cachedStatus
180 
181  def setFoundItems(self, items):
182  self.cachedStatuscachedStatus = None
183  self.foundItemsfoundItems = items
184 
185  def addFoundItems(self, items):
186  self.cachedStatuscachedStatus = None
187  self.foundItemsfoundItems.update(items)
188 
189  def getMissingKeys(self):
190  return self.neededKeysneededKeys - set(self.foundItemsfoundItems.keys())
191 
192  def lookup(self, lookupProperties, reference, dataId, **kwargs):
193  """Perform a lookup in the registry.
194 
195  Return values are refined by the values in dataId.
196  Returns a list of values that match keys in lookupProperties.
197  e.g. if the template is 'raw/raw_v%(visit)d_f%(filter)s.fits.gz', and
198  dataId={'visit':1}, and lookupProperties is ['filter'], and the
199  filesystem under self.root has exactly one file 'raw/raw_v1_fg.fits.gz'
200  then the return value will be [('g',)]
201 
202  :param lookupProperties: keys whose values will be returned.
203  :param reference: other data types that may be used to search for values.
204  :param dataId: must be an iterable. Keys must be string.
205  If value is a string then will look for elements in the repository that match value for key.
206  If value is a 2-item iterable then will look for elements in the repository are between (inclusive)
207  the first and second items in the value.
208  :param **kwargs: keys required for the posix registry to search for items. If required keys are not
209  provide will return an empty list.
210  'template': required. template parameter (typically from a policy) that can be used to look for files
211  'storage': optional. Needed to look for metadata in files. Currently supported values: 'FitsStorage'.
212  :return: a list of values that match keys in lookupProperties.
213  """
214  # required kwargs:
215  if 'template' in kwargs:
216  template = kwargs['template']
217  else:
218  return []
219  # optional kwargs:
220  storage = kwargs['storage'] if 'storage' in kwargs else None
221 
222  lookupData = PosixRegistry.LookupData(lookupProperties, dataId)
223  scanner = fsScanner.FsScanner(template)
224  allPaths = scanner.processPath(self.rootroot)
225  retItems = [] # one item for each found file that matches
226  for path, foundProperties in allPaths.items():
227  # check for dataId keys that are not present in found properties
228  # search for those keys in metadata of file at path
229  # if present, check for matching values
230  # if not present, file can not match, do not use it.
231  lookupData.setFoundItems(foundProperties)
232  if 'incomplete' == lookupData.status():
233  PosixRegistry.lookupMetadata(os.path.join(self.rootroot, path), template, lookupData, storage)
234  if 'match' == lookupData.status():
235  ll = tuple(lookupData.foundItems[key] for key in lookupData.lookupProperties)
236  retItems.append(ll)
237  return retItems
238 
239  @staticmethod
240  def lookupMetadata(filepath, template, lookupData, storage):
241  """Dispatcher for looking up metadata in a file of a given storage type
242  """
243  if storage == 'FitsStorage':
244  PosixRegistry.lookupFitsMetadata(filepath, template, lookupData, storage)
245 
246  @staticmethod
247  def lookupFitsMetadata(filepath, template, lookupData, dataId):
248  """Look up metadata in a fits file.
249  Will try to discover the correct HDU to look in by testing if the
250  template has a value in brackets at the end.
251  If the HDU is specified but the metadata key is not discovered in
252  that HDU, will look in the primary HDU before giving up.
253  :param filepath: path to the file
254  :param template: template that was used to discover the file. This can
255  be used to look up the correct HDU as needed.
256  :param lookupData: an instance if LookupData that contains the
257  lookupProperties, the dataId, and the data that has been found so far.
258  Will be updated with new information as discovered.
259  :param dataId:
260  :return:
261  """
262  try:
263  hdulist = astropy.io.fits.open(filepath, memmap=True)
264  except IOError:
265  return
266  hduNumber = PosixRegistry.getHduNumber(template=template, dataId=dataId)
267  if hduNumber is not None and hduNumber < len(hdulist):
268  hdu = hdulist[hduNumber]
269  else:
270  hdu = None
271  if len(hdulist) > 0:
272  primaryHdu = hdulist[0]
273  else:
274  primaryHdu = None
275 
276  for property in lookupData.getMissingKeys():
277  propertyValue = None
278  if hdu is not None and property in hdu.header:
279  propertyValue = hdu.header[property]
280  # if the value is not in the indicated HDU, try the primary HDU:
281  elif primaryHdu is not None and property in primaryHdu.header:
282  propertyValue = primaryHdu.header[property]
283  lookupData.addFoundItems({property: propertyValue})
284 
285 
287  """A base class for SQL-based registries
288 
289  Subclasses should define the class variable `placeHolder` (the particular
290  placeholder to use for parameter substitution) appropriately. The
291  database's python module should define `paramstyle` (see PEP 249), which
292  would indicate what to use for a placeholder:
293  * paramstyle = "qmark" --> placeHolder = "?"
294  * paramstyle = "format" --> placeHolder = "%s"
295  Other `paramstyle` values are not currently supported.
296 
297  Constructor parameters
298  ----------------------
299  conn : DBAPI connection object
300  Connection object
301  """
302  placeHolder = "?" # Placeholder for parameter substitution
303 
304  def __init__(self, conn):
305  """Constructor.
306 
307  Parameters
308  ----------
309  conn : DBAPI connection object
310  Connection object
311  """
312  Registry.__init__(self)
313  self.connconn = conn
314 
315  def __del__(self):
316  if hasattr(self, "conn") and self.connconn:
317  self.connconn.close()
318  super().__del__()
319 
320  def _lookup(self, lookupProperties, dataId, reference, checkColumns=False):
321  """Perform a lookup in the registry.
322 
323  This is the worker code for cls.lookup with the added option of checking
324  that all the columns being looked up are in the database. The classic
325  example here is adding a template with an hdu, where the hdu in the dataId
326  prevents us looking up e.g. dateObs. checkColumns results in a performance
327  penalty, so is only invoked when a problem in the dataId keys has been seen
328 
329  Return values are refined by the values in dataId.
330  Returns a list of values that match keys in lookupProperties.
331  e.g. if the template is 'raw/raw_v%(visit)d_f%(filter)s.fits.gz', and
332  dataId={'visit':1}, and lookupProperties is ['filter'], and the
333  filesystem under self.root has exactly one file 'raw/raw_v1_fg.fits.gz'
334  then the return value will be [('g',)]
335 
336  :param lookupProperties:
337  :param dataId: must be a key/value iterable. Keys must be string.
338  See `SqlRegistry.lookup` for further details
339  :param reference: other data types that may be used to search for values.
340  :param checkColumns: if True, check that keys are actually in the registry and ignore them if not
341  :return: a list of values that match keys in lookupProperties.
342  """
343  cmd = "SELECT DISTINCT "
344  cmd += ", ".join(lookupProperties)
345  cmd += " FROM " + " NATURAL JOIN ".join(reference)
346  valueList = []
347  if dataId is not None and len(dataId) > 0:
348  whereList = []
349  for k, v in dataId.items():
350  if checkColumns: # check if k is in registry
351  try:
352  self.connconn.cursor().execute(
353  f'SELECT {k} FROM {" NATURAL JOIN ".join(reference)} LIMIT 1')
354  except sqlite3.OperationalError:
355  continue
356 
357  if hasattr(k, '__iter__') and not isinstance(k, str):
358  if len(k) != 2:
359  raise RuntimeError("Wrong number of keys for range:%s" % (k,))
360  whereList.append("(%s BETWEEN %s AND %s)" % (self.placeHolderplaceHolder, k[0], k[1]))
361  valueList.append(v)
362  else:
363  whereList.append("%s = %s" % (k, self.placeHolderplaceHolder))
364  valueList.append(v)
365  cmd += " WHERE " + " AND ".join(whereList)
366  cursor = self.connconn.cursor()
367  cursor.execute(cmd, valueList)
368  return [row for row in cursor.fetchall()]
369 
370  def lookup(self, lookupProperties, reference, dataId, **kwargs):
371  """Perform a lookup in the registry.
372 
373  Return values are refined by the values in dataId.
374  Returns a list of values that match keys in lookupProperties.
375  e.g. if the template is 'raw/raw_v%(visit)d_f%(filter)s.fits.gz', and
376  dataId={'visit':1}, and lookupProperties is ['filter'], and the
377  filesystem under self.root has exactly one file 'raw/raw_v1_fg.fits.gz'
378  then the return value will be [('g',)]
379 
380  :param lookupProperties:
381  :param dataId: must be a key/value iterable. Keys must be string.
382  If value is a string then will look for elements in the repository that match value for value.
383  If value is a 2-item iterable then will look for elements in the repository where the value is between
384  the values of value[0] and value[1].
385  :param reference: other data types that may be used to search for values.
386  :param **kwargs: nothing needed for sqlite lookup
387  :return: a list of values that match keys in lookupProperties.
388  """
389  if not self.connconn:
390  return None
391 
392  # input variable sanitization:
393  reference = sequencify(reference)
394  lookupProperties = sequencify(lookupProperties)
395 
396  try:
397  return self._lookup_lookup(lookupProperties, dataId, reference)
398  except sqlite3.OperationalError: # try again, with extra checking of the dataId keys
399  return self._lookup_lookup(lookupProperties, dataId, reference, checkColumns=True)
400 
401  def executeQuery(self, returnFields, joinClause, whereFields, range, values):
402  """Extract metadata from the registry.
403  @param returnFields (list of strings) Metadata fields to be extracted.
404  @param joinClause (list of strings) Tables in which metadata fields
405  are located.
406  @param whereFields (list of tuples) First tuple element is metadata
407  field to query; second is the value that field
408  must have (often '?').
409  @param range (tuple) Value, lower limit, and upper limit for a
410  range condition on the metadata. Any of these can
411  be metadata fields.
412  @param values (tuple) Tuple of values to be substituted for '?'
413  characters in the whereFields values or the range
414  values.
415  @return (list of tuples) All sets of field values that meet the
416  criteria"""
417  if not self.connconn:
418  return None
419  cmd = "SELECT DISTINCT "
420  cmd += ", ".join(returnFields)
421  cmd += " FROM " + " NATURAL JOIN ".join(joinClause)
422  whereList = []
423  if whereFields:
424  for k, v in whereFields:
425  whereList.append("(%s = %s)" % (k, v))
426  if range is not None:
427  whereList.append("(%s BETWEEN %s AND %s)" % range)
428  if len(whereList) > 0:
429  cmd += " WHERE " + " AND ".join(whereList)
430  cursor = self.connconn.cursor()
431  cursor.execute(cmd, values)
432  return [row for row in cursor.fetchall()]
433 
434 
436  """A SQLite-based registry"""
437  placeHolder = "?" # Placeholder for parameter substitution
438 
439  def __init__(self, location):
440  """Constructor
441 
442  Parameters
443  ----------
444  location : `str`
445  Path to SQLite3 file
446  """
447  if os.path.exists(location):
448  conn = sqlite3.connect(location)
449  conn.text_factory = str
450  self.rootroot = location
451  else:
452  conn = None
453  SqlRegistry.__init__(self, conn)
454 
455 
457  """A PostgreSQL-based registry"""
458  placeHolder = "%s"
459 
460  def __init__(self, location):
461  """Constructor
462 
463  Parameters
464  ----------
465  location : `str`
466  Path to PostgreSQL configuration file.
467  """
468  if not havePgsql:
469  raise RuntimeError("Cannot use PgsqlRegistry: could not import psycopg2")
470  config = self.readYamlreadYaml(location)
471  self._config_config = config
472  conn = pgsql.connect(host=config["host"], port=config["port"], database=config["database"],
473  user=config["user"], password=config["password"])
474  self.rootroot = location
475  SqlRegistry.__init__(self, conn)
476 
477  @staticmethod
478  def readYaml(location):
479  """Read YAML configuration file
480 
481  The YAML configuration file should contain:
482  * host : host name for database connection
483  * port : port for database connection
484  * user : user name for database connection
485  * database : database name
486 
487  It may also contain:
488  * password : password for database connection
489 
490  The optional entries are set to `None` in the output configuration.
491 
492  Parameters
493  ----------
494  location : `str`
495  Path to PostgreSQL YAML config file.
496 
497  Returns
498  -------
499  config : `dict`
500  Configuration
501  """
502  try:
503  # PyYAML >=5.1 prefers a different loader
504  loader = yaml.UnsafeLoader
505  except AttributeError:
506  loader = yaml.Loader
507  with open(location) as ff:
508  data = yaml.load(ff, Loader=loader)
509  requireKeys = set(["host", "port", "database", "user"])
510  optionalKeys = set(["password"])
511  haveKeys = set(data.keys())
512  if haveKeys - optionalKeys != requireKeys:
513  raise RuntimeError(
514  "PostgreSQL YAML configuration (%s) should contain only %s, and may contain 'password', "
515  "but this contains: %s" %
516  (location, ",".join("'%s'" % key for key in requireKeys),
517  ",".join("'%s'" % key for key in data.keys()))
518  )
519  for key in optionalKeys:
520  if key not in data:
521  data[key] = None
522 
523  return data
524 
525  def lookup(self, *args, **kwargs):
526  try:
527  return SqlRegistry.lookup(self, *args, **kwargs)
528  except Exception:
529  self.connconn.rollback()
530  raise
std::vector< SchemaItem< Flag > > * items
def __init__(self, lookupProperties, dataId)
Definition: registries.py:139
def lookupFitsMetadata(filepath, template, lookupData, dataId)
Definition: registries.py:247
def lookup(self, lookupProperties, reference, dataId, **kwargs)
Definition: registries.py:192
def lookupMetadata(filepath, template, lookupData, storage)
Definition: registries.py:240
def lookup(self, lookupProperties, reference, dataId, **kwargs)
Definition: registries.py:370
def _lookup(self, lookupProperties, dataId, reference, checkColumns=False)
Definition: registries.py:320
def executeQuery(self, returnFields, joinClause, whereFields, range, values)
Definition: registries.py:401
daf::base::PropertySet * set
Definition: fits.cc:912