23 """This module provides registry classes for maintaining dataset metadata 
   24 for use by the Data Butler.  Currently only a SQLite3-based registry is 
   25 implemented, but registries based on a text file, a policy file, a MySQL 
   26 (or other) relational database, and data gathered from scanning a filesystem 
   29 Currently this module assumes posix access (for both PosixRegistry AND 
   30 SqliteRegistry). It is possible that it can be factored so that at least the 
   31 SqliteRegistry can be remote/not on the local filesystem. For now this module 
   32 is only used by CameraMapper and by PosixStorage, both of which work on the 
   33 local filesystem only, so this works for the time being. 
   36 from . 
import fsScanner, sequencify
 
   38 import astropy.io.fits
 
   48         import sqlite 
as sqlite3
 
   55     import psycopg2 
as pgsql
 
   62     """The registry base class.""" 
   72         """Create a registry object of an appropriate type. 
   73         @param location (string) Path or URL for registry, or None if 
   84         if location.endswith(
".pgsql"):
 
   88         if re.match(
r'.*\.sqlite3', location):
 
   90                 raise RuntimeError(
"sqlite3 registry specified (%s), but unable to import sqlite3 module" %
 
   93             if registry.conn 
is None:
 
  102         if os.path.isdir(location):
 
  105         raise RuntimeError(
"Unable to create registry using location: " + location)
 
  109     """A glob-based filesystem registry""" 
  112         Registry.__init__(self)
 
  117         """Looks up the HDU number for a given template+dataId. 
  118         :param template: template with HDU specifier (ends with brackets and an 
  119         identifier that can be populated by a key-value pair in dataId), 
  120         e.g. "%(visit)07d/instcal%(visit)07d.fits.fz[%(ccdnum)d]" 
  121         :param dataId: dictionary that hopefully has a key-value pair whose key 
  122         matches (has the same name as) the key specifier in the template. 
  123         :return: the HDU specified by the template+dataId pair, or None if the 
  124         HDU can not be determined. 
  127         if not template.endswith(
']'):
 
  131         hduKey = template[template.rfind(
'[') + 1:template.rfind(
']')]
 
  133         hduKey = hduKey[hduKey.rfind(
'(') + 1:hduKey.rfind(
')')]
 
  136             return dataId[hduKey]
 
  143             lookupProperties = 
sequencify(lookupProperties)
 
  150             return "LookupData lookupProperties:%s dataId:%s foundItems:%s cachedStatus:%s" % \
 
  154             """Query the lookup status 
  156             :return: 'match' if the key+value pairs in dataId have been satisfied and keys in 
  157             lookupProperties have been found and their key+value added to resolvedId 
  158             'incomplete' if the found data matches but not all keys in lookupProperties have been matched 
  159             'notMatch' if data in foundItems does not match data in dataId 
  162                 """Placeholder class for item not found. 
  164                 (None might be a valid value so we don't want to use that) 
  168             if self.cachedStatus 
is not None:
 
  169                 return self.cachedStatus
 
  170             self.cachedStatus = 
'match' 
  171             for key 
in self.lookupProperties:
 
  172                 val = self.foundItems.get(key, NotFound)
 
  174                     self.cachedStatus = 
'incomplete' 
  176             for dataIdKey, dataIdValue 
in self.dataId.
items():
 
  177                 foundValue = self.foundItems.get(dataIdKey, NotFound)
 
  178                 if foundValue 
is not NotFound 
and foundValue != dataIdValue:
 
  179                     self.cachedStatus = 
'notMatch' 
  181             return self.cachedStatus
 
  194     def lookup(self, lookupProperties, reference, dataId, **kwargs):
 
  195         """Perform a lookup in the registry. 
  197         Return values are refined by the values in dataId. 
  198         Returns a list of values that match keys in lookupProperties. 
  199         e.g. if the template is 'raw/raw_v%(visit)d_f%(filter)s.fits.gz', and 
  200         dataId={'visit':1}, and lookupProperties is ['filter'], and the 
  201         filesystem under self.root has exactly one file 'raw/raw_v1_fg.fits.gz' 
  202         then the return value will be [('g',)] 
  204         :param lookupProperties: keys whose values will be returned. 
  205         :param reference: other data types that may be used to search for values. 
  206         :param dataId: must be an iterable. Keys must be string. 
  207         If value is a string then will look for elements in the repository that match value for key. 
  208         If value is a 2-item iterable then will look for elements in the repository that are between (inclusive) 
  209         the first and second items in the value. 
  210         :param **kwargs: keys required for the posix registry to search for items. If required keys are not 
  211         provided, an empty list is returned. 
  212         'template': required. template parameter (typically from a policy) that can be used to look for files 
  213         'storage': optional. Needed to look for metadata in files. Currently supported values: 'FitsStorage'. 
  214         :return: a list of values that match keys in lookupProperties. 
  217         if 'template' in kwargs:
 
  218             template = kwargs[
'template']
 
  222         storage = kwargs[
'storage'] 
if 'storage' in kwargs 
else None 
  226         allPaths = scanner.processPath(self.
root)
 
  228         for path, foundProperties 
in allPaths.items():
 
  233             lookupData.setFoundItems(foundProperties)
 
  234             if 'incomplete' == lookupData.status():
 
  235                 PosixRegistry.lookupMetadata(os.path.join(self.
root, path), template, lookupData, storage)
 
  236             if 'match' == lookupData.status():
 
  237                 ll = tuple(lookupData.foundItems[key] 
for key 
in lookupData.lookupProperties)
 
  243         """Dispatcher for looking up metadata in a file of a given storage type 
  245         if storage == 
'FitsStorage':
 
  246             PosixRegistry.lookupFitsMetadata(filepath, template, lookupData, storage)
 
  250         """Look up metadata in a fits file. 
  251         Will try to discover the correct HDU to look in by testing if the 
  252         template has a value in brackets at the end. 
  253         If the HDU is specified but the metadata key is not discovered in 
  254         that HDU, will look in the primary HDU before giving up. 
  255         :param filepath: path to the file 
  256         :param template: template that was used to discover the file. This can 
  257         be used to look up the correct HDU as needed. 
  258         :param lookupData: an instance of LookupData that contains the 
  259         lookupProperties, the dataId, and the data that has been found so far. 
  260         Will be updated with new information as discovered. 
  265             hdulist = astropy.io.fits.open(filepath, memmap=
True)
 
  268         hduNumber = PosixRegistry.getHduNumber(template=template, dataId=dataId)
 
  269         if hduNumber 
is not None and hduNumber < len(hdulist):
 
  270             hdu = hdulist[hduNumber]
 
  274             primaryHdu = hdulist[0]
 
  278         for property 
in lookupData.getMissingKeys():
 
  280             if hdu 
is not None and property 
in hdu.header:
 
  281                 propertyValue = hdu.header[property]
 
  283             elif primaryHdu 
is not None and property 
in primaryHdu.header:
 
  284                 propertyValue = primaryHdu.header[property]
 
  285             lookupData.addFoundItems({property: propertyValue})
 
  289     """A base class for SQL-based registries 
  291     Subclasses should define the class variable `placeHolder` (the particular 
  292     placeholder to use for parameter substitution) appropriately. The 
  293     database's python module should define `paramstyle` (see PEP 249), which 
  294     would indicate what to use for a placeholder: 
  295     * paramstyle = "qmark" --> placeHolder = "?" 
  296     * paramstyle = "format" --> placeHolder = "%s" 
  297     Other `paramstyle` values are not currently supported. 
  299     Constructor parameters 
  300     ---------------------- 
  301     conn : DBAPI connection object 
  311         conn : DBAPI connection object 
  314         Registry.__init__(self)
 
  318         if hasattr(self, 
"conn") 
and self.
conn:
 
  322     def lookup(self, lookupProperties, reference, dataId, **kwargs):
 
  323         """Perform a lookup in the registry. 
  325         Return values are refined by the values in dataId. 
  326         Returns a list of values that match keys in lookupProperties. 
  327         e.g. if the template is 'raw/raw_v%(visit)d_f%(filter)s.fits.gz', and 
  328         dataId={'visit':1}, and lookupProperties is ['filter'], and the 
  329         filesystem under self.root has exactly one file 'raw/raw_v1_fg.fits.gz' 
  330         then the return value will be [('g',)] 
  332         :param lookupProperties: name(s) of the columns to select; their values are returned. 
  333         :param dataId: must be an iterable. Keys must be string. 
  334         If key is a string then will look for elements in the repository that match value for key. 
  335         If key is a 2-item iterable then will look for elements in the repository where the value is between 
  336         the values of key[0] and key[1]. 
  337         :param reference: other data types that may be used to search for values. 
  338         :param **kwargs: nothing needed for sqlite lookup 
  339         :return: a list of values that match keys in lookupProperties. 
  346         lookupProperties = 
sequencify(lookupProperties)
 
  348         cmd = 
"SELECT DISTINCT " 
  349         cmd += 
", ".join(lookupProperties)
 
  350         cmd += 
" FROM " + 
" NATURAL JOIN ".join(reference)
 
  352         if dataId 
is not None and len(dataId) > 0:
 
  354             for k, v 
in dataId.items():
 
  355                 if hasattr(k, 
'__iter__') 
and not isinstance(k, str):
 
  357                         raise RuntimeError(
"Wrong number of keys for range:%s" % (k,))
 
  358                     whereList.append(
"(%s BETWEEN %s AND %s)" % (self.
placeHolder, k[0], k[1]))
 
  361                     whereList.append(
"%s = %s" % (k, self.
placeHolder))
 
  363             cmd += 
" WHERE " + 
" AND ".join(whereList)
 
  364         cursor = self.
conn.cursor()
 
  365         cursor.execute(cmd, valueList)
 
  366         return [row 
for row 
in cursor.fetchall()]
 
  368     def executeQuery(self, returnFields, joinClause, whereFields, range, values):
 
  369         """Extract metadata from the registry. 
  370         @param returnFields (list of strings) Metadata fields to be extracted. 
  371         @param joinClause   (list of strings) Tables in which metadata fields 
  373         @param whereFields  (list of tuples) First tuple element is metadata 
  374                             field to query; second is the value that field 
  375                             must have (often '?'). 
  376         @param range        (tuple) Value, lower limit, and upper limit for a 
  377                             range condition on the metadata.  Any of these can 
  379         @param values       (tuple) Tuple of values to be substituted for '?' 
  380                             characters in the whereFields values or the range 
  382         @return (list of tuples) All sets of field values that meet the 
  386         cmd = 
"SELECT DISTINCT " 
  387         cmd += 
", ".join(returnFields)
 
  388         cmd += 
" FROM " + 
" NATURAL JOIN ".join(joinClause)
 
  391             for k, v 
in whereFields:
 
  392                 whereList.append(
"(%s = %s)" % (k, v))
 
  393         if range 
is not None:
 
  394             whereList.append(
"(%s BETWEEN %s AND %s)" % range)
 
  395         if len(whereList) > 0:
 
  396             cmd += 
" WHERE " + 
" AND ".join(whereList)
 
  397         cursor = self.
conn.cursor()
 
  398         cursor.execute(cmd, values)
 
  399         return [row 
for row 
in cursor.fetchall()]
 
  403     """A SQLite-based registry""" 
  414         if os.path.exists(location):
 
  415             conn = sqlite3.connect(location)
 
  416             conn.text_factory = str
 
  420         SqlRegistry.__init__(self, conn)
 
  424     """A PostgreSQL-based registry""" 
  433             Path to PostgreSQL configuration file. 
  436             raise RuntimeError(
"Cannot use PgsqlRegistry: could not import psycopg2")
 
  439         conn = pgsql.connect(host=config[
"host"], port=config[
"port"], database=config[
"database"],
 
  440                              user=config[
"user"], password=config[
"password"])
 
  442         SqlRegistry.__init__(self, conn)
 
  446         """Read YAML configuration file 
  448         The YAML configuration file should contain: 
  449         * host : host name for database connection 
  450         * port : port for database connection 
  451         * user : user name for database connection 
  452         * database : database name 
  455         * password : password for database connection 
  457         The optional entries are set to `None` in the output configuration. 
  462             Path to PostgreSQL YAML config file. 
  471             loader = yaml.FullLoader
 
  472         except AttributeError:
 
  474         with open(location) 
as ff:
 
  475             data = yaml.load(ff, Loader=loader)
 
  476         requireKeys = 
set([
"host", 
"port", 
"database", 
"user"])
 
  477         optionalKeys = 
set([
"password"])
 
  478         haveKeys = 
set(data.keys())
 
  479         if haveKeys - optionalKeys != requireKeys:
 
  481                 "PostgreSQL YAML configuration (%s) should contain only %s, and may contain 'password', " 
  482                 "but this contains: %s" %
 
  483                 (location, 
",".join(
"'%s'" % key 
for key 
in requireKeys),
 
  484                  ",".join(
"'%s'" % key 
for key 
in data.keys()))
 
  486         for key 
in optionalKeys:
 
  494             return SqlRegistry.lookup(self, *args, **kwargs)