23 """This module provides registry classes for maintaining dataset metadata 24 for use by the Data Butler. Currently only a SQLite3-based registry is 25 implemented, but registries based on a text file, a policy file, a MySQL 26 (or other) relational database, and data gathered from scanning a filesystem 29 Currently this module assumes posix access (for both PosixRegistry AND 30 SqliteRegistry). It is possible that it can be factored so that at least the 31 SqliteRegistry can be remote/not on the local filesystem. For now this module 32 is only used by CameraMapper and by PosixStorage, both of which work on the 33 local filesystem only, so this works for the time being. 35 from __future__
import absolute_import
36 from past.builtins
import basestring
37 from builtins
import super
40 from .
import fsScanner, sequencify
42 import astropy.io.fits
52 import sqlite
as sqlite3
59 import psycopg2
as pgsql
66 """The registry base class.""" 76 """Create a registry object of an appropriate type. 77 @param location (string) Path or URL for registry, or None if 88 if location.endswith(
".pgsql"):
92 if re.match(
r'.*\.sqlite3', location):
94 raise RuntimeError(
"sqlite3 registry specified (%s), but unable to import sqlite3 module" %
97 if registry.conn
is None:
106 if os.path.isdir(location):
109 raise RuntimeError(
"Unable to create registry using location: " + location)
113 """A glob-based filesystem registry""" 116 Registry.__init__(self)
121 """Looks up the HDU number for a given template+dataId. 122 :param template: template with HDU specifier (ends with brackets and an 123 identifier that can be populated by a key-value pair in dataId), 124 e.g. "%(visit)07d/instcal%(visit)07d.fits.fz[%(ccdnum)d]" 125 :param dataId: dictionary that hopefully has a key-value pair whose key 126 matches (has the same name) as the key specifier in the template. 127 :return: the HDU specified by the template+dataId pair, or None if the 128 HDU cannot be determined. 131 if not template.endswith(
']'):
135 hduKey = template[template.rfind(
'[') + 1:template.rfind(
']')]
137 hduKey = hduKey[hduKey.rfind(
'(') + 1:hduKey.rfind(
')')]
140 return dataId[hduKey]
147 lookupProperties =
sequencify(lookupProperties)
154 return "LookupData lookupProperties:%s dataId:%s foundItems:%s cachedStatus:%s" % \
158 """Query the lookup status 160 :return: 'match' if the key+value pairs in dataId have been satisfied and keys in 161 lookupProperties have been found and their key+value added to resolvedId 162 'incomplete' if the found data matches but not all keys in lookupProperties have been matched 163 'notMatch' if data in foundItems does not match data in dataId 166 """Placeholder class for item not found. 168 (None might be a valid value so we don't want to use that) 172 if self.cachedStatus
is not None:
173 return self.cachedStatus
174 self.cachedStatus =
'match' 175 for key
in self.lookupProperties:
176 val = self.foundItems.get(key, NotFound)
178 self.cachedStatus =
'incomplete' 180 for dataIdKey, dataIdValue
in self.dataId.
items():
181 foundValue = self.foundItems.get(dataIdKey, NotFound)
182 if foundValue
is not NotFound
and foundValue != dataIdValue:
183 self.cachedStatus =
'notMatch' 185 return self.cachedStatus
198 def lookup(self, lookupProperties, reference, dataId, **kwargs):
199 """Perform a lookup in the registry. 201 Return values are refined by the values in dataId. 202 Returns a list of values that match keys in lookupProperties. 203 e.g. if the template is 'raw/raw_v%(visit)d_f%(filter)s.fits.gz', and 204 dataId={'visit':1}, and lookupProperties is ['filter'], and the 205 filesystem under self.root has exactly one file 'raw/raw_v1_fg.fits.gz' 206 then the return value will be [('g',)] 208 :param lookupProperties: keys whose values will be returned. 209 :param reference: other data types that may be used to search for values. 210 :param dataId: must be an iterable. Keys must be string. 211 If value is a string then will look for elements in the repository that match value for key. 212 If value is a 2-item iterable then will look for elements in the repository that are between (inclusive) 213 the first and second items in the value. 214 :param **kwargs: keys required for the posix registry to search for items. If required keys are not 215 provided, will return an empty list. 216 'template': required. template parameter (typically from a policy) that can be used to look for files 217 'storage': optional. Needed to look for metadata in files. Currently supported values: 'FitsStorage'. 218 :return: a list of values that match keys in lookupProperties. 221 if 'template' in kwargs:
222 template = kwargs[
'template']
226 storage = kwargs[
'storage']
if 'storage' in kwargs
else None 230 allPaths = scanner.processPath(self.
root)
232 for path, foundProperties
in allPaths.items():
237 lookupData.setFoundItems(foundProperties)
238 if 'incomplete' == lookupData.status():
239 PosixRegistry.lookupMetadata(os.path.join(self.
root, path), template, lookupData, storage)
240 if 'match' == lookupData.status():
241 ll = tuple(lookupData.foundItems[key]
for key
in lookupData.lookupProperties)
247 """Dispatcher for looking up metadata in a file of a given storage type 249 if storage ==
'FitsStorage':
250 PosixRegistry.lookupFitsMetadata(filepath, template, lookupData, storage)
254 """Look up metadata in a fits file. 255 Will try to discover the correct HDU to look in by testing if the 256 template has a value in brackets at the end. 257 If the HDU is specified but the metadata key is not discovered in 258 that HDU, will look in the primary HDU before giving up. 259 :param filepath: path to the file 260 :param template: template that was used to discover the file. This can 261 be used to look up the correct HDU as needed. 262 :param lookupData: an instance of LookupData that contains the 263 lookupProperties, the dataId, and the data that has been found so far. 264 Will be updated with new information as discovered. 269 hdulist = astropy.io.fits.open(filepath, memmap=
True)
272 hduNumber = PosixRegistry.getHduNumber(template=template, dataId=dataId)
273 if hduNumber
is not None and hduNumber < len(hdulist):
274 hdu = hdulist[hduNumber]
278 primaryHdu = hdulist[0]
282 for property
in lookupData.getMissingKeys():
284 if hdu
is not None and property
in hdu.header:
285 propertyValue = hdu.header[property]
287 elif primaryHdu
is not None and property
in primaryHdu.header:
288 propertyValue = primaryHdu.header[property]
289 lookupData.addFoundItems({property: propertyValue})
293 """A base class for SQL-based registries 295 Subclasses should define the class variable `placeHolder` (the particular 296 placeholder to use for parameter substitution) appropriately. The 297 database's python module should define `paramstyle` (see PEP 249), which 298 would indicate what to use for a placeholder: 299 * paramstyle = "qmark" --> placeHolder = "?" 300 * paramstyle = "format" --> placeHolder = "%s" 301 Other `paramstyle` values are not currently supported. 303 Constructor parameters 304 ---------------------- 305 conn : DBAPI connection object 315 conn : DBAPI connection object 318 Registry.__init__(self)
322 if hasattr(self,
"conn")
and self.
conn:
326 def lookup(self, lookupProperties, reference, dataId, **kwargs):
327 """Perform a lookup in the registry. 329 Return values are refined by the values in dataId. 330 Returns a list of values that match keys in lookupProperties. 331 e.g. if the template is 'raw/raw_v%(visit)d_f%(filter)s.fits.gz', and 332 dataId={'visit':1}, and lookupProperties is ['filter'], and the 333 filesystem under self.root has exactly one file 'raw/raw_v1_fg.fits.gz' 334 then the return value will be [('g',)] 336 :param lookupProperties: 337 :param dataId: must be an iterable. Keys must be string. 338 If key is a string then will look for elements in the repository that match value for key. 339 If key is a 2-item iterable then will look for elements in the repository where the value is between 340 the values of key[0] and key[1]. 341 :param reference: other data types that may be used to search for values. 342 :param **kwargs: nothing needed for sqlite lookup 343 :return: a list of values that match keys in lookupProperties. 350 lookupProperties =
sequencify(lookupProperties)
352 cmd =
"SELECT DISTINCT " 353 cmd +=
", ".join(lookupProperties)
354 cmd +=
" FROM " +
" NATURAL JOIN ".join(reference)
356 if dataId
is not None and len(dataId) > 0:
358 for k, v
in dataId.items():
359 if hasattr(k,
'__iter__')
and not isinstance(k, basestring):
361 raise RuntimeError(
"Wrong number of keys for range:%s" % (k,))
362 whereList.append(
"(%s BETWEEN %s AND %s)" % (self.
placeHolder, k[0], k[1]))
365 whereList.append(
"%s = %s" % (k, self.
placeHolder))
367 cmd +=
" WHERE " +
" AND ".join(whereList)
368 cursor = self.
conn.cursor()
369 cursor.execute(cmd, valueList)
370 return [row
for row
in cursor.fetchall()]
372 def executeQuery(self, returnFields, joinClause, whereFields, range, values):
373 """Extract metadata from the registry. 374 @param returnFields (list of strings) Metadata fields to be extracted. 375 @param joinClause (list of strings) Tables in which metadata fields 377 @param whereFields (list of tuples) First tuple element is metadata 378 field to query; second is the value that field 379 must have (often '?'). 380 @param range (tuple) Value, lower limit, and upper limit for a 381 range condition on the metadata. Any of these can 383 @param values (tuple) Tuple of values to be substituted for '?' 384 characters in the whereFields values or the range 386 @return (list of tuples) All sets of field values that meet the 390 cmd =
"SELECT DISTINCT " 391 cmd +=
", ".join(returnFields)
392 cmd +=
" FROM " +
" NATURAL JOIN ".join(joinClause)
395 for k, v
in whereFields:
396 whereList.append(
"(%s = %s)" % (k, v))
397 if range
is not None:
398 whereList.append(
"(%s BETWEEN %s AND %s)" % range)
399 if len(whereList) > 0:
400 cmd +=
" WHERE " +
" AND ".join(whereList)
401 cursor = self.
conn.cursor()
402 cursor.execute(cmd, values)
403 return [row
for row
in cursor.fetchall()]
407 """A SQLite-based registry""" 418 if os.path.exists(location):
419 conn = sqlite3.connect(location)
420 conn.text_factory = str
424 SqlRegistry.__init__(self, conn)
428 """A PostgreSQL-based registry""" 437 Path to PostgreSQL configuration file. 440 raise RuntimeError(
"Cannot use PgsqlRegistry: could not import psycopg2")
443 conn = pgsql.connect(host=config[
"host"], port=config[
"port"], database=config[
"database"],
444 user=config[
"user"], password=config[
"password"])
446 SqlRegistry.__init__(self, conn)
450 """Read YAML configuration file 452 The YAML configuration file should contain: 453 * host : host name for database connection 454 * port : port for database connection 455 * user : user name for database connection 456 * database : database name 459 * password : password for database connection 461 The optional entries are set to `None` in the output configuration. 466 Path to PostgreSQL YAML config file. 473 with open(location)
as ff:
475 requireKeys =
set([
"host",
"port",
"database",
"user"])
476 optionalKeys =
set([
"password"])
477 haveKeys =
set(data.keys())
478 if haveKeys - optionalKeys != requireKeys:
480 "PostgreSQL YAML configuration (%s) should contain only %s, and may contain 'password', " 481 "but this contains: %s" %
482 (location,
",".join(
"'%s'" % key
for key
in requireKeys),
483 ",".join(
"'%s'" % key
for key
in data.keys()))
485 for key
in optionalKeys:
493 return SqlRegistry.lookup(self, *args, **kwargs)
def __init__(self, location)
def lookup(self, lookupProperties, reference, dataId, kwargs)
def lookupFitsMetadata(filepath, template, lookupData, dataId)
def lookup(self, args, kwargs)
def getHduNumber(template, dataId)
def __init__(self, lookupProperties, dataId)
daf::base::PropertySet * set
def addFoundItems(self, items)
def lookup(self, lookupProperties, reference, dataId, kwargs)
def executeQuery(self, returnFields, joinClause, whereFields, range, values)
def lookupMetadata(filepath, template, lookupData, storage)
std::vector< SchemaItem< Flag > > * items
def setFoundItems(self, items)
def __init__(self, location)