23 """This module provides registry classes for maintaining dataset metadata 24 for use by the Data Butler. Currently only a SQLite3-based registry is 25 implemented, but registries based on a text file, a policy file, a MySQL 26 (or other) relational database, and data gathered from scanning a filesystem 29 Currently this module assumes posix access (for both PosixRegistry AND 30 SqliteRegistry). It is possible that it can be factored so that at least the 31 SqliteRegistry can be remote/not on the local filesystem. For now this module 32 is only used by CameraMapper and by PosixStorage, both of which work on the 33 local filesystem only, so this works for the time being. 36 from .
import fsScanner, sequencify
38 import astropy.io.fits
48 import sqlite
as sqlite3
55 import psycopg2
as pgsql
62 """The registry base class.""" 72 """Create a registry object of an appropriate type. 73 @param location (string) Path or URL for registry, or None if 84 if location.endswith(
".pgsql"):
88 if re.match(
r'.*\.sqlite3', location):
90 raise RuntimeError(
"sqlite3 registry specified (%s), but unable to import sqlite3 module" %
93 if registry.conn
is None:
102 if os.path.isdir(location):
105 raise RuntimeError(
"Unable to create registry using location: " + location)
109 """A glob-based filesystem registry""" 112 Registry.__init__(self)
117 """Looks up the HDU number for a given template+dataId. 118 :param template: template with HDU specifier (ends with brackets and an 119 identifier that can be populated by a key-value pair in dataId), 120 e.g. "%(visit)07d/instcal%(visit)07d.fits.fz[%(ccdnum)d]" 121 :param dataId: dictionary that hopefully has a key-value pair whose key 122 matches (has the same name as) the key specifier in the template. 123 :return: the HDU specified by the template+dataId pair, or None if the 124 HDU can not be determined. 127 if not template.endswith(
']'):
131 hduKey = template[template.rfind(
'[') + 1:template.rfind(
']')]
133 hduKey = hduKey[hduKey.rfind(
'(') + 1:hduKey.rfind(
')')]
136 return dataId[hduKey]
143 lookupProperties =
sequencify(lookupProperties)
150 return "LookupData lookupProperties:%s dataId:%s foundItems:%s cachedStatus:%s" % \
154 """Query the lookup status 156 :return: 'match' if the key+value pairs in dataId have been satisfied and keys in 157 lookupProperties have been found and their key+value added to resolvedId 158 'incomplete' if the found data matches but not all keys in lookupProperties have been matched 159 'notMatch' if data in foundItems does not match data in dataId 162 """Placeholder class for item not found. 164 (None might be a valid value so we don't want to use that) 168 if self.cachedStatus
is not None:
169 return self.cachedStatus
170 self.cachedStatus =
'match' 171 for key
in self.lookupProperties:
172 val = self.foundItems.get(key, NotFound)
174 self.cachedStatus =
'incomplete' 176 for dataIdKey, dataIdValue
in self.dataId.
items():
177 foundValue = self.foundItems.get(dataIdKey, NotFound)
178 if foundValue
is not NotFound
and foundValue != dataIdValue:
179 self.cachedStatus =
'notMatch' 181 return self.cachedStatus
194 def lookup(self, lookupProperties, reference, dataId, **kwargs):
195 """Perform a lookup in the registry. 197 Return values are refined by the values in dataId. 198 Returns a list of values that match keys in lookupProperties. 199 e.g. if the template is 'raw/raw_v%(visit)d_f%(filter)s.fits.gz', and 200 dataId={'visit':1}, and lookupProperties is ['filter'], and the 201 filesystem under self.root has exactly one file 'raw/raw_v1_fg.fits.gz' 202 then the return value will be [('g',)] 204 :param lookupProperties: keys whose values will be returned. 205 :param reference: other data types that may be used to search for values. 206 :param dataId: must be an iterable. Keys must be strings. 207 If value is a string then will look for elements in the repository that match value for key. 208 If value is a 2-item iterable then will look for elements in the repository that are between (inclusive) 209 the first and second items in the value. 210 :param **kwargs: keys required for the posix registry to search for items. If required keys are not 211 provided will return an empty list. 212 'template': required. template parameter (typically from a policy) that can be used to look for files 213 'storage': optional. Needed to look for metadata in files. Currently supported values: 'FitsStorage'. 214 :return: a list of values that match keys in lookupProperties. 217 if 'template' in kwargs:
218 template = kwargs[
'template']
222 storage = kwargs[
'storage']
if 'storage' in kwargs
else None 226 allPaths = scanner.processPath(self.
root)
228 for path, foundProperties
in allPaths.items():
233 lookupData.setFoundItems(foundProperties)
234 if 'incomplete' == lookupData.status():
235 PosixRegistry.lookupMetadata(os.path.join(self.
root, path), template, lookupData, storage)
236 if 'match' == lookupData.status():
237 ll = tuple(lookupData.foundItems[key]
for key
in lookupData.lookupProperties)
243 """Dispatcher for looking up metadata in a file of a given storage type 245 if storage ==
'FitsStorage':
246 PosixRegistry.lookupFitsMetadata(filepath, template, lookupData, storage)
250 """Look up metadata in a fits file. 251 Will try to discover the correct HDU to look in by testing if the 252 template has a value in brackets at the end. 253 If the HDU is specified but the metadata key is not discovered in 254 that HDU, will look in the primary HDU before giving up. 255 :param filepath: path to the file 256 :param template: template that was used to discover the file. This can 257 be used to look up the correct HDU as needed. 258 :param lookupData: an instance of LookupData that contains the 259 lookupProperties, the dataId, and the data that has been found so far. 260 Will be updated with new information as discovered. 265 hdulist = astropy.io.fits.open(filepath, memmap=
True)
268 hduNumber = PosixRegistry.getHduNumber(template=template, dataId=dataId)
269 if hduNumber
is not None and hduNumber < len(hdulist):
270 hdu = hdulist[hduNumber]
274 primaryHdu = hdulist[0]
278 for property
in lookupData.getMissingKeys():
280 if hdu
is not None and property
in hdu.header:
281 propertyValue = hdu.header[property]
283 elif primaryHdu
is not None and property
in primaryHdu.header:
284 propertyValue = primaryHdu.header[property]
285 lookupData.addFoundItems({property: propertyValue})
289 """A base class for SQL-based registries 291 Subclasses should define the class variable `placeHolder` (the particular 292 placeholder to use for parameter substitution) appropriately. The 293 database's python module should define `paramstyle` (see PEP 249), which 294 would indicate what to use for a placeholder: 295 * paramstyle = "qmark" --> placeHolder = "?" 296 * paramstyle = "format" --> placeHolder = "%s" 297 Other `paramstyle` values are not currently supported. 299 Constructor parameters 300 ---------------------- 301 conn : DBAPI connection object 311 conn : DBAPI connection object 314 Registry.__init__(self)
318 if hasattr(self,
"conn")
and self.
conn:
322 def lookup(self, lookupProperties, reference, dataId, **kwargs):
323 """Perform a lookup in the registry. 325 Return values are refined by the values in dataId. 326 Returns a list of values that match keys in lookupProperties. 327 e.g. if the template is 'raw/raw_v%(visit)d_f%(filter)s.fits.gz', and 328 dataId={'visit':1}, and lookupProperties is ['filter'], and the 329 filesystem under self.root has exactly one file 'raw/raw_v1_fg.fits.gz' 330 then the return value will be [('g',)] 332 :param lookupProperties: 333 :param dataId: must be an iterable. Keys must be string. 334 If key is a string then will look for elements in the repository that match value for key. 335 If key is a 2-item iterable then will look for elements in the repository where the value is between 336 the values of key[0] and key[1]. 337 :param reference: other data types that may be used to search for values. 338 :param **kwargs: nothing needed for sqlite lookup 339 :return: a list of values that match keys in lookupProperties. 346 lookupProperties =
sequencify(lookupProperties)
348 cmd =
"SELECT DISTINCT " 349 cmd +=
", ".join(lookupProperties)
350 cmd +=
" FROM " +
" NATURAL JOIN ".join(reference)
352 if dataId
is not None and len(dataId) > 0:
354 for k, v
in dataId.items():
355 if hasattr(k,
'__iter__')
and not isinstance(k, str):
357 raise RuntimeError(
"Wrong number of keys for range:%s" % (k,))
358 whereList.append(
"(%s BETWEEN %s AND %s)" % (self.
placeHolder, k[0], k[1]))
361 whereList.append(
"%s = %s" % (k, self.
placeHolder))
363 cmd +=
" WHERE " +
" AND ".join(whereList)
364 cursor = self.
conn.cursor()
365 cursor.execute(cmd, valueList)
366 return [row
for row
in cursor.fetchall()]
368 def executeQuery(self, returnFields, joinClause, whereFields, range, values):
369 """Extract metadata from the registry. 370 @param returnFields (list of strings) Metadata fields to be extracted. 371 @param joinClause (list of strings) Tables in which metadata fields 373 @param whereFields (list of tuples) First tuple element is metadata 374 field to query; second is the value that field 375 must have (often '?'). 376 @param range (tuple) Value, lower limit, and upper limit for a 377 range condition on the metadata. Any of these can 379 @param values (tuple) Tuple of values to be substituted for '?' 380 characters in the whereFields values or the range 382 @return (list of tuples) All sets of field values that meet the 386 cmd =
"SELECT DISTINCT " 387 cmd +=
", ".join(returnFields)
388 cmd +=
" FROM " +
" NATURAL JOIN ".join(joinClause)
391 for k, v
in whereFields:
392 whereList.append(
"(%s = %s)" % (k, v))
393 if range
is not None:
394 whereList.append(
"(%s BETWEEN %s AND %s)" % range)
395 if len(whereList) > 0:
396 cmd +=
" WHERE " +
" AND ".join(whereList)
397 cursor = self.
conn.cursor()
398 cursor.execute(cmd, values)
399 return [row
for row
in cursor.fetchall()]
403 """A SQLite-based registry""" 414 if os.path.exists(location):
415 conn = sqlite3.connect(location)
416 conn.text_factory = str
420 SqlRegistry.__init__(self, conn)
424 """A PostgreSQL-based registry""" 433 Path to PostgreSQL configuration file. 436 raise RuntimeError(
"Cannot use PgsqlRegistry: could not import psycopg2")
439 conn = pgsql.connect(host=config[
"host"], port=config[
"port"], database=config[
"database"],
440 user=config[
"user"], password=config[
"password"])
442 SqlRegistry.__init__(self, conn)
446 """Read YAML configuration file 448 The YAML configuration file should contain: 449 * host : host name for database connection 450 * port : port for database connection 451 * user : user name for database connection 452 * database : database name 455 * password : password for database connection 457 The optional entries are set to `None` in the output configuration. 462 Path to PostgreSQL YAML config file. 471 loader = yaml.FullLoader
472 except AttributeError:
474 with open(location)
as ff:
475 data = yaml.load(ff, Loader=loader)
476 requireKeys =
set([
"host",
"port",
"database",
"user"])
477 optionalKeys =
set([
"password"])
478 haveKeys =
set(data.keys())
479 if haveKeys - optionalKeys != requireKeys:
481 "PostgreSQL YAML configuration (%s) should contain only %s, and may contain 'password', " 482 "but this contains: %s" %
483 (location,
",".join(
"'%s'" % key
for key
in requireKeys),
484 ",".join(
"'%s'" % key
for key
in data.keys()))
486 for key
in optionalKeys:
494 return SqlRegistry.lookup(self, *args, **kwargs)
def __init__(self, location)
def lookup(self, lookupProperties, reference, dataId, kwargs)
def lookupFitsMetadata(filepath, template, lookupData, dataId)
std::vector< SchemaItem< Flag > > * items
def lookup(self, args, kwargs)
def getHduNumber(template, dataId)
def __init__(self, lookupProperties, dataId)
daf::base::PropertySet * set
def addFoundItems(self, items)
def lookup(self, lookupProperties, reference, dataId, kwargs)
def executeQuery(self, returnFields, joinClause, whereFields, range, values)
def lookupMetadata(filepath, template, lookupData, storage)
def setFoundItems(self, items)
def __init__(self, location)