23 """This module provides registry classes for maintaining dataset metadata
24 for use by the Data Butler. Currently only a SQLite3-based registry is
25 implemented, but registries based on a text file, a policy file, a MySQL
26 (or other) relational database, and data gathered from scanning a filesystem
29 Currently this module assumes posix access (for both PosixRegistry AND
30 SqliteRegistry). It is possible that it can be factored so that at least the
31 SqliteRegistry can be remote/not on the local filesystem. For now this module
32 is only used by CameraMapper and by PosixStorage, both of which work on the
33 local filesystem only, so this works for the time being.
35 from __future__
import absolute_import
36 from builtins
import object
37 from past.builtins
import basestring
40 from .
import fsScanner, sequencify
42 import astropy.io.fits
50 import sqlite
as sqlite3
57 """The registry base class."""
64 """Create a registry object of an appropriate type.
65 @param location (string) Path or URL for registry, or None if
77 if haveSqlite3
and re.match(
r'.*\.sqlite3', location):
79 if registry.conn
is None:
88 if os.path.exists(location):
91 raise RuntimeError(
"Unable to create registry using location: " + location)
95 """A glob-based filesystem registry"""
98 Registry.__init__(self)
103 """Looks up the HDU number for a given template+dataId.
104 :param template: template with HDU specifier (ends with brackets and an
105 identifier that can be populated by a key-value pair in dataId),
106 e.g. "%(visit)07d/instcal%(visit)07d.fits.fz[%(ccdnum)d]"
107 :param dataId: dictionary that hopefully has a key-value pair whose key
108 matches (has the same name as) the key specifier in the template.
109 :return: the HDU specified by the template+dataId pair, or None if the
110 HDU cannot be determined.
113 if not template.endswith(
']'):
117 hduKey = template[template.rfind(
'[') + 1:template.rfind(
']')]
119 hduKey = hduKey[hduKey.rfind(
'(') + 1:hduKey.rfind(
')')]
122 return dataId[hduKey]
129 lookupProperties =
sequencify(lookupProperties)
136 return "LookupData lookupProperties:%s dataId:%s foundItems:%s cachedStatus:%s" % \
140 """Query the lookup status
142 :return: 'match' if the key+value pairs in dataId have been satisfied and keys in
143 lookupProperties have been found and their key+value added to foundItems
144 'incomplete' if the found data matches but not all keys in lookupProperties have been matched
145 'notMatch' if data in foundItems does not match data in dataId
148 """Placeholder class for item not found.
150 (None might be a valid value so we don't want to use that)
154 if self.cachedStatus
is not None:
155 return self.cachedStatus
156 self.cachedStatus =
'match'
157 for key
in self.lookupProperties:
158 val = self.foundItems.get(key, NotFound)
160 self.cachedStatus =
'incomplete'
162 for dataIdKey, dataIdValue
in self.dataId.items():
163 foundValue = self.foundItems.get(dataIdKey, NotFound)
164 if foundValue
is not NotFound
and foundValue != dataIdValue:
165 self.cachedStatus =
'notMatch'
167 return self.cachedStatus
175 self.foundItems.update(items)
178 return self.
neededKeys - set(self.foundItems.keys())
180 def lookup(self, lookupProperties, reference, dataId, **kwargs):
181 """Perform a lookup in the registry.
183 Return values are refined by the values in dataId.
184 Returns a list of values that match keys in lookupProperties.
185 e.g. if the template is 'raw/raw_v%(visit)d_f%(filter)s.fits.gz', and
186 dataId={'visit':1}, and lookupProperties is ['filter'], and the
187 filesystem under self.root has exactly one file 'raw/raw_v1_fg.fits.gz'
188 then the return value will be [('g',)]
190 :param lookupProperties: keys whose values will be returned.
191 :param reference: other data types that may be used to search for values.
192 :param dataId: must be an iterable. Keys must be string.
193 If value is a string then will look for elements in the repository that match value for key.
194 If value is a 2-item iterable then will look for elements in the repository that are between (inclusive)
195 the first and second items in the value.
196 :param **kwargs: keys required for the posix registry to search for items. If required keys are not
197 provided will return an empty list.
198 'template': required. template parameter (typically from a policy) that can be used to look for files
199 'storage': optional. Needed to look for metadata in files. Currently supported values: 'FitsStorage'.
200 :return: a list of values that match keys in lookupProperties.
203 if 'template' in kwargs:
204 template = kwargs[
'template']
208 storage = kwargs[
'storage']
if 'storage' in kwargs
else None
212 allPaths = scanner.processPath(self.
root)
214 for path, foundProperties
in allPaths.items():
219 lookupData.setFoundItems(foundProperties)
220 if 'incomplete' == lookupData.status():
221 PosixRegistry.lookupMetadata(os.path.join(self.
root, path), template, lookupData, storage)
222 if 'match' == lookupData.status():
223 l = tuple(lookupData.foundItems[key]
for key
in lookupData.lookupProperties)
229 """Dispatcher for looking up metadata in a file of a given storage type
231 if storage ==
'FitsStorage':
232 PosixRegistry.lookupFitsMetadata(filepath, template, lookupData, storage)
236 """Look up metadata in a fits file.
237 Will try to discover the correct HDU to look in by testing if the
238 template has a value in brackets at the end.
239 If the HDU is specified but the metadata key is not discovered in
240 that HDU, will look in the primary HDU before giving up.
241 :param filepath: path to the file
242 :param template: template that was used to discover the file. This can
243 be used to look up the correct HDU as needed.
244 :param lookupData: an instance of LookupData that contains the
245 lookupProperties, the dataId, and the data that has been found so far.
246 Will be updated with new information as discovered.
251 hdulist = astropy.io.fits.open(filepath, memmap=
True)
254 hduNumber = PosixRegistry.getHduNumber(template=template, dataId=dataId)
255 if hduNumber
is not None and hduNumber < len(hdulist):
256 hdu = hdulist[hduNumber]
260 primaryHdu = hdulist[0]
264 for property
in lookupData.getMissingKeys():
266 if hdu
is not None and property
in hdu.header:
267 propertyValue = hdu.header[property]
269 elif primaryHdu
is not None and property
in primaryHdu.header:
270 propertyValue = primaryHdu.header[property]
271 lookupData.addFoundItems({property: propertyValue})
275 """A SQLite3-based registry."""
279 @param location (string) Path to SQLite3 file"""
281 Registry.__init__(self)
282 if os.path.exists(location):
283 self.
conn = sqlite3.connect(location)
284 self.conn.text_factory = str
288 def lookup(self, lookupProperties, reference, dataId, **kwargs):
289 """Perform a lookup in the registry.
291 Return values are refined by the values in dataId.
292 Returns a list of values that match keys in lookupProperties.
293 e.g. if the template is 'raw/raw_v%(visit)d_f%(filter)s.fits.gz', and
294 dataId={'visit':1}, and lookupProperties is ['filter'], and the
295 filesystem under self.root has exactly one file 'raw/raw_v1_fg.fits.gz'
296 then the return value will be [('g',)]
298 :param lookupProperties: names of the columns whose values will be returned.
299 :param dataId: must be a dict-like mapping (or None). Each key is a string or a 2-item iterable.
300 If key is a string then will look for elements in the repository that match value for key.
301 If key is a 2-item iterable then will look for elements in the repository where the value is between
302 the values of key[0] and key[1].
303 :param reference: other data types that may be used to search for values.
304 :param **kwargs: nothing needed for sqlite lookup
305 :return: a list of values that match keys in lookupProperties.
312 lookupProperties =
sequencify(lookupProperties)
314 cmd =
"SELECT DISTINCT "
315 cmd +=
", ".join(lookupProperties)
316 cmd +=
" FROM " +
" NATURAL JOIN ".join(reference)
318 if dataId
is not None and len(dataId) > 0:
320 for k, v
in dataId.items():
321 if hasattr(k,
'__iter__')
and not isinstance(k, basestring):
323 raise RuntimeError(
"Wrong number of keys for range:%s" % (k,))
324 whereList.append(
"(? BETWEEN %s AND %s)" % (k[0], k[1]))
327 whereList.append(
"%s = ?" % k)
329 cmd +=
" WHERE " +
" AND ".join(whereList)
330 c = self.conn.execute(cmd, valueList)
336 def executeQuery(self, returnFields, joinClause, whereFields, range, values):
337 """Extract metadata from the registry.
338 @param returnFields (list of strings) Metadata fields to be extracted.
339 @param joinClause (list of strings) Tables in which metadata fields
341 @param whereFields (list of tuples) First tuple element is metadata
342 field to query; second is the value that field
343 must have (often '?').
344 @param range (tuple) Value, lower limit, and upper limit for a
345 range condition on the metadata. Any of these can
347 @param values (tuple) Tuple of values to be substituted for '?'
348 characters in the whereFields values or the range
350 @return (list of tuples) All sets of field values that meet the
354 cmd =
"SELECT DISTINCT "
355 cmd +=
", ".join(returnFields)
356 cmd +=
" FROM " +
" NATURAL JOIN ".join(joinClause)
359 for k, v
in whereFields:
360 whereList.append(
"(%s = %s)" % (k, v))
361 if range
is not None:
362 whereList.append(
"(%s BETWEEN %s AND %s)" % range)
363 if len(whereList) > 0:
364 cmd +=
" WHERE " +
" AND ".join(whereList)
365 c = self.conn.execute(cmd, values)