"""This module defines the Butler class."""
from future import standard_library
standard_library.install_aliases()
from builtins import str
from past.builtins import basestring
from builtins import object
import collections
import copy
import inspect
import os
import yaml
from lsst.log import Log
from . import LogicalLocation, ReadProxy, ButlerSubset, ButlerDataRef, Persistence, \
    Storage, Policy, NoResults, Repository, DataId, RepositoryCfg, \
    RepositoryArgs, listify, setify, sequencify, doImport, ButlerComposite, genericAssembler, \
    genericDisassembler, PosixStorage
class ButlerCfg(Policy, yaml.YAMLObject):
    """Represents a Butler configuration.

    .. warning::

        cfg is 'wet paint' and very likely to change. Use of it in production
        code other than via the 'old butler' API is strongly discouraged.
    """
    yaml_tag = u"!ButlerCfg"

    def __init__(self, cls, repoCfg):
        super(ButlerCfg, self).__init__({'repoCfg': repoCfg, 'cls': cls})
class RepoData(object):
    """Container object for repository data used by Butler

    args - RepositoryArgs
        Arguments used to initialize self.repo
    cfg - RepositoryCfg
        Configuration of repository
    storedCfg - RepositoryCfg or None
        If the cfg at root and the RepositoryArgs don't match then a new cfg is kept in cfg and the cfg that
        was read from root is kept in storedCfg.
    repo - Repository
        The repository class instance
    tags - set
        The tags that apply to this repository, if any
    """

    def __init__(self, args, cfg, storedCfg=None, isNewRepository=False, isV1Repository=True):
        self.args = args
        self.cfg = cfg
        self.storedCfg = storedCfg
        self.isNewRepository = isNewRepository
        self.isV1Repository = isV1Repository
        self.repo = None
        self.tags = set()

    def __repr__(self):
        return "RepoData(args=%s cfg=%s repo=%s tags=%s)" % (self.args, self.cfg, self.repo, self.tags)

    def addTags(self, tags):
        self.tags = self.tags.union(tags)

class RepoDataContainer(object):
    """Container object for RepoData instances owned by a Butler instance."""

    def add(self, repoData):
        """Add a RepoData to the container

        Parameters
        ----------
        repoData - RepoData instance to add
        """
        self.byCfgRoot[repoData.args.cfgRoot] = repoData

    def inputs(self):
        """Get a list of RepoData that are used as inputs to the Butler.
        The list is created lazily as needed, and cached.

        Returns a list of RepoData with readable repositories, in the order to be used when searching.
        """
        if self._inputs is None:
            raise RuntimeError("Inputs not yet initialized.")
        return self._inputs

    def outputs(self):
        """Get a list of RepoData that are used as outputs to the Butler.
        The list is created lazily as needed, and cached.

        Returns a list of RepoData with writable repositories, in the order to be used when searching.
        """
        if self._outputs is None:
            raise RuntimeError("Outputs not yet initialized.")
        return self._outputs

    def all(self):
        """Get all RepoData used by the Butler.
        The list is created lazily as needed, and cached.

        Returns all RepoData, in the order to be used when searching.
        """
        if self._all is None:
            raise RuntimeError("The all list is not yet initialized.")
        return self._all

    def __repr__(self):
        return "%s(\nbyRepoRoot=%r, \nbyCfgRoot=%r, \n_inputs=%r, \n_outputs=%s, \n_all=%s)" % (
            self.__class__.__name__, self.byRepoRoot, self.byCfgRoot, self._inputs, self._outputs, self._all)

    def _buildLookupList(self, inputs, outputs):
        """Build the lists of input, output, and all RepoData instances, in lookup order.

        Parameters
        ----------
        inputs : list of RepositoryArgs
            The input RepositoryArgs, in order.
        outputs : list of RepositoryArgs
            The output RepositoryArgs, in order.
        """
        def addRepoDataToLists(repoData, inout):
            """Adds the cfg represented by repoData to the _all dict/list, as
            well as the _inputs or _outputs list, as indicated by inout. Then,
            adds all the parents of the cfg to the lists."""
            if repoData.cfg.root in self._all:
                return
            self._all[repoData.cfg.root] = repoData
            if inout == 'in':
                self._inputs.append(repoData)
            elif inout == 'out':
                self._outputs.append(repoData)
                if 'r' in repoData.args.mode:
                    self._inputs.append(repoData)
            else:
                raise RuntimeError("'inout' must be 'in' or 'out', not %s" % inout)
            for parent in repoData.cfg.parents:
                if 'r' in repoData.args.mode:
                    addRepoDataToLists(self.byRepoRoot[parent], 'in')

        self._all = collections.OrderedDict()
        self._inputs = []
        self._outputs = []
        for repoArgs in outputs:
            repoData = self.byCfgRoot[repoArgs.cfgRoot]
            addRepoDataToLists(repoData, 'out')
        for repoArgs in inputs:
            repoData = self.byCfgRoot[repoArgs.cfgRoot]
            addRepoDataToLists(repoData, 'in')

class Butler(object):
    """Butler provides a generic mechanism for persisting and retrieving data using mappers.

    A Butler manages a collection of datasets known as a repository. Each dataset has a type representing its
    intended usage and a location. Note that the dataset type is not the same as the C++ or Python type of the
    object containing the data. For example, an ExposureF object might be used to hold the data for a raw
    image, a post-ISR image, a calibrated science image, or a difference image. These would all be different
    dataset types.

    A Butler can produce a collection of possible values for a key (or tuples of values for multiple keys) if
    given a partial data identifier. It can check for the existence of a file containing a dataset given its
    type and data identifier. The Butler can then retrieve the dataset. Similarly, it can persist an object to
    an appropriate location when given its associated data identifier.

    Note that the Butler has two more advanced features when retrieving a data set. First, the retrieval is
    lazy. Input does not occur until the data set is actually accessed. This allows datasets to be retrieved
    and placed on a clipboard prospectively with little cost, even if the algorithm of a stage ends up not
    using them. Second, the Butler will call a standardization hook upon retrieval of the dataset. This
    function, contained in the input mapper object, must perform any necessary manipulations to force the
    retrieved object to conform to standards, including translating metadata.

    Public methods:

    __init__(self, root, mapper=None, **mapperArgs)

    defineAlias(self, alias, datasetType)

    getKeys(self, datasetType=None, level=None)

    queryMetadata(self, datasetType, format=None, dataId={}, **rest)

    datasetExists(self, datasetType, dataId={}, **rest)

    get(self, datasetType, dataId={}, immediate=False, **rest)

    put(self, obj, datasetType, dataId={}, **rest)

    subset(self, datasetType, level=None, dataId={}, **rest)

    dataRef(self, datasetType, level=None, dataId={}, **rest)

    Initialization:

    The preferred method of initialization is to pass in a RepositoryArgs instance, or a list of
    RepositoryArgs to inputs and/or outputs.

    For backward compatibility: this initialization method signature can take a posix root path, and
    optionally a mapper class instance or class type that will be instantiated using the mapperArgs input
    argument. However, for this to work in a backward compatible way it creates a single repository that is
    used as both an input and an output repository. This is NOT preferred, and will likely break any
    provenance system we have in place.

    Parameters
    ----------
    root - string
        .. note:: Deprecated in 12_0
            `root` will be removed in TBD, it is replaced by `inputs` and `outputs` for
            multiple-repository support.
        A filesystem path. Will only work with a PosixRepository.
    mapper - string or instance
        .. note:: Deprecated in 12_0
            `mapper` will be removed in TBD, it is replaced by `inputs` and `outputs` for
            multiple-repository support.
        Provides a mapper to be used with Butler.
    mapperArgs - dict
        .. note:: Deprecated in 12_0
            `mapperArgs` will be removed in TBD, it is replaced by `inputs` and `outputs` for
            multiple-repository support.
        Provides arguments to be passed to the mapper if the mapper input arg is a class type to be
        instantiated by Butler.
    inputs - RepositoryArgs or string
        Can be a single item or a list. Provides arguments to load an existing repository (or repositories).
        String is assumed to be a URI and is used as the cfgRoot (URI to the location of the cfg file). (A
        local file system URI does not have to start with 'file://' and in this way can be a relative path.)
    outputs - RepositoryArgs or string
        Can be a single item or a list. Provides arguments to load one or more existing repositories or create
        new ones. String is assumed to be a URI and is used as the repository root.
    """
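
    # Illustrative construction sketch; the repository paths, mapper class, and extra mapper argument below
    # are hypothetical and not defined in this module:
    #
    #     # preferred (version 2 API): explicit input and output repositories
    #     butler = Butler(inputs='/datasets/inputRepo', outputs='/scratch/outputRepo')
    #
    #     # deprecated (version 1 API): a single root used as both input and output
    #     butler = Butler(root='/datasets/repo', mapper=MyMapper, calibRoot='/datasets/calib')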

    def __init__(self, root=None, mapper=None, inputs=None, outputs=None, **mapperArgs):
        self.log = Log.getLogger("daf.persistence.butler")

        self._initArgs = {'root': root, 'mapper': mapper, 'inputs': inputs, 'outputs': outputs,
                          'mapperArgs': mapperArgs}

        inputs = copy.deepcopy(inputs)
        outputs = copy.deepcopy(outputs)

        isV1Args = inputs is None and outputs is None
        if isV1Args:
            inputs, outputs = self._convertV1Args(root=root, mapper=mapper, mapperArgs=mapperArgs)
        elif root or mapper or mapperArgs:
            raise RuntimeError(
                'Butler version 1 API (root, mapper, **mapperArgs) may ' +
                'not be used with version 2 API (inputs, outputs)')

        inputs = [RepositoryArgs(cfgRoot=args) if isinstance(args, basestring) else args for args in inputs]
        outputs = [RepositoryArgs(cfgRoot=args) if isinstance(args, basestring) else args for args in outputs]

        for args in inputs:
            if args.mode is None:
                args.mode = 'r'
            elif 'r' not in args.mode:
                raise RuntimeError("The mode of an input should be readable.")
        for args in outputs:
            if args.mode is None:
                args.mode = 'rw'
            elif 'w' not in args.mode:
                raise RuntimeError("The mode of an output should be writable.")

        self._createRepoDatas(inputs, outputs)
        self._repos._buildLookupList(inputs, outputs)

        for repoData in self._repos.all().values():
            repoData.repo = Repository(repoData)

    def _setRepoDataTags(self):
        """Set the tags from each repoArgs into all its parent repoArgs so that they can be included in
        tagged searches."""
        def setTags(butler, repoData, tags):
            tags.update(repoData.args.tags)
            repoData.addTags(tags)
            for parent in repoData.cfg.parents:
                setTags(butler, butler._repos.byRepoRoot[parent], copy.copy(tags))
        for repoData in self._repos.all().values():
            setTags(self, repoData, set())

    def _createRepoData(self, args, inout, instanceParents):
        """Make a RepoData object for args, adding it to the RepoDataContainer.

        Parameters
        ----------
        args : RepositoryArgs
            A RepositoryArgs that describes a new or existing Repository.
        inout : 'in' or 'out'
            Indicates if this Repository should be used by the Butler as an input or an output.
        instanceParents : list of string
            URI/path to the RepositoryCfg of parents in this instance of Butler; inputs and readable outputs
            (but not their parents; grandparents are looked up when the parents are loaded).
        """
        if inout not in ('in', 'out'):
            raise RuntimeError("inout must be either 'in' or 'out'")
        if args.cfgRoot in self._repos.byCfgRoot:
            return
        storedCfg = None
        cfg = Storage.getRepositoryCfg(args.cfgRoot)
        if cfg is not None:
            if not cfg.matchesArgs(args):
                if cfg.parents != instanceParents:
                    raise RuntimeError("Parents do not match.")
                storedCfg = cfg
                cfg = RepositoryCfg.makeFromArgs(args)
            repoData = RepoData(args=args, cfg=cfg, storedCfg=storedCfg)
            self._repos.add(repoData)
            for parentArgs in cfg.parents:
                self._createRepoData(RepositoryArgs(parentArgs, mode='r'), 'in', instanceParents)
            return
        # No RepositoryCfg was found at cfgRoot: this is either a Butler V1 repository or a new repository.
        if Storage.isPosix(args.cfgRoot):
            v1RepoExists = PosixStorage.v1RepoExists(args.cfgRoot)
            if not v1RepoExists and inout == 'in':
                msg = "Input repositories must exist; no repo found at " \
                      "%s. (A Butler V1 Repository 'exists' if the root " \
                      "folder exists AND contains items.)" % args.cfgRoot
                raise RuntimeError(msg)
            if inout == 'out' and not v1RepoExists:
                p = list(instanceParents)
                if args.cfgRoot in p:
                    p.remove(args.cfgRoot)
            else:
                p = []
            if args.mapper is None:
                args.mapper = PosixStorage.getMapperClass(args.cfgRoot)
            cfg = RepositoryCfg.makeFromArgs(args, p)
            repoData = RepoData(args=args, cfg=cfg, isNewRepository=not v1RepoExists,
                                isV1Repository=v1RepoExists)
            self._repos.add(repoData)
            if v1RepoExists:
                parent = PosixStorage.getParentSymlinkPath(args.cfgRoot)
                if parent:
                    parent = os.path.relpath(os.path.join(cfg.root, parent), '.')
                    cfg.addParents(parent)
                    self._createRepoData(RepositoryArgs(parent, mode='r'), 'in', instanceParents)
            return
        if inout == 'in':
            msg = "Input repositories must exist; no repo found at " \
                  "%s" % args.cfgRoot
            raise RuntimeError(msg)
        cfg = RepositoryCfg.makeFromArgs(args, parents)
        repoData = RepoData(args=args, cfg=cfg, isNewRepository=True)
        self._repos.add(repoData)

    def _buildInstanceParents(self, inputs, outputs):
        parents = []
        for args in outputs + inputs:
            if 'r' in args.mode and args.cfgRoot not in parents:
                parents.append(args.cfgRoot)
        return parents

    def _createRepoDatas(self, inputs, outputs):
        """Create the RepoDataContainer and put a RepoData object in it for each repository listed in inputs
        and outputs as well as each parent of each repository.

        After this function runs, there will be a RepoData for any Repository that may be used by this Butler
        instance.

        Parameters
        ----------
        inputs : list of RepoArgs
            Repositories to be used by the Butler as input repositories.
        outputs : list of RepoArgs
            Repositories to be used by the Butler as output repositories.
        """
        try:
            self._repos
            raise RuntimeError("Must not call _createRepoDatas twice.")
        except AttributeError:
            pass
        self._repos = RepoDataContainer()
        instanceParents = self._buildInstanceParents(inputs, outputs)
        for outputArgs in outputs:
            self._createRepoData(outputArgs, 'out', instanceParents)
        for inputArgs in inputs:
            self._createRepoData(inputArgs, 'in', instanceParents)

    def _convertV1Args(self, root, mapper, mapperArgs):
        """Convert Butler V1 args (root, mapper, mapperArgs) to V2 args (inputs, outputs)

        Parameters
        ----------
        root : string
            Posix path to repository root
        mapper : class, class instance, or string
            Instantiated class, a class object to be instantiated, or a string that refers to a class that
            can be imported & used as the mapper.
        mapperArgs : dict
            Args & their values used when instantiating the mapper.

        Returns
        -------
        (inputs, outputs) - values to be used for inputs and outputs in Butler.__init__
        """
        if mapper and not isinstance(mapper, basestring) and not inspect.isclass(mapper):
            err = "mapper ought to be an importable string or a class object (not a mapper class instance)"
            self.log.warn(err)
        if hasattr(mapper, 'root'):
            # a mapper instance may carry the repository root; use it if no root was given
            if root is None:
                root = mapper.root
        inputs = []
        outputs = RepositoryArgs(mode='rw',
                                 root=root,
                                 mapper=mapper,
                                 mapperArgs=mapperArgs)
        return inputs, outputs

    def __repr__(self):
        return 'Butler(datasetTypeAliasDict=%s, repos=%s, persistence=%s)' % (
            self.datasetTypeAliasDict, self._repos, self.persistence)

    def _getDefaultMapper(self):
        """Get the default mapper. Currently this means if all the repos use
        exactly the same mapper, that mapper may be considered the default.

        This definition may be changing; mappers may be able to exclude
        themselves as candidates for default, and they may nominate a different
        mapper instead. Also, we may not want to look at *all* the repos, but
        only a depth-first search on each of the input & output repos, and
        use the first-found mapper for each of those. TBD.

        Returns
        -------
        Returns the class type of the default mapper, or None if a default
        mapper can not be determined.
        """
        defaultMapper = None
        for inputRepoData in self._repos.inputs():
            mapper = None
            if inputRepoData.cfg.mapper is not None:
                mapper = inputRepoData.cfg.mapper
            if mapper is None:
                continue
            # mapper may be a string (importable name), a class instance, or a class object
            if isinstance(mapper, basestring):
                mapper = doImport(mapper)
            elif not inspect.isclass(mapper):
                mapper = mapper.__class__
            if defaultMapper is None:
                defaultMapper = mapper
            elif mapper == defaultMapper:
                continue
            elif mapper is not None:
                return None
        return defaultMapper

    def _assignDefaultMapper(self, defaultMapper):
        for repoData in self._repos.all().values():
            if repoData.cfg.mapper is None and (repoData.isNewRepository or repoData.isV1Repository):
                if defaultMapper is None:
                    raise RuntimeError(
                        "No mapper specified for %s and no default mapper could be determined." %
                        repoData.args)
                repoData.cfg.mapper = defaultMapper

    @staticmethod
    def getMapperClass(root):
        """posix-only; gets the mapper class at the path specified by root (if a file _mapper can be found at
        that location or in a parent location).

        As we abstract the storage and support different types of storage locations this method will be
        moved entirely into Butler Access, or made more dynamic, and the API will very likely change."""
        return Storage.getMapperClass(root)

    def defineAlias(self, alias, datasetType):
        """Register an alias that will be substituted in datasetTypes.

        Parameters
        ----------
        alias - string
            The alias keyword. It may start with @ or not. It may not contain @ except as the first character.
        datasetType - string
            The string that will be substituted when @alias is passed into datasetType. It may not contain
            '@'.
        """
        # verify the format of the alias: '@' may only appear as the first character (add it if missing)
        atLoc = alias.rfind('@')
        if atLoc == -1:
            alias = "@" + str(alias)
        elif atLoc > 0:
            raise RuntimeError("Badly formatted alias string: %s" % (alias,))

        # verify that the datasetType does not contain '@'
        if datasetType.count('@') != 0:
            raise RuntimeError("Badly formatted type string: %s" % (datasetType))

        # verify that the alias does not overlap with an existing alias
        for key in self.datasetTypeAliasDict:
            if key.startswith(alias) or alias.startswith(key):
                raise RuntimeError("Alias: %s overlaps with existing alias: %s" % (alias, key))

        self.datasetTypeAliasDict[alias] = datasetType

    def getKeys(self, datasetType=None, level=None, tag=None):
        """Get the valid data id keys at or above the given level of hierarchy for the dataset type or the
        entire collection if None. The dict values are the basic Python types corresponding to the keys (int,
        float, str).

        Parameters
        ----------
        datasetType - string
            The type of dataset to get keys for, entire collection if None.
        level - string
            The hierarchy level to descend to. None if it should not be restricted. Use an empty string if the
            mapper should look up the default level.
        tag - any, or list of any
            Any object that can be tested to be the same as the tag in a dataId passed into butler input
            functions. Applies only to input repositories: if tag is specified by the dataId then the repo
            will only be read from if the tag in the dataId matches a tag used for that repository.

        Returns
        -------
        Returns a dict. The dict keys are the valid data id keys at or above the given level of hierarchy for
        the dataset type or the entire collection if None. The dict values are the basic Python types
        corresponding to the keys (int, float, str).
        """
        datasetType = self._resolveDatasetTypeAlias(datasetType)
        keys = None
        tag = setify(tag)
        for repoData in self._repos.inputs():
            if not tag or len(tag.intersection(repoData.tags)) > 0:
                keys = repoData.repo.getKeys(datasetType, level)
                # use the keys from the first input repository that can provide them
                if keys is not None:
                    break
        return keys

    def queryMetadata(self, datasetType, format=None, dataId={}, **rest):
        """Returns the valid values for one or more keys when given a partial
        input collection data id.

        Parameters
        ----------
        datasetType - string
            The type of dataset to inquire about.
        format - str, tuple
            A key or tuple of keys to be returned, giving the level of granularity of the inquiry.
        dataId - DataId, dict
            The partial data id.
        **rest
            Keyword arguments for the partial data id.

        Returns
        -------
        A list of valid values or tuples of valid values as specified by the format (defaulting to the same as
        the key) at the key's level of granularity.
        """
        datasetType = self._resolveDatasetTypeAlias(datasetType)
        dataId = DataId(dataId)
        dataId.update(**rest)

        tuples = None
        for repoData in self._repos.inputs():
            if not dataId.tag or len(dataId.tag.intersection(repoData.tags)) > 0:
                tuples = repoData.repo.queryMetadata(datasetType, format, dataId)
                if tuples:
                    break
        return tuples

    def datasetExists(self, datasetType, dataId={}, **rest):
        """Determines if a dataset file exists.

        Parameters
        ----------
        datasetType - string
            The type of dataset to inquire about.
        dataId - DataId, dict
            The data id of the dataset.
        **rest
            keyword arguments for the data id.

        Returns
        -------
        True if the dataset exists or is non-file-based.
        """
        datasetType = self._resolveDatasetTypeAlias(datasetType)
        dataId = DataId(dataId)
        dataId.update(**rest)
        location = None
        for repoData in self._repos.inputs():
            if not dataId.tag or len(dataId.tag.intersection(repoData.tags)) > 0:
                location = repoData.repo.map(datasetType, dataId)
                if location is not None:
                    break
        if location is None:
            return False

        additionalData = location.getAdditionalData()
        storageName = location.getStorageName()
        if storageName in ('BoostStorage', 'FitsStorage', 'PafStorage',
                           'PickleStorage', 'ConfigStorage', 'FitsCatalogStorage'):
            locations = location.getLocations()
            for locationString in locations:
                logLoc = LogicalLocation(locationString, additionalData).locString()
                if storageName == 'FitsStorage':
                    # strip a possible FITS extension specifier, e.g. "file.fits[1]"
                    bracket = logLoc.find('[')
                    if bracket != -1:
                        logLoc = logLoc[:bracket]
                if not os.path.exists(logLoc):
                    return False
            return True
        self.log.warn("datasetExists() for non-file storage %s, dataset type=%s, keys=%s",
                      storageName, datasetType, str(dataId))
        return True
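
    # Illustrative query sketch; the dataset type and data id keys below are hypothetical:
    #
    #     if butler.datasetExists('raw', visit=1, ccd=2):
    #         raw = butler.get('raw', visit=1, ccd=2)
    #     visits = butler.queryMetadata('raw', 'visit', dataId={'filter': 'r'})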

    def _locate(self, datasetType, dataId, write):
        """Get one or more ButlerLocations and/or ButlerComposites.

        Parameters
        ----------
        datasetType : string
            The datasetType that is being searched for. The datasetType may be followed by a dot and
            a component name (component names are specified in the policy), i.e. datasetType.componentName
        dataId : dict or DataId class instance
            The data id to use when mapping the dataset.
        write : bool
            True if this is a search to write an object. False if it is a search to read an object. This
            affects what type (an object or a container) is returned.

        Returns
        -------
        If write is False, will return either a single object or None. If write is True, will return a list
        of found locations (which may be empty).
        """
        locations = []
        repos = self._repos.outputs() if write else self._repos.inputs()
        for repoData in repos:
            # when reading, respect any tags in the dataId
            if not write and dataId.tag and len(dataId.tag.intersection(repoData.tags)) == 0:
                continue
            components = datasetType.split('.')
            datasetType = components[0]
            components = components[1:]
            location = repoData.repo.map(datasetType, dataId, write=write)
            if location is None:
                continue
            location.datasetType = datasetType
            if len(components) > 0:
                if not isinstance(location, ButlerComposite):
                    raise RuntimeError("The location for a dotted datasetType must be a composite.")
                # replace the first component name with the datasetType for that component
                components[0] = location.componentInfo[components[0]].datasetType
                # join the remaining components into a dotted datasetType and look that up
                datasetType = '.'.join(components)
                location = self._locate(datasetType, dataId, write)
                if location is None:
                    continue
            if not write:
                return location
            if isinstance(location, list):
                locations.extend(location)
            else:
                locations.append(location)
        return locations if write else None

    def get(self, datasetType, dataId=None, immediate=True, **rest):
        """Retrieves a dataset given an input collection data id.

        Parameters
        ----------
        datasetType - string
            The type of dataset to retrieve.
        dataId - DataId, dict
            The data id.
        immediate - bool
            If False use a proxy for delayed loading.
        **rest
            keyword arguments for the data id.

        Returns
        -------
        An object retrieved from the dataset (or a proxy for one).
        """
        datasetType = self._resolveDatasetTypeAlias(datasetType)
        dataId = DataId(dataId)
        dataId.update(**rest)
        location = self._locate(datasetType, dataId, write=False)
        if location is None:
            raise NoResults("No locations for get:", datasetType, dataId)
        self.log.debug("Get type=%s keys=%s from %s", datasetType, dataId, str(location))

        if isinstance(location, ButlerComposite):
            for name, componentInfo in location.componentInfo.items():
                if componentInfo.subset:
                    subset = self.subset(datasetType=componentInfo.datasetType, dataId=location.dataId)
                    componentInfo.obj = [obj.get() for obj in subset]
                else:
                    obj = self.get(componentInfo.datasetType, location.dataId, immediate=True)
                    componentInfo.obj = obj
            assembler = location.assembler or genericAssembler
            obj = assembler(dataId=location.dataId, componentInfo=location.componentInfo, cls=location.python)
            return obj

        if location.datasetType and hasattr(location.mapper, "bypass_" + location.datasetType):
            pythonType = location.getPythonType()
            if pythonType is not None:
                if isinstance(pythonType, basestring):
                    pythonType = doImport(pythonType)
            bypassFunc = getattr(location.mapper, "bypass_" + location.datasetType)
            callback = lambda: bypassFunc(location.datasetType, pythonType, location, dataId)
        else:
            callback = lambda: self._read(location)
        if location.mapper.canStandardize(location.datasetType):
            innerCallback = callback
            callback = lambda: location.mapper.standardize(location.datasetType, innerCallback(), dataId)
        if immediate:
            return callback()
        return ReadProxy(callback)
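
    # Illustrative retrieval sketch; the dataset type and data id keys are hypothetical:
    #
    #     # immediate=True reads (and standardizes) the object right away
    #     exposure = butler.get('calexp', dataId={'visit': 1, 'ccd': 2}, immediate=True)
    #
    #     # immediate=False returns a ReadProxy; the read happens only when the proxy is first used
    #     proxy = butler.get('calexp', dataId={'visit': 1, 'ccd': 2}, immediate=False)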

    def put(self, obj, datasetType, dataId={}, doBackup=False, **rest):
        """Persists a dataset given an output collection data id.

        Parameters
        ----------
        obj -
            The object to persist.
        datasetType - string
            The type of dataset to persist.
        dataId - DataId, dict
            The data id.
        doBackup - bool
            If True, rename existing instead of overwriting.
            WARNING: Setting doBackup=True is not safe for parallel processing, as it may be subject to race
            conditions.
        **rest
            Keyword arguments for the data id.
        """
        datasetType = self._resolveDatasetTypeAlias(datasetType)
        dataId = DataId(dataId)
        dataId.update(**rest)

        for location in self._locate(datasetType, dataId, write=True):
            if isinstance(location, ButlerComposite):
                disassembler = location.disassembler if location.disassembler else genericDisassembler
                disassembler(obj=obj, dataId=location.dataId, componentInfo=location.componentInfo)
                for name, info in location.componentInfo.items():
                    if not info.inputOnly:
                        self.put(info.obj, info.datasetType, location.dataId, doBackup=doBackup)
            else:
                if doBackup:
                    location.getRepository().backup(location.datasetType, dataId)
                location.getRepository().write(location, obj)
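
    # Illustrative persistence sketch; `exposure`, the dataset type, and the data id keys are hypothetical:
    #
    #     butler.put(exposure, 'calexp', dataId={'visit': 1, 'ccd': 2})
    #     # doBackup=True renames any existing file instead of overwriting it (not safe in parallel)
    #     butler.put(exposure, 'calexp', dataId={'visit': 1, 'ccd': 2}, doBackup=True)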

    def subset(self, datasetType, level=None, dataId={}, **rest):
        """Return complete dataIds for a dataset type that match a partial (or empty) dataId.

        Given a partial (or empty) dataId specified in dataId and **rest, find all datasets that match the
        dataId. Optionally restrict the results to a given level specified by a dataId key (e.g. visit or
        sensor or amp for a camera). Return an iterable collection of complete dataIds as ButlerDataRefs.
        Datasets with the resulting dataIds may not exist; that needs to be tested with datasetExists().

        Parameters
        ----------
        datasetType - string
            The type of dataset collection to subset
        level - string
            The level of dataId at which to subset. Use an empty string if the mapper should look up the
            default level.
        dataId - DataId, dict
            The data id.
        **rest
            Keyword arguments for the data id.

        Returns
        -------
        subset - ButlerSubset
            Collection of ButlerDataRefs for datasets matching the data id.

        Examples
        --------
        To print the full dataIds for all r-band measurements in a source catalog
        (note that the subset call is equivalent to: `butler.subset('src', dataId={'filter':'r'})`):

        >>> subset = butler.subset('src', filter='r')
        >>> for data_ref in subset: print(data_ref.dataId)
        """
        datasetType = self._resolveDatasetTypeAlias(datasetType)

        # Currently expected behavior of subset is that if specified level is None then the mapper's default
        # level should be used. Convention for level within Butler is that an empty string is used to indicate
        # the default level.
        if level is None:
            level = ''

        dataId = DataId(dataId)
        dataId.update(**rest)
        return ButlerSubset(self, datasetType, level, dataId)

    def dataRef(self, datasetType, level=None, dataId={}, **rest):
        """Returns a single ButlerDataRef.

        Given a complete dataId specified in dataId and **rest, find the unique dataset at the given level
        specified by a dataId key (e.g. visit or sensor or amp for a camera) and return a ButlerDataRef.

        Parameters
        ----------
        datasetType - string
            The type of dataset collection to reference
        level - string
            The level of dataId at which to reference
        dataId - DataId, dict
            The data id.
        **rest
            Keyword arguments for the data id.

        Returns
        -------
        dataRef - ButlerDataRef
            ButlerDataRef for dataset matching the data id
        """
        datasetType = self._resolveDatasetTypeAlias(datasetType)
        dataId = DataId(dataId)
        subset = self.subset(datasetType, level, dataId, **rest)
        if len(subset) != 1:
            raise RuntimeError("No unique dataset for: Dataset type:%s Level:%s Data ID:%s Keywords:%s" %
                               (str(datasetType), str(level), str(dataId), str(rest)))
        return ButlerDataRef(subset, subset.cache[0])
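
    # Illustrative dataRef sketch; the dataset type and data id keys are hypothetical:
    #
    #     ref = butler.dataRef('raw', dataId={'visit': 1, 'ccd': 2})
    #     print(ref.dataId)        # the complete data id of the unique matching dataset
    #     raw = ref.get('raw')     # the ButlerDataRef forwards get()/put() using its complete data id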

    def _read(self, location):
        """Unpersist an object using data inside a butlerLocation object.

        A weakref to loaded objects is cached here. If the object specified by the butlerLocation has been
        loaded before and still exists then the object will not be re-read. A ref to the already-existing
        object will be returned instead.

        Parameters
        ----------
        location - ButlerLocation
            A butlerLocation instance populated with data needed to read the object.

        Returns
        -------
        object - an instance of the object specified by the butlerLocation.
            The object specified by the butlerLocation will either be loaded from persistent storage or will
            be fetched from the object cache (if it has already been read before).
        """
        def hasher(butlerLocation):
            """Hash a butler location for use as a key in the object cache.

            This requires that the dataId that was used to find the location is set in the usedDataId
            parameter. If this is not set, the dataId that was used to do the mapping is not known and we
            can't create a complete hash for comparison of like-objects.
            """
            if butlerLocation.usedDataId is None:
                return None
            return hash((butlerLocation.storageName, id(butlerLocation.mapper), id(butlerLocation.storage),
                         tuple(butlerLocation.locationList), repr(sorted(butlerLocation.usedDataId.items())),
                         butlerLocation.datasetType))

        locationHash = hasher(location)
        results = self.objectCache.get(locationHash, None) if locationHash is not None else None
        if results is None:
            self.log.debug("Starting read from %s", location)
            results = location.repository.read(location)
            if len(results) == 1:
                results = results[0]
            self.log.debug("Ending read from %s", location)
            try:
                self.objectCache[locationHash] = results
            except TypeError:
                # some object types (e.g. builtins, like list) do not support weakref, and will raise a
                # TypeError when we try to create the weakref. This is ok, we simply will not keep those
                # types of objects in the cache.
                pass
        return results

    def __reduce__(self):
        ret = (_unreduce, (self._initArgs, self.datasetTypeAliasDict))
        return ret

    def _resolveDatasetTypeAlias(self, datasetType):
        """Replaces all the known alias keywords in the given string with the alias value.

        Parameters
        ----------
        datasetType - string
            A datasetType string to search & replace on

        Returns
        -------
        datasetType - string
            The de-aliased string
        """
        for key in self.datasetTypeAliasDict:
            # if all aliases have been replaced, bail out
            if datasetType.find('@') == -1:
                break
            datasetType = datasetType.replace(key, self.datasetTypeAliasDict[key])

        # If an alias specifier can not be resolved then throw.
        if datasetType.find('@') != -1:
            raise RuntimeError("Unresolvable alias specifier in datasetType: %s" % (datasetType))

        return datasetType

def _unreduce(initArgs, datasetTypeAliasDict):
    mapperArgs = initArgs.pop('mapperArgs')
    initArgs.update(mapperArgs)
    butler = Butler(**initArgs)
    butler.datasetTypeAliasDict = datasetTypeAliasDict
    return butler
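
# A minimal pickling sketch: __reduce__ and _unreduce let a Butler be pickled and rebuilt from its original
# constructor arguments plus its dataset type aliases. The repository path below is hypothetical:
#
#     import pickle
#     butler = Butler(inputs='/datasets/inputRepo')
#     clone = pickle.loads(pickle.dumps(butler))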