34 from .
import (LogicalLocation, Policy,
35 StorageInterface, Storage, ButlerLocation,
36 NoRepositroyAtRoot, RepositoryCfg, doImport)
38 from .safeFileIo
import SafeFilename, safeMakeDir
41 __all__ = [
"PosixStorage"]
45 """Defines the interface for a storage location on the local filesystem.
50 URI or path that is used as the storage location.
52 If True a new repository will be created at the root location if it
53 does not exist. If False then a new repository will not be created.
58 If create is False and a repository does not exist at the root
59 specified by uri then NoRepositroyAtRoot is raised.
63 self.
log = Log.getLogger(
"daf.persistence.butler")
65 if self.
root and not os.path.exists(self.
root):
67 raise NoRepositroyAtRoot(
"No repository at {}".
format(uri))
71 return 'PosixStorage(root=%s)' % self.
root
74 def _pathFromURI(uri):
75 """Get the path part of the URI"""
76 return urllib.parse.urlparse(uri).path
80 """Get a relative path from a location to a location.
85 A path at which to start. It can be a relative path or an
88 A target location. It can be a relative path or an absolute path.
93 A relative path that describes the path from fromPath to toPath.
95 fromPath = os.path.realpath(fromPath)
96 return os.path.relpath(toPath, fromPath)
100 """Get an absolute path for the path from fromPath to relativePath
104 fromPath : the starting location
105 A location at which to start. It can be a relative path or an
107 relativePath : the location relative to fromPath
113 Path that is an absolute path representation of fromPath +
114 relativePath, if one exists. If relativePath is absolute or if
115 fromPath is not related to relativePath then relativePath will be
118 if os.path.isabs(relativePath):
120 fromPath = os.path.realpath(fromPath)
121 return os.path.normpath(os.path.join(fromPath, relativePath))
125 """Get a persisted RepositoryCfg
129 uri : URI or path to a RepositoryCfg
134 A RepositoryCfg instance or None
136 storage = Storage.makeFromURI(uri)
137 location = ButlerLocation(pythonType=RepositoryCfg,
140 locationList=
'repositoryCfg.yaml',
146 return storage.read(location)
150 storage = Storage.makeFromURI(cfg.root
if loc
is None else loc, create=
True)
151 location = ButlerLocation(pythonType=RepositoryCfg,
154 locationList=
'repositoryCfg.yaml',
160 storage.write(location, cfg)
164 """Get the mapper class associated with a repository root.
166 Supports the legacy _parent symlink search (which was only ever posix-only). This should not be used by
167 new code and repositories; they should use the Repository parentCfg mechanism.
172 The location where a persisted RepositoryCfg is (new style repos), or
173 the location where a _mapper file is (old style repos).
177 A class object or a class instance, depending on the state of the
178 mapper when the repository was created.
183 cfg = PosixStorage.getRepositoryCfg(root)
189 mapperFile =
"_mapper"
190 while not os.path.exists(os.path.join(basePath, mapperFile)):
192 if os.path.exists(os.path.join(basePath,
"_parent")):
193 basePath = os.path.join(basePath,
"_parent")
198 if mapperFile
is not None:
199 mapperFile = os.path.join(basePath, mapperFile)
202 with open(mapperFile,
"r")
as f:
203 mapperName = f.readline().
strip()
204 components = mapperName.split(
".")
205 if len(components) <= 1:
206 raise RuntimeError(
"Unqualified mapper name %s in %s" %
207 (mapperName, mapperFile))
208 pkg = importlib.import_module(
".".join(components[:-1]))
209 return getattr(pkg, components[-1])
215 """For Butler V1 Repositories only, if a _parent symlink exists, get the location pointed to by the
221 A path to the folder on the local filesystem.
226 A path to the parent folder indicated by the _parent symlink, or None if there is no _parent
229 linkpath = os.path.join(root,
'_parent')
230 if os.path.exists(linkpath):
232 return os.readlink(os.path.join(root,
'_parent'))
236 return os.path.join(root,
'_parent')
def write(self, butlerLocation, obj):
    """Write an object to the location and persistence format specified by
    a ButlerLocation.

    The write formatter is looked up by the location's storage name first
    and, if that yields nothing, by the location's python type.

    Parameters
    ----------
    butlerLocation : ButlerLocation
        The location & formatting for the object to be written.
    obj : object instance
        The object to be written.

    Raises
    ------
    RuntimeError
        If no write formatter is found for either the storage name or the
        python type of the location.
    """
    self.log.debug("Put location=%s obj=%s", butlerLocation, obj)

    writeFormatter = self.getWriteFormatter(butlerLocation.getStorageName())
    if not writeFormatter:
        # Fall back to a formatter registered against the python type.
        writeFormatter = self.getWriteFormatter(butlerLocation.getPythonType())
    if not writeFormatter:
        # Fail before touching the formatter so the error names the location
        # instead of surfacing as a "NoneType is not callable" TypeError.
        raise RuntimeError("No formatter for location:{}".format(butlerLocation))

    writeFormatter(butlerLocation, obj)
def read(self, butlerLocation):
    """Read from a butlerLocation.

    The read formatter is looked up by the location's storage name first
    and, if that yields nothing, by the location's python type.

    Parameters
    ----------
    butlerLocation : ButlerLocation
        The location & formatting for the object(s) to be read.

    Returns
    -------
    A list of objects as described by the butler location. One item for
    each location in butlerLocation.getLocations()

    Raises
    ------
    RuntimeError
        If no read formatter is found for either the storage name or the
        python type of the location.
    """
    readFormatter = self.getReadFormatter(butlerLocation.getStorageName())
    if not readFormatter:
        # Fall back to a formatter registered against the python type.
        readFormatter = self.getReadFormatter(butlerLocation.getPythonType())
    if not readFormatter:
        # Fail before touching the formatter so the error names the location
        # instead of surfacing as a "NoneType is not callable" TypeError.
        raise RuntimeError("No formatter for location:{}".format(butlerLocation))

    return readFormatter(butlerLocation)
283 """Implementation of PosixStorage.exists for ButlerLocation objects.
285 storageName = location.getStorageName()
286 if storageName
not in (
'FitsStorage',
287 'PickleStorage',
'ConfigStorage',
'FitsCatalogStorage',
288 'YamlStorage',
'ParquetStorage',
'MatplotlibStorage'):
289 self.
log.
warn(
"butlerLocationExists for non-supported storage %s" % location)
291 for locationString
in location.getLocations():
292 logLoc = LogicalLocation(locationString, location.getAdditionalData()).locString()
299 """Check if location exists.
303 location : ButlerLocation or string
304 A string or a ButlerLocation that describes the location of an
305 object in this storage.
310 True if exists, else False.
312 if isinstance(location, ButlerLocation):
319 """Get the full path to the location.
324 return os.path.join(self.
root, location)
328 """Test if a Version 1 Repository exists.
330 Version 1 Repositories only exist in posix storages, do not have a
331 RepositoryCfg file, and contain either a registry.sqlite3 file, a
332 _mapper file, or a _parent link.
337 A path to a folder on the local filesystem.
342 True if the repository at root exists, else False.
344 return os.path.exists(root)
and (
345 os.path.exists(os.path.join(root,
"registry.sqlite3"))
346 or os.path.exists(os.path.join(root,
"_mapper"))
347 or os.path.exists(os.path.join(root,
"_parent"))
351 """Copy a file from one location to another on the local filesystem.
356 Path and name of existing file.
358 Path and name of new file.
364 shutil.copy(os.path.join(self.
root, fromLocation), os.path.join(self.
root, toLocation))
367 """Get a handle to a local copy of the file, downloading it to a
372 A path to the file in storage, relative to root.
376 A handle to a local copy of the file. If storage is remote it will be
377 a temporary file. If storage is local it may be the original file or
378 a temporary file. The file name can be gotten via the 'name' property
379 of the returned object.
381 p = os.path.join(self.
root, path)
391 """Search for the given path in this storage instance.
393 If the path contains an HDU indicator (a number in brackets before the
394 dot, e.g. 'foo.fits[1]'), this will be stripped when searching and so
395 will match filenames without the HDU indicator, e.g. 'foo.fits'. The
396 path returned WILL contain the indicator though, e.g. ['foo.fits[1]'].
401 A filename (and optionally prefix path) to search for within root.
406 The location that was found, or None if no location was found.
411 def search(root, path, searchParents=False):
412 """Look for the given path in the current root.
414 Also supports searching for the path in Butler v1 repositories by
415 following the Butler v1 _parent symlink
417 If the path contains an HDU indicator (a number in brackets, e.g.
418 'foo.fits[1]'), this will be stripped when searching and so
419 will match filenames without the HDU indicator, e.g. 'foo.fits'. The
420 path returned WILL contain the indicator though, e.g. ['foo.fits[1]'].
425 The path to the root directory.
427 The path to the file within the root directory.
428 searchParents : bool, optional
429 For Butler v1 repositories only, if true and a _parent symlink
430 exists, then the directory at _parent will be searched if the file
431 is not found in the root repository. Will continue searching the
432 parent of the parent until the file is found or no additional
438 The location that was found, or None if no location was found.
444 while len(rootDir) > 1
and rootDir[-1] ==
'/':
445 rootDir = rootDir[:-1]
447 if not path.startswith(
'/'):
450 elif path.startswith(rootDir +
"/"):
452 path = path[len(rootDir +
'/'):]
454 elif rootDir ==
"/" and path.startswith(
"/"):
459 pathPrefix = os.path.dirname(path)
460 while pathPrefix !=
"" and pathPrefix !=
"/":
461 if os.path.realpath(pathPrefix) == os.path.realpath(root):
463 pathPrefix = os.path.dirname(pathPrefix)
464 if pathPrefix ==
"/":
466 elif pathPrefix !=
"":
467 path = path[len(pathPrefix)+1:]
473 firstBracket = path.find(
"[")
474 if firstBracket != -1:
475 strippedPath = path[:firstBracket]
476 pathStripped = path[firstBracket:]
480 paths = glob.glob(os.path.join(dir, strippedPath))
482 if pathPrefix != rootDir:
483 paths = [p[len(rootDir+
'/'):]
for p
in paths]
484 if pathStripped
is not None:
485 paths = [p + pathStripped
for p
in paths]
488 dir = os.path.join(dir,
"_parent")
489 if not os.path.exists(dir):
496 """Ask if a storage at the location described by uri exists
501 URI to the root location of the storage
506 True if the storage exists, false if not
508 return os.path.exists(PosixStorage._pathFromURI(uri))
512 """Read an lsst.pex.config.Config from a butlerLocation.
516 butlerLocation : ButlerLocation
517 The location for the object(s) to be read.
521 A list of objects as described by the butler location. One item for
522 each location in butlerLocation.getLocations()
525 for locationString
in butlerLocation.getLocations():
526 locStringWithRoot = os.path.join(butlerLocation.getStorage().root, locationString)
527 logLoc = LogicalLocation(locStringWithRoot, butlerLocation.getAdditionalData())
528 if not os.path.exists(logLoc.locString()):
529 raise RuntimeError(
"No such config file: " + logLoc.locString())
530 pythonType = butlerLocation.getPythonType()
531 if pythonType
is not None:
532 if isinstance(pythonType, str):
534 finalItem = pythonType()
535 finalItem.load(logLoc.locString())
536 results.append(finalItem)
541 """Writes an lsst.pex.config.Config object to a location specified by
546 butlerLocation : ButlerLocation
547 The location for the object to be written.
548 obj : object instance
549 The object to be written.
551 filename = os.path.join(butlerLocation.getStorage().root, butlerLocation.getLocations()[0])
553 logLoc = LogicalLocation(locationString, butlerLocation.getAdditionalData())
554 obj.save(logLoc.locString())
558 """Read objects from a FITS file specified by ButlerLocation.
560 The object is read using class or static method
561 ``readFitsWithOptions(path, options)``, if it exists, else
562 ``readFits(path)``. The ``options`` argument is the data returned by
563 ``butlerLocation.getAdditionalData()``.
567 butlerLocation : ButlerLocation
568 The location for the object(s) to be read.
572 A list of objects as described by the butler location. One item for
573 each location in butlerLocation.getLocations()
575 pythonType = butlerLocation.getPythonType()
576 if pythonType
is not None:
577 if isinstance(pythonType, str):
579 supportsOptions = hasattr(pythonType,
"readFitsWithOptions")
580 if not supportsOptions:
582 if issubclass(pythonType, (PropertySet, PropertyList)):
584 reader = readMetadata
586 reader = pythonType.readFits
588 additionalData = butlerLocation.getAdditionalData()
589 for locationString
in butlerLocation.getLocations():
590 locStringWithRoot = os.path.join(butlerLocation.getStorage().root, locationString)
591 logLoc = LogicalLocation(locStringWithRoot, additionalData)
594 filePath = re.sub(
r"(\.fits(.[a-zA-Z0-9]+)?)(\[.+\])$",
r"\1", logLoc.locString())
595 if not os.path.exists(filePath):
596 raise RuntimeError(
"No such FITS file: " + logLoc.locString())
598 finalItem = pythonType.readFitsWithOptions(logLoc.locString(), options=additionalData)
600 fileName = logLoc.locString()
601 mat = re.search(
r"^(.*)\[(\d+)\]$", fileName)
603 if mat
and reader == readMetadata:
604 fileName = mat.group(1)
605 hdu = int(mat.group(2))
607 finalItem = reader(fileName, hdu=hdu)
609 finalItem = reader(fileName)
610 results.append(finalItem)
615 """Writes an object to a FITS file specified by ButlerLocation.
617 The object is written using method
618 ``writeFitsWithOptions(path, options)``, if it exists, else
619 ``writeFits(path)``. The ``options`` argument is the data returned by
620 ``butlerLocation.getAdditionalData()``.
624 butlerLocation : ButlerLocation
625 The location for the object to be written.
626 obj : object instance
627 The object to be written.
629 supportsOptions = hasattr(obj,
"writeFitsWithOptions")
630 additionalData = butlerLocation.getAdditionalData()
631 locations = butlerLocation.getLocations()
632 with SafeFilename(os.path.join(butlerLocation.getStorage().root, locations[0]))
as locationString:
633 logLoc = LogicalLocation(locationString, additionalData)
635 obj.writeFitsWithOptions(logLoc.locString(), options=additionalData)
637 obj.writeFits(logLoc.locString())
641 """Read a catalog from a Parquet file specified by ButlerLocation.
643 The object returned by this is expected to be a subtype
644 of `ParquetTable`, which is a thin wrapper to `pyarrow.ParquetFile`
645 that allows for lazy loading of the data.
649 butlerLocation : ButlerLocation
650 The location for the object(s) to be read.
654 A list of objects as described by the butler location. One item for
655 each location in butlerLocation.getLocations()
658 additionalData = butlerLocation.getAdditionalData()
660 for locationString
in butlerLocation.getLocations():
661 locStringWithRoot = os.path.join(butlerLocation.getStorage().root, locationString)
662 logLoc = LogicalLocation(locStringWithRoot, additionalData)
663 if not os.path.exists(logLoc.locString()):
664 raise RuntimeError(
"No such parquet file: " + logLoc.locString())
666 pythonType = butlerLocation.getPythonType()
667 if pythonType
is not None:
668 if isinstance(pythonType, str):
671 filename = logLoc.locString()
675 results.append(pythonType(filename=filename))
681 """Writes pandas dataframe to parquet file.
685 butlerLocation : ButlerLocation
686 The location for the object to be written.
687 obj : `lsst.qa.explorer.parquetTable.ParquetTable`
688 Wrapped DataFrame to write.
691 additionalData = butlerLocation.getAdditionalData()
692 locations = butlerLocation.getLocations()
693 with SafeFilename(os.path.join(butlerLocation.getStorage().root, locations[0]))
as locationString:
694 logLoc = LogicalLocation(locationString, additionalData)
695 filename = logLoc.locString()
700 """Writes an object to a YAML file specified by ButlerLocation.
704 butlerLocation : ButlerLocation
705 The location for the object to be written.
706 obj : object instance
707 The object to be written.
709 additionalData = butlerLocation.getAdditionalData()
710 locations = butlerLocation.getLocations()
711 with SafeFilename(os.path.join(butlerLocation.getStorage().root, locations[0]))
as locationString:
712 logLoc = LogicalLocation(locationString, additionalData)
713 with open(logLoc.locString(),
"w")
as outfile:
714 yaml.dump(obj, outfile)
718 """Read an object from a pickle file specified by ButlerLocation.
722 butlerLocation : ButlerLocation
723 The location for the object(s) to be read.
727 A list of objects as described by the butler location. One item for
728 each location in butlerLocation.getLocations()
732 additionalData = butlerLocation.getAdditionalData()
733 for locationString
in butlerLocation.getLocations():
734 locStringWithRoot = os.path.join(butlerLocation.getStorage().root, locationString)
735 logLoc = LogicalLocation(locStringWithRoot, additionalData)
736 if not os.path.exists(logLoc.locString()):
737 raise RuntimeError(
"No such pickle file: " + logLoc.locString())
738 with open(logLoc.locString(),
"rb")
as infile:
742 if sys.version_info.major >= 3:
743 finalItem = pickle.load(infile, encoding=
"latin1")
745 finalItem = pickle.load(infile)
746 results.append(finalItem)
751 """Writes an object to a pickle file specified by ButlerLocation.
755 butlerLocation : ButlerLocation
756 The location for the object to be written.
757 obj : object instance
758 The object to be written.
760 additionalData = butlerLocation.getAdditionalData()
761 locations = butlerLocation.getLocations()
762 with SafeFilename(os.path.join(butlerLocation.getStorage().root, locations[0]))
as locationString:
763 logLoc = LogicalLocation(locationString, additionalData)
764 with open(logLoc.locString(),
"wb")
as outfile:
765 pickle.dump(obj, outfile, pickle.HIGHEST_PROTOCOL)
769 """Read a catalog from a FITS table specified by ButlerLocation.
773 butlerLocation : ButlerLocation
774 The location for the object(s) to be read.
778 A list of objects as described by the butler location. One item for
779 each location in butlerLocation.getLocations()
781 pythonType = butlerLocation.getPythonType()
782 if pythonType
is not None:
783 if isinstance(pythonType, str):
786 additionalData = butlerLocation.getAdditionalData()
787 for locationString
in butlerLocation.getLocations():
788 locStringWithRoot = os.path.join(butlerLocation.getStorage().root, locationString)
789 logLoc = LogicalLocation(locStringWithRoot, additionalData)
790 if not os.path.exists(logLoc.locString()):
791 raise RuntimeError(
"No such FITS catalog file: " + logLoc.locString())
793 if additionalData.exists(
"hdu"):
794 kwds[
"hdu"] = additionalData.getInt(
"hdu")
795 if additionalData.exists(
"flags"):
796 kwds[
"flags"] = additionalData.getInt(
"flags")
797 finalItem = pythonType.readFits(logLoc.locString(), **kwds)
798 results.append(finalItem)
803 """Writes a catalog to a FITS table specified by ButlerLocation.
807 butlerLocation : ButlerLocation
808 The location for the object to be written.
809 obj : object instance
810 The object to be written.
812 additionalData = butlerLocation.getAdditionalData()
813 locations = butlerLocation.getLocations()
814 with SafeFilename(os.path.join(butlerLocation.getStorage().root, locations[0]))
as locationString:
815 logLoc = LogicalLocation(locationString, additionalData)
816 if additionalData.exists(
"flags"):
817 kwds = dict(flags=additionalData.getInt(
"flags"))
820 obj.writeFits(logLoc.locString(), **kwds)
824 """Read from a butlerLocation (always fails for this storage type).
828 butlerLocation : ButlerLocation
829 The location for the object(s) to be read.
833 A list of objects as described by the butler location. One item for
834 each location in butlerLocation.getLocations()
836 raise NotImplementedError(
"Figures saved with MatplotlibStorage cannot be retreived using the Butler.")
840 """Writes a matplotlib.figure.Figure to a location, using the template's
841 filename suffix to infer the file format.
845 butlerLocation : ButlerLocation
846 The location for the object to be written.
847 obj : matplotlib.figure.Figure
848 The object to be written.
850 additionalData = butlerLocation.getAdditionalData()
851 locations = butlerLocation.getLocations()
852 with SafeFilename(os.path.join(butlerLocation.getStorage().root, locations[0]))
as locationString:
853 logLoc = LogicalLocation(locationString, additionalData)
858 _, ext = os.path.splitext(locations[0])
865 obj.savefig(logLoc.locString(), format=ext)
869 """Read an object from a YAML file specified by a butlerLocation.
873 butlerLocation : ButlerLocation
874 The location for the object(s) to be read.
878 A list of objects as described by the butler location. One item for
879 each location in butlerLocation.getLocations()
882 for locationString
in butlerLocation.getLocations():
883 logLoc = LogicalLocation(butlerLocation.getStorage().locationWithRoot(locationString),
884 butlerLocation.getAdditionalData())
885 if not os.path.exists(logLoc.locString()):
886 raise RuntimeError(
"No such YAML file: " + logLoc.locString())
888 if butlerLocation.pythonType ==
'lsst.daf.persistence.RepositoryCfg':
889 finalItem = Policy(filePath=logLoc.locString())
893 loader = yaml.FullLoader
894 except AttributeError:
896 with open(logLoc.locString(),
"rb")
as infile:
897 finalItem = yaml.load(infile, Loader=loader)
898 results.append(finalItem)
# Register the (read, write) formatter pair for each storage name handled by
# PosixStorage. Order and arguments are unchanged from the original calls.
PosixStorage.registerFormatters("FitsStorage", readFitsStorage, writeFitsStorage)
PosixStorage.registerFormatters("ParquetStorage", readParquetStorage, writeParquetStorage)
PosixStorage.registerFormatters("ConfigStorage", readConfigStorage, writeConfigStorage)
PosixStorage.registerFormatters("PickleStorage", readPickleStorage, writePickleStorage)
PosixStorage.registerFormatters("FitsCatalogStorage", readFitsCatalogStorage, writeFitsCatalogStorage)
PosixStorage.registerFormatters("MatplotlibStorage", readMatplotlibStorage, writeMatplotlibStorage)
PosixStorage.registerFormatters("YamlStorage", readYamlStorage, writeYamlStorage)

# Register PosixStorage as the storage class for the empty scheme ('') and
# for the 'file' scheme.
Storage.registerStorageClass(scheme='', cls=PosixStorage)
Storage.registerStorageClass(scheme='file', cls=PosixStorage)