34from .
import (LogicalLocation, Policy,
35 StorageInterface, Storage, ButlerLocation,
36 NoRepositroyAtRoot, RepositoryCfg, doImport)
39from .safeFileIo
import SafeFilename, safeMakeDir
# Public API of this module: only the storage class itself is exported.
__all__ = ["PosixStorage"]
46 """Defines the interface for a storage location on the local filesystem.
51 URI or path that
is used
as the storage location.
53 If
True a new repository will be created at the root location
if it
54 does
not exist. If
False then a new repository will
not be created.
59 If create
is False and a repository does
not exist at the root
60 specified by uri then NoRepositroyAtRoot
is raised.
64 self.
loglog = Log.getLogger(
"daf.persistence.butler")
66 if self.
rootroot
and not os.path.exists(self.
rootroot):
72 return 'PosixStorage(root=%s)' % self.
rootroot
75 def _pathFromURI(uri):
76 """Get the path part of the URI"""
77 return urllib.parse.urlparse(uri).path
81 """Get a relative path from a location to a location.
86 A path at which to start. It can be a relative path or an
89 A target location. It can be a relative path
or an absolute path.
94 A relative path that describes the path
from fromPath to toPath.
96 fromPath = os.path.realpath(fromPath)
97 return os.path.relpath(toPath, fromPath)
101 """Get an absolute path for the path from fromUri to toUri
105 fromPath : the starting location
106 A location at which to start. It can be a relative path or an
108 relativePath : the location relative to fromPath
114 Path that
is an absolute path representation of fromPath +
115 relativePath,
if one exists. If relativePath
is absolute
or if
116 fromPath
is not related to relativePath then relativePath will be
119 if os.path.isabs(relativePath):
121 fromPath = os.path.realpath(fromPath)
122 return os.path.normpath(os.path.join(fromPath, relativePath))
126 """Get a persisted RepositoryCfg
130 uri : URI or path to a RepositoryCfg
135 A RepositoryCfg instance
or None
137 storage = Storage.makeFromURI(uri)
141 locationList=
'repositoryCfg.yaml',
147 return storage.read(location)
151 storage = Storage.makeFromURI(cfg.root
if loc
is None else loc, create=
True)
155 locationList=
'repositoryCfg.yaml',
161 storage.write(location, cfg)
165 """Get the mapper class associated with a repository root.
Supports the legacy _parent symlink search (which was only ever posix-only). This should not be used by
new code and repositories; they should use the Repository parentCfg mechanism.
The location of a persisted RepositoryCfg is (new style repos), or
the location where a _mapper file is (old style repos).
178 A
class object or
a class instance, depending on the state of the
179 mapper when the repository was created.
184 cfg = PosixStorage.getRepositoryCfg(root)
190 mapperFile =
"_mapper"
191 while not os.path.exists(os.path.join(basePath, mapperFile)):
193 if os.path.exists(os.path.join(basePath,
"_parent")):
194 basePath = os.path.join(basePath,
"_parent")
199 if mapperFile
is not None:
200 mapperFile = os.path.join(basePath, mapperFile)
203 with open(mapperFile,
"r")
as f:
204 mapperName = f.readline().
strip()
205 components = mapperName.split(
".")
206 if len(components) <= 1:
207 raise RuntimeError(
"Unqualified mapper name %s in %s" %
208 (mapperName, mapperFile))
209 pkg = importlib.import_module(
".".join(components[:-1]))
210 return getattr(pkg, components[-1])
216 """For Butler V1 Repositories only, if a _parent symlink exists, get the location pointed to by the
222 A path to the folder on the local filesystem.
227 A path to the parent folder indicated by the _parent symlink,
or None if there
is no _parent
230 linkpath = os.path.join(root, '_parent')
231 if os.path.exists(linkpath):
233 return os.readlink(os.path.join(root,
'_parent'))
237 return os.path.join(root,
'_parent')
240 def write(self, butlerLocation, obj):
241 """Writes an object to a location and persistence format specified by
246 butlerLocation : ButlerLocation
247 The location & formatting for the object to be written.
248 obj : object instance
249 The object to be written.
251 self.loglog.debug("Put location=%s obj=%s", butlerLocation, obj)
253 writeFormatter = self.
getWriteFormattergetWriteFormatter(butlerLocation.getStorageName())
254 if not writeFormatter:
255 writeFormatter = self.
getWriteFormattergetWriteFormatter(butlerLocation.getPythonType())
257 writeFormatter(butlerLocation, obj)
260 raise(RuntimeError(
"No formatter for location:{}".
format(butlerLocation)))
262 def read(self, butlerLocation):
263 """Read from a butlerLocation.
267 butlerLocation : ButlerLocation
268 The location & formatting for the
object(s) to be read.
272 A list of objects
as described by the butler location. One item
for
273 each location
in butlerLocation.getLocations()
275 readFormatter = self.getReadFormattergetReadFormatter(butlerLocation.getStorageName())
276 if not readFormatter:
277 readFormatter = self.
getReadFormattergetReadFormatter(butlerLocation.getPythonType())
279 return readFormatter(butlerLocation)
281 raise(RuntimeError(
"No formatter for location:{}".
format(butlerLocation)))
284 """Implementation of PosixStorage.exists for ButlerLocation objects.
286 storageName = location.getStorageName()
287 if storageName
not in (
'FitsStorage',
288 'PickleStorage',
'ConfigStorage',
'FitsCatalogStorage',
289 'YamlStorage',
'ParquetStorage',
'MatplotlibStorage'):
290 self.
loglog.
warn(
"butlerLocationExists for non-supported storage %s" % location)
292 for locationString
in location.getLocations():
293 logLoc =
LogicalLocation(locationString, location.getAdditionalData()).locString()
300 """Check if location exists.
304 location : ButlerLocation or string
A string or a ButlerLocation that describes the location of an
object in this storage.
311 True if exists,
else False.
313 if isinstance(location, ButlerLocation):
320 """Get the full path to the location.
325 return os.path.join(self.
rootroot, location)
329 """Test if a Version 1 Repository exists.
331 Version 1 Repositories only exist in posix storages, do
not have a
332 RepositoryCfg file,
and contain either a registry.sqlite3 file, a
333 _mapper file,
or a _parent link.
338 A path to a folder on the local filesystem.
343 True if the repository at root exists,
else False.
345 return os.path.exists(root)
and (
346 os.path.exists(os.path.join(root,
"registry.sqlite3"))
347 or os.path.exists(os.path.join(root,
"_mapper"))
348 or os.path.exists(os.path.join(root,
"_parent"))
352 """Copy a file from one location to another on the local filesystem.
357 Path and name of existing file.
359 Path
and name of new file.
365 shutil.copy(os.path.join(self.rootroot, fromLocation), os.path.join(self.rootroot, toLocation))
368 """Get a handle to a local copy of the file, downloading it to a
A path to the file in storage, relative to root.
377 A handle to a local copy of the file. If storage
is remote it will be
378 a temporary file. If storage
is local it may be the original file
or
379 a temporary file. The file name can be gotten via the
'name' property
380 of the returned object.
382 p = os.path.join(self.rootroot, path)
392 """Search for the given path in this storage instance.
If the path contains an HDU indicator (a number in brackets, e.g.
'foo.fits[1]'), this will be stripped when searching and so
will match filenames without the HDU indicator, e.g. 'foo.fits'. The
path returned WILL contain the indicator though, e.g. ['foo.fits[1]'].
402 A filename (
and optionally prefix path) to search
for within root.
407 The location that was found,
or None if no location was found.
412 def search(root, path, searchParents=False):
413 """Look for the given path in the current root.
415 Also supports searching for the path
in Butler v1 repositories by
416 following the Butler v1 _parent symlink
If the path contains an HDU indicator (a number in brackets, e.g.
'foo.fits[1]'), this will be stripped when searching and so
will match filenames without the HDU indicator, e.g. 'foo.fits'. The
path returned WILL contain the indicator though, e.g. ['foo.fits[1]'].
426 The path to the root directory.
428 The path to the file within the root directory.
429 searchParents : bool, optional
430 For Butler v1 repositories only,
if true
and a _parent symlink
431 exists, then the directory at _parent will be searched
if the file
432 is not found
in the root repository. Will
continue searching the
433 parent of the parent until the file
is found
or no additional
439 The location that was found,
or None if no location was found.
445 while len(rootDir) > 1
and rootDir[-1] ==
'/':
446 rootDir = rootDir[:-1]
448 if not path.startswith(
'/'):
451 elif path.startswith(rootDir +
"/"):
453 path = path[len(rootDir +
'/'):]
455 elif rootDir ==
"/" and path.startswith(
"/"):
460 pathPrefix = os.path.dirname(path)
461 while pathPrefix !=
"" and pathPrefix !=
"/":
462 if os.path.realpath(pathPrefix) == os.path.realpath(root):
464 pathPrefix = os.path.dirname(pathPrefix)
465 if pathPrefix ==
"/":
467 elif pathPrefix !=
"":
468 path = path[len(pathPrefix)+1:]
474 firstBracket = path.find(
"[")
475 if firstBracket != -1:
476 strippedPath = path[:firstBracket]
477 pathStripped = path[firstBracket:]
481 paths = glob.glob(os.path.join(dir, strippedPath))
483 if pathPrefix != rootDir:
484 paths = [p[len(rootDir+
'/'):]
for p
in paths]
485 if pathStripped
is not None:
486 paths = [p + pathStripped
for p
in paths]
489 dir = os.path.join(dir,
"_parent")
490 if not os.path.exists(dir):
497 """Ask if a storage at the location described by uri exists
URI to the root location of the storage
507 True if the storage exists, false
if not
509 return os.path.exists(PosixStorage._pathFromURI(uri))
513 """Read an lsst.pex.config.Config from a butlerLocation.
517 butlerLocation : ButlerLocation
518 The location for the
object(s) to be read.
522 A list of objects
as described by the butler location. One item
for
523 each location
in butlerLocation.getLocations()
526 for locationString
in butlerLocation.getLocations():
527 locStringWithRoot = os.path.join(butlerLocation.getStorage().root, locationString)
528 logLoc =
LogicalLocation(locStringWithRoot, butlerLocation.getAdditionalData())
529 if not os.path.exists(logLoc.locString()):
530 raise RuntimeError(
"No such config file: " + logLoc.locString())
533 with open(logLoc.locString(),
"r")
as fd:
534 config_py = fd.read()
535 config = pexConfig.Config._fromPython(config_py)
537 pythonType = butlerLocation.getPythonType()
538 if pythonType
is not None:
539 if isinstance(pythonType, str):
541 if not isinstance(config, pythonType):
542 raise TypeError(f
"Unexpected type of config: {type(config)}, expected {pythonType}")
544 results.append(config)
549 """Writes an lsst.pex.config.Config object to a location specified by
554 butlerLocation : ButlerLocation
555 The location for the object to be written.
556 obj : object instance
557 The object to be written.
559 filename = os.path.join(butlerLocation.getStorage().root, butlerLocation.getLocations()[0])
561 logLoc =
LogicalLocation(locationString, butlerLocation.getAdditionalData())
562 obj.save(logLoc.locString())
566 """Read objects from a FITS file specified by ButlerLocation.
568 The object is read using
class or static method
570 ``readFits(path)``. The ``options`` argument
is the data returned by
571 ``butlerLocation.getAdditionalData()``.
575 butlerLocation : ButlerLocation
576 The location
for the
object(s) to be read.
580 A list of objects
as described by the butler location. One item
for
581 each location
in butlerLocation.getLocations()
583 pythonType = butlerLocation.getPythonType()
584 if pythonType
is not None:
585 if isinstance(pythonType, str):
587 supportsOptions = hasattr(pythonType,
"readFitsWithOptions")
588 if not supportsOptions:
590 if issubclass(pythonType, (PropertySet, PropertyList)):
592 reader = readMetadata
594 reader = pythonType.readFits
596 additionalData = butlerLocation.getAdditionalData()
597 for locationString
in butlerLocation.getLocations():
598 locStringWithRoot = os.path.join(butlerLocation.getStorage().root, locationString)
602 filePath = re.sub(
r"(\.fits(.[a-zA-Z0-9]+)?)(\[.+\])$",
r"\1", logLoc.locString())
603 if not os.path.exists(filePath):
604 raise RuntimeError(
"No such FITS file: " + logLoc.locString())
606 finalItem = pythonType.readFitsWithOptions(logLoc.locString(), options=additionalData)
608 fileName = logLoc.locString()
609 mat = re.search(
r"^(.*)\[(\d+)\]$", fileName)
611 if mat
and reader == readMetadata:
612 fileName = mat.group(1)
613 hdu =
int(mat.group(2))
615 finalItem = reader(fileName, hdu=hdu)
617 finalItem = reader(fileName)
618 results.append(finalItem)
623 """Writes an object to a FITS file specified by ButlerLocation.
625 The object is written using method
626 ``writeFitsWithOptions(path, options)``,
if it exists,
else
627 ``
writeFits(path)``. The ``options`` argument
is the data returned by
628 ``butlerLocation.getAdditionalData()``.
632 butlerLocation : ButlerLocation
633 The location
for the object to be written.
634 obj : object instance
635 The object to be written.
637 supportsOptions = hasattr(obj, "writeFitsWithOptions")
638 additionalData = butlerLocation.getAdditionalData()
639 locations = butlerLocation.getLocations()
640 with SafeFilename(os.path.join(butlerLocation.getStorage().root, locations[0]))
as locationString:
643 obj.writeFitsWithOptions(logLoc.locString(), options=additionalData)
645 obj.writeFits(logLoc.locString())
649 """Read a catalog from a Parquet file specified by ButlerLocation.
651 The object returned by this is expected to be a subtype
652 of `ParquetTable`, which
is a thin wrapper to `pyarrow.ParquetFile`
653 that allows
for lazy loading of the data.
657 butlerLocation : ButlerLocation
658 The location
for the
object(s) to be read.
662 A list of objects
as described by the butler location. One item
for
663 each location
in butlerLocation.getLocations()
666 additionalData = butlerLocation.getAdditionalData()
668 for locationString
in butlerLocation.getLocations():
669 locStringWithRoot = os.path.join(butlerLocation.getStorage().root, locationString)
671 if not os.path.exists(logLoc.locString()):
672 raise RuntimeError(
"No such parquet file: " + logLoc.locString())
674 pythonType = butlerLocation.getPythonType()
675 if pythonType
is not None:
676 if isinstance(pythonType, str):
679 filename = logLoc.locString()
683 results.append(pythonType(filename=filename))
689 """Writes pandas dataframe to parquet file.
693 butlerLocation : ButlerLocation
The location for the object to be written.
695 obj : `lsst.qa.explorer.parquetTable.ParquetTable`
696 Wrapped DataFrame to write.
699 additionalData = butlerLocation.getAdditionalData()
700 locations = butlerLocation.getLocations()
701 with SafeFilename(os.path.join(butlerLocation.getStorage().root, locations[0]))
as locationString:
703 filename = logLoc.locString()
708 """Writes an object to a YAML file specified by ButlerLocation.
712 butlerLocation : ButlerLocation
713 The location for the object to be written.
714 obj : object instance
715 The object to be written.
717 additionalData = butlerLocation.getAdditionalData()
718 locations = butlerLocation.getLocations()
719 with SafeFilename(os.path.join(butlerLocation.getStorage().root, locations[0]))
as locationString:
721 with open(logLoc.locString(),
"w")
as outfile:
722 yaml.dump(obj, outfile)
726 """Read an object from a pickle file specified by ButlerLocation.
730 butlerLocation : ButlerLocation
731 The location for the
object(s) to be read.
735 A list of objects
as described by the butler location. One item
for
736 each location
in butlerLocation.getLocations()
740 additionalData = butlerLocation.getAdditionalData()
741 for locationString
in butlerLocation.getLocations():
742 locStringWithRoot = os.path.join(butlerLocation.getStorage().root, locationString)
744 if not os.path.exists(logLoc.locString()):
745 raise RuntimeError(
"No such pickle file: " + logLoc.locString())
746 with open(logLoc.locString(),
"rb")
as infile:
750 if sys.version_info.major >= 3:
751 finalItem = pickle.load(infile, encoding=
"latin1")
753 finalItem = pickle.load(infile)
754 results.append(finalItem)
759 """Writes an object to a pickle file specified by ButlerLocation.
763 butlerLocation : ButlerLocation
764 The location for the object to be written.
765 obj : object instance
766 The object to be written.
768 additionalData = butlerLocation.getAdditionalData()
769 locations = butlerLocation.getLocations()
770 with SafeFilename(os.path.join(butlerLocation.getStorage().root, locations[0]))
as locationString:
772 with open(logLoc.locString(),
"wb")
as outfile:
773 pickle.dump(obj, outfile, pickle.HIGHEST_PROTOCOL)
777 """Read a catalog from a FITS table specified by ButlerLocation.
781 butlerLocation : ButlerLocation
782 The location for the
object(s) to be read.
786 A list of objects
as described by the butler location. One item
for
787 each location
in butlerLocation.getLocations()
789 pythonType = butlerLocation.getPythonType()
790 if pythonType
is not None:
791 if isinstance(pythonType, str):
794 additionalData = butlerLocation.getAdditionalData()
795 for locationString
in butlerLocation.getLocations():
796 locStringWithRoot = os.path.join(butlerLocation.getStorage().root, locationString)
798 if not os.path.exists(logLoc.locString()):
799 raise RuntimeError(
"No such FITS catalog file: " + logLoc.locString())
801 if additionalData.exists(
"hdu"):
802 kwds[
"hdu"] = additionalData.getInt(
"hdu")
803 if additionalData.exists(
"flags"):
804 kwds[
"flags"] = additionalData.getInt(
"flags")
805 finalItem = pythonType.readFits(logLoc.locString(), **kwds)
806 results.append(finalItem)
811 """Writes a catalog to a FITS table specified by ButlerLocation.
815 butlerLocation : ButlerLocation
816 The location for the object to be written.
817 obj : object instance
818 The object to be written.
820 additionalData = butlerLocation.getAdditionalData()
821 locations = butlerLocation.getLocations()
822 with SafeFilename(os.path.join(butlerLocation.getStorage().root, locations[0]))
as locationString:
824 if additionalData.exists(
"flags"):
825 kwds = dict(flags=additionalData.getInt(
"flags"))
828 obj.writeFits(logLoc.locString(), **kwds)
832 """Read from a butlerLocation (always fails for this storage type).
836 butlerLocation : ButlerLocation
837 The location for the
object(s) to be read.
841 A list of objects
as described by the butler location. One item
for
842 each location
in butlerLocation.getLocations()
844 raise NotImplementedError(
"Figures saved with MatplotlibStorage cannot be retreived using the Butler.")
848 """Writes a matplotlib.figure.Figure to a location, using the template's
849 filename suffix to infer the file format.
853 butlerLocation : ButlerLocation
854 The location for the object to be written.
855 obj : matplotlib.figure.Figure
856 The object to be written.
858 additionalData = butlerLocation.getAdditionalData()
859 locations = butlerLocation.getLocations()
860 with SafeFilename(os.path.join(butlerLocation.getStorage().root, locations[0]))
as locationString:
866 _, ext = os.path.splitext(locations[0])
873 obj.savefig(logLoc.locString(), format=ext)
877 """Read an object from a YAML file specified by a butlerLocation.
881 butlerLocation : ButlerLocation
882 The location for the
object(s) to be read.
886 A list of objects
as described by the butler location. One item
for
887 each location
in butlerLocation.getLocations()
890 for locationString
in butlerLocation.getLocations():
891 logLoc =
LogicalLocation(butlerLocation.getStorage().locationWithRoot(locationString),
892 butlerLocation.getAdditionalData())
893 if not os.path.exists(logLoc.locString()):
894 raise RuntimeError(
"No such YAML file: " + logLoc.locString())
896 if butlerLocation.pythonType ==
'lsst.daf.persistence.RepositoryCfg':
897 finalItem =
Policy(filePath=logLoc.locString())
901 loader = yaml.UnsafeLoader
902 except AttributeError:
904 with open(logLoc.locString(),
"rb")
as infile:
905 finalItem = yaml.load(infile, Loader=loader)
906 results.append(finalItem)
# Register, per storage name, the read and write functions that PosixStorage
# looks up through getReadFormatter / getWriteFormatter.
PosixStorage.registerFormatters("FitsStorage", readFitsStorage, writeFitsStorage)
PosixStorage.registerFormatters("ParquetStorage", readParquetStorage, writeParquetStorage)
PosixStorage.registerFormatters("ConfigStorage", readConfigStorage, writeConfigStorage)
PosixStorage.registerFormatters("PickleStorage", readPickleStorage, writePickleStorage)
PosixStorage.registerFormatters("FitsCatalogStorage", readFitsCatalogStorage, writeFitsCatalogStorage)
PosixStorage.registerFormatters("MatplotlibStorage", readMatplotlibStorage, writeMatplotlibStorage)
PosixStorage.registerFormatters("YamlStorage", readYamlStorage, writeYamlStorage)

# Register PosixStorage for the empty scheme and for the 'file' scheme.
# NOTE(review): the empty scheme presumably covers plain paths that carry no
# URI scheme -- confirm against Storage.makeFromURI.
Storage.registerStorageClass(scheme='', cls=PosixStorage)
Storage.registerStorageClass(scheme='file', cls=PosixStorage)
Class for logical location of a persisted Persistable instance.
def copyFile(self, fromLocation, toLocation)
def getLocalFile(self, path)
def write(self, butlerLocation, obj)
def putRepositoryCfg(cfg, loc=None)
def getRepositoryCfg(uri)
def search(root, path, searchParents=False)
def absolutePath(fromPath, relativePath)
def getParentSymlinkPath(root)
def locationWithRoot(self, location)
def read(self, butlerLocation)
def exists(self, location)
def instanceSearch(self, path)
def __init__(self, uri, create)
def butlerLocationExists(self, location)
def relativePath(fromPath, toPath)
def getWriteFormatter(cls, objType)
def search(cls, root, path)
def getReadFormatter(cls, objType)
def instanceSearch(self, path)
def writeParquetStorage(butlerLocation, obj)
def writePickleStorage(butlerLocation, obj)
def writeYamlStorage(butlerLocation, obj)
def writeFitsStorage(butlerLocation, obj)
def readFitsStorage(butlerLocation)
def readPickleStorage(butlerLocation)
def writeConfigStorage(butlerLocation, obj)
def readMatplotlibStorage(butlerLocation)
def readConfigStorage(butlerLocation)
def writeMatplotlibStorage(butlerLocation, obj)
def writeFitsCatalogStorage(butlerLocation, obj)
def readFitsCatalogStorage(butlerLocation)
def readParquetStorage(butlerLocation)
def readYamlStorage(butlerLocation)
def safeMakeDir(directory)
def writeFits(filename, stamps, metadata, type_name, write_mask, write_variance, write_archive=False)
def readFitsWithOptions(filename, stamp_factory, options)
def format(config, name=None, writeSourceLine=True, prefix="", verbose=False)