# Public API of this module; findMapperClass is intentionally not exported.
__all__ = ("RepoConverter", "DataIdExtractor")
import os
import pickle

import yaml

from collections import OrderedDict

from lsst.daf.butler import DataId, DatasetType, DatasetRef
from lsst.daf.butler.gen2convert import FilePathParser, Translator
from lsst.log import Log
from lsst.utils import doImport
41 """Find the mapper class associated with a Gen2 data repository root. 46 Path to a Gen2 repository root directory. 51 A subclass of `lsst.obs.base.CameraMapper`. 56 Raised if the directory does not appear to be the root of a 59 cfgPath = os.path.join(root,
"repositoryCfg.yaml")
60 if os.path.exists(cfgPath):
61 with open(cfgPath,
"r") as f: 62 repoCfg = yaml.load(f, Loader=yaml.UnsafeLoader) 64 parentLinkPath = os.path.join(root,
"_parent")
65 if os.path.exists(parentLinkPath):
67 mapperFilePath = os.path.join(root,
"_mapper")
68 if os.path.exists(mapperFilePath):
69 with open(mapperFilePath,
"r") as f: 70 mapperClassPath = f.read().strip() 72 calibRegistryPath = os.path.join(root,
"calibRegistry.sqlite3")
73 if os.path.exists(calibRegistryPath):
74 return findMapperClass(os.path.normpath(os.path.join(root, os.path.pardir)))
75 raise ValueError(f
"Could not determine (Gen2) mapper class for repo at '{root}'.")
79 """A class that extracts Gen3 data IDs from Gen2 filenames for a 80 particular dataset type. 84 datasetTypeName : `str` 85 Name of the dataset type the object will process. 86 storageClass : `str` or `lsst.daf.butler.StorageClass` 87 Gen3 storage class of the dataset type. 88 universe : `lsst.daf.butler.DimensionUniverse` 89 Object containing all dimension definitions. 91 Key-value pairs that may need to appear in the Gen3 data ID, but can 92 never be inferred from a Gen2 filename. This should always include 93 the instrument name (even Gen3 data IDs that don't involve the 94 instrument dimension have instrument-dependent Gen2 filenames) and 95 should also include the skymap name for any data ID that involves 97 filePathParser : `lsst.daf.butler.gen2convert.FilePathParser`, optional 98 Object responsible for reading a Gen2 data ID from a filename. Will 99 be created from ``mapper`` if not provided. 100 translator : `lsst.daf.butler.gen2convert.Translator`, optional 101 Object responsible for converting a Gen2 data ID into a Gen3 data ID. 102 Will be created if not provided. 103 mapper : `lsst.obs.base.CameraMapper`, optional 104 Object that defines Gen2 filename templates. Must be provided if 105 ``filePathParser`` is not. 106 skyMap : `lsst.skymap.BaseSkyMap`, optional 107 SkyMap that defines tracts and patches. Must be provided for datasets 108 with a ``patch`` key in their data IDs. 111 def __init__(self, datasetTypeName, storageClass, *, universe, baseDataId,
112 filePathParser=None, translator=None, mapper=None, skyMap=None):
113 if filePathParser
is None:
114 filePathParser = FilePathParser.fromMapping(mapper.mappings[datasetTypeName])
116 if translator
is None:
117 translator = Translator.makeMatching(filePathParser.datasetType, baseDataId, skyMap=skyMap)
120 storageClass=storageClass)
124 """Extract a Gen3 data ID from the given filename, 128 fileNameInRoot : `str` 129 Filename relative to a Gen2 data repository root. 133 dataId : `lsst.daf.butler.DataId` or `None` 134 The Gen3 data ID, or `None` if the file was not recognized as an 135 instance of the extractor's dataset type. 144 """A helper class that ingests (some of) the contents of a Gen2 data 145 repository into a Gen3 data repository. 150 Root of the Gen2 data repository. 151 universe : `lsst.daf.butler.DimensionUniverse` 152 Object containing all dimension definitions. 154 Key-value pairs that may need to appear in the Gen3 data ID, but can 155 never be inferred from a Gen2 filename. This should always include 156 the instrument name (even Gen3 data IDs that don't involve the 157 instrument dimension have instrument-dependent Gen2 filenames) and 158 should also include the skymap name in order to process any data IDs 159 that involve tracts or patches. 160 mapper : `lsst.obs.base.CameraMapper`, optional 161 Object that defines Gen2 filename templates. Will be identified, 162 imported, and constructed from ``root`` if not provided. 163 skyMap : `lsst.skymap.BaseSkyMap`, optional 164 SkyMap that defines tracts and patches. Must be provided in order to 165 provess datasets with a ``patch`` key in their data IDs. 168 COADD_NAMES = (
"deep",
"goodSeeing",
"dcr")
169 REPO_ROOT_FILES = (
"registry.sqlite3",
"_mapper",
"repositoryCfg.yaml",
170 "calibRegistry.sqlite3",
"_parent")
def __init__(self, root, *, universe, baseDataId, mapper=None, skyMap=None):
    """Initialize the converter for a single Gen2 repository.

    NOTE(review): body reconstructed from a garbled extraction with several
    original lines missing; verify against the original source.
    """
    self.root = root
    if mapper is None:
        # Identify, import, and construct the Gen2 mapper from the repo root.
        cls = findMapperClass(root)
        mapper = cls(root=root)
    self.mapper = mapper
    self.universe = universe
    self.baseDataId = baseDataId
    if "skymap" in baseDataId:
        # Try to auto-discover a persisted SkyMap from any known coadd
        # flavor's pickle in this repository.
        for name in self.COADD_NAMES:
            mapping = self.mapper.mappings.get(f"{name}Coadd_skyMap", None)
            if mapping is None:
                continue
            filename = os.path.join(self.root, mapping.template)
            if os.path.exists(filename):
                if skyMap is not None:
                    raise ValueError("Multiple SkyMaps found in repository; please use multiple "
                                     "RepoConverters with an explicit skyMap argument for each.")
                # NOTE(review): unpickling executes arbitrary code; only
                # convert trusted repositories.  latin1 encoding handles
                # pickles written by Python 2.
                with open(filename, "rb") as f:
                    skyMap = pickle.load(f, encoding="latin1")
    self.skyMap = skyMap
    # Ordered so extractDatasetRef can move recently-matched extractors to
    # the front (simple move-to-front heuristic).
    self.extractors = OrderedDict()
203 """Add a dataset type to those recognized by the converter. 207 datasetTypeName : `str` 208 String name of the dataset type. 209 storageClass : `str` or `lsst.daf.butler.StorageClass` 210 Gen3 storage class of the dataset type. 214 extractor : `DataIdExtractor` 215 The object that will be used to extract data IDs for instances of 216 this dataset type (also held internally, so the return value can 225 """Extract a Gen3 `~lsst.daf.butler.DatasetRef` from a filename in a 226 Gen2 data repository. 230 fileNameInRoot : `str` 231 Name of the file, relative to the root of its Gen2 repository. 235 ref : `lsst.daf.butler.DatasetRef` or `None` 236 Reference to the Gen3 dataset that would be created by converting 237 this file, or `None` if the file is not recognized as an instance 238 of a dataset type known to this converter. 241 dataId = extractor.apply(fileNameInRoot)
242 if dataId
is not None:
246 self.
extractors.move_to_end(datasetTypeName, last=
False)
247 return DatasetRef(extractor.datasetType, dataId=dataId)
251 """Recursively a (subset of) a Gen2 data repository, yielding files 252 that may be convertible. 256 directory : `str`, optional 257 A subdirectory of the repository root to process, instead of 258 processing the entire repository. 259 skipDirs : sequence of `str` 260 Subdirectories that should be skipped. 264 fileNameInRoot : `str` 265 Name of a file in the repository, relative to the root of the 268 if directory
is None:
269 directory = self.
root 270 for dirPath, subdirNamesInDir, fileNamesInDir
in os.walk(directory, followlinks=
True):
273 def isRepoRoot(dirName):
274 return any(os.path.exists(os.path.join(dirPath, dirName, f))
276 subdirNamesInDir[:] = [d
for d
in subdirNamesInDir
if not isRepoRoot(d)
and d
not in skipDirs]
280 dirPathInRoot = dirPath[len(self.
root) + len(os.path.sep):]
281 for fileNameInDir
in fileNamesInDir:
282 fileNameInRoot = os.path.join(dirPathInRoot, fileNameInDir)
def convertRepo(self, butler, *, directory=None, transfer=None, formatter=None, skipDirs=()):
    """Ingest all recognized files into a Gen3 repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Gen3 butler that files should be ingested into.
    directory : `str`, optional
        A subdirectory of the repository root to process, instead of
        processing the entire repository.
    transfer : `str`, optional
        If not `None`, must be one of 'move', 'copy', 'hardlink', or
        'symlink' indicating how to transfer the file.
    formatter : `lsst.daf.butler.Formatter`, optional
        Formatter that should be used to retrieve the Dataset.  If not
        provided, the formatter will be constructed according to
        Datastore configuration.  This should only be used when converting
        a single dataset type or multiple dataset types of the same class.
    skipDirs : sequence of `str`
        Subdirectories that should be skipped.
    """
    log = Log.getLogger("RepoConverter")
    for extractor in self.extractors.values():
        # Registration is idempotent; make sure the Gen3 registry knows
        # every dataset type we may ingest.
        butler.registry.registerDatasetType(extractor.datasetType)
    skipped = {}
    for file in self.walkRepo(directory=directory, skipDirs=skipDirs):
        try:
            ref = self.extractDatasetRef(file)
            if ref is not None:
                butler.ingest(os.path.join(self.root, file), ref, transfer=transfer,
                              formatter=formatter)
        except Exception as err:
            # Best-effort conversion: bucket failures by exception type and
            # report them in bulk afterwards instead of aborting.
            skipped.setdefault(type(err), []).append(str(err))
    for cls, messages in skipped.items():
        log.warn("Skipped %s files due to exceptions of type %s.", len(messages), cls.__name__)
        if log.isDebugEnabled():
            for message in messages:
                log.debug(message)
def findMapperClass(root)
std::shared_ptr< FrameSet > append(FrameSet const &first, FrameSet const &second)
Construct a FrameSet that performs two transformations in series.
def temporaryLogLevel(name, level)
bool any(CoordinateExpr< N > const &expr) noexcept
Return true if any elements are true.
def extractDatasetRef(self, fileNameInRoot)
def convertRepo(self, butler, directory=None, transfer=None, formatter=None, skipDirs=())
def walkRepo(self, directory=None, skipDirs=())
def addDatasetType(self, datasetTypeName, storageClass)
std::vector< SchemaItem< Flag > > * items
def __init__(self, root, universe, baseDataId, mapper=None, skyMap=None)