21 from __future__
import annotations
23 __all__ = [
"ConvertRepoConfig",
"ConvertRepoTask",
"ConvertRepoSkyMapConfig"]
27 from dataclasses
import dataclass
28 from typing
import Iterable, Optional, List, Dict
31 from lsst.daf.butler
import (
35 from lsst.pex.config
import Config, ConfigurableField, ConfigDictField, DictField, ListField, Field
39 from ..ingest
import RawIngestTask
40 from .repoConverter
import ConversionSubset
41 from .rootRepoConverter
import RootRepoConverter
42 from .calibRepoConverter
import CalibRepoConverter
43 from .standardRepoConverter
import StandardRepoConverter
48 """Struct containing information about a skymap that may appear in a Gen2 53 """Name of the skymap used in Gen3 data IDs. 57 """Hash computed by `BaseSkyMap.getSha1`. 61 """Name of the skymap used in Gen3 data IDs. 65 """Whether this skymap has been found in at least one repository being 71 """Sub-config used to hold the parameters of a SkyMap. 75 This config only needs to exist because we can't put a 76 `~lsst.pex.config.RegistryField` directly inside a 77 `~lsst.pex.config.ConfigDictField`. 79 It needs to have its only field named "skyMap" for compatibility with the 80 configuration of `lsst.pipe.tasks.MakeSkyMapTask`, which we want so we can 81 use one config file in an obs package to configure both. 83 This name leads to unfortunate repetition with the field named 84 "skymap" that holds it - "skyMap[name].skyMap" - but that seems 87 skyMap = skyMapRegistry.makeField(
88 doc=
"Type and parameters for the SkyMap itself.",
94 raws = ConfigurableField(
95 "Configuration for subtask responsible for ingesting raws and adding " 96 "visit and exposure dimension entries.",
99 skyMaps = ConfigDictField(
100 "Mapping from Gen3 skymap name to the parameters used to construct a " 101 "BaseSkyMap instance. This will be used to associated names with " 102 "existing skymaps found in the Gen2 repo.",
104 itemtype=ConvertRepoSkyMapConfig,
107 collections = DictField(
108 "Special collections (values) for certain dataset types (keys). " 109 "These are used in addition to rerun collections for datasets in " 110 "reruns. The 'raw' dataset must have an entry here if it is to be " 115 "deepCoadd_skyMap":
"skymaps",
116 "brightObjectMask":
"masks",
119 storageClasses = DictField(
120 "Mapping from dataset type name or Gen2 policy entry (e.g. 'python' " 121 "or 'persistable') to the Gen3 StorageClass name.",
125 "BaseSkyMap":
"SkyMap",
126 "BaseCatalog":
"Catalog",
127 "BackgroundList":
"Background",
129 "MultilevelParquetTable":
"DataFrame",
132 doRegisterInstrument = Field(
133 "If True (default), add dimension records for the Instrument and its " 134 "filters and detectors to the registry instead of assuming they are " 139 doWriteCuratedCalibrations = Field(
140 "If True (default), ingest human-curated calibrations directly via " 141 "the Instrument interface. Note that these calibrations are never " 142 "converted from Gen2 repositories.",
147 "The names of reference catalogs (subdirectories under ref_cats) to " 152 fileIgnorePatterns = ListField(
153 "Filename globs that should be ignored instead of being treated as " 156 default=[
"README.txt",
"*~?",
"butler.yaml",
"gen3.sqlite3"]
158 datasetIncludePatterns = ListField(
159 "Glob-style patterns for dataset type names that should be converted.",
163 datasetIgnorePatterns = ListField(
164 "Glob-style patterns for dataset type names that should not be " 165 "converted despite matching a pattern in datasetIncludePatterns.",
170 "Key used for the Gen2 equivalent of 'detector' in data IDs.",
175 "If True (default), only convert datasets that are related to the " 176 "ingested visits. Ignored unless a list of visits is passed to " 184 return self.
raws.transfer
188 self.
raws.transfer = value
192 return self.
raws.instrument
196 self.
raws.instrument = value
206 """A task that converts one or more related Gen2 data repositories to a 207 single Gen3 data repository (with multiple collections). 211 config: `ConvertRepoConfig` 212 Configuration for this task. 213 butler3: `lsst.daf.butler.Butler` 214 Gen3 Butler instance that represents the data repository datasets will 215 be ingested into. The collection and/or run associated with this 216 Butler will be ignored in favor of collections/runs passed via config 219 Other keyword arguments are forwarded to the `Task` constructor. 223 Most of the work of converting repositories is delegated to instances of 224 the `RepoConverter` hierarchy. The `ConvertRepoTask` instance itself holds 225 only state that is relevant for all Gen2 repositories being ingested, while 226 each `RepoConverter` instance holds only state relevant for the conversion 227 of a single Gen2 repository. Both the task and the `RepoConverter` 228 instances are single use; `ConvertRepoTask.run` and most `RepoConverter` 229 methods may only be called once on a particular instance. 232 ConfigClass = ConvertRepoConfig
234 _DefaultName =
"convertRepo" 236 def __init__(self, config=None, *, butler3: Butler3, **kwds):
249 for name, config
in self.
config.skyMaps.items():
250 instance = config.skyMap.apply()
251 struct =
ConfiguredSkyMap(name=name, sha1=instance.getSha1(), instance=instance)
257 """Return `True` if configuration indicates that the given dataset type 260 This method is intended to be called primarily by the 261 `RepoConverter` instances used internally by the task. 266 Name of the dataset type. 271 Whether the dataset should be included in the conversion. 274 any(fnmatch.fnmatchcase(datasetTypeName, pattern)
275 for pattern
in self.
config.datasetIncludePatterns)
and 276 not any(fnmatch.fnmatchcase(datasetTypeName, pattern)
277 for pattern
in self.
config.datasetIgnorePatterns)
281 """Indicate that a repository uses the given SkyMap. 283 This method is intended to be called primarily by the 284 `RepoConverter` instances used internally by the task. 288 skyMap : `lsst.skymap.BaseSkyMap` 289 SkyMap instance being used, typically retrieved from a Gen2 295 The name of the skymap in Gen3 data IDs. 297 sha1 = skyMap.getSha1()
300 except KeyError
as err:
301 raise LookupError(f
"SkyMap with sha1={sha1} not included in configuration.")
from err
306 """Register all skymaps that have been marked as used. 308 This method is intended to be called primarily by the 309 `RepoConverter` instances used internally by the task. 313 subset : `ConversionSubset`, optional 314 Object that will be used to filter converted datasets by data ID. 315 If given, it will be updated with the tracts of this skymap that 316 overlap the visits in the subset. 320 struct.instance.register(struct.name, self.
registry)
321 if subset
is not None and self.
config.relatedOnly:
322 subset.addSkyMap(self.
registry, struct.name)
325 """Indicate that a repository uses the given SkyPix dimension. 327 This method is intended to be called primarily by the 328 `RepoConverter` instances used internally by the task. 332 dimension : `lsst.daf.butler.SkyPixDimension` 333 Dimension representing a pixelization of the sky. 338 """Register all SkyPix dimensions that have been marked as used. 340 This method is intended to be called primarily by the 341 `RepoConverter` instances used internally by the task. 345 subset : `ConversionSubset`, optional 346 Object that will be used to filter converted datasets by data ID. 347 If given, it will be updated with the pixelization IDs that 348 overlap the visits in the subset. 350 if subset
is not None and self.
config.relatedOnly:
352 subset.addSkyPix(self.
registry, dimension)
354 def run(self, root: str, collections: List[str], *,
355 calibs: Dict[str, List[str]] =
None,
356 reruns: Dict[str, List[str]] =
None,
357 visits: Optional[Iterable[int]] =
None):
358 """Convert a group of related data repositories. 363 Complete path to the root Gen2 data repository. This should be 364 a data repository that includes a Gen2 registry and any raw files 365 and/or reference catalogs. 366 collections : `list` of `str` 367 Gen3 collections that datasets from the root repository should be 368 associated with. This should include any rerun collection that 369 these datasets should also be considered to be part of; because of 370 structural difference between Gen2 parent/child relationships and 371 Gen3 collections, these cannot be reliably inferred. 373 Dictionary mapping calibration repository path to the collections 374 that the repository's datasets should be associated with. The path 375 may be relative to ``root`` or absolute. Collections should 376 include child repository collections as appropriate (see 377 documentation for ``collections``). 379 Dictionary mapping rerun repository path to the collections that 380 the repository's datasets should be associated with. The path may 381 be relative to ``root`` or absolute. Collections should include 382 child repository collections as appropriate (see documentation for 384 visits : iterable of `int`, optional 385 The integer IDs of visits to convert. If not provided, all visits 386 in the Gen2 root repository will be converted. 393 if visits
is not None:
396 if self.
config.relatedOnly:
397 self.
log.
warn(
"config.relatedOnly is True but all visits are being ingested; " 398 "no filtering will be done.")
401 if self.
config.doRegisterInstrument:
407 rootConverter =
RootRepoConverter(task=self, root=root, collections=collections, subset=subset)
409 converters.append(rootConverter)
411 for root, collections
in calibs.items():
412 if not os.path.isabs(root):
413 root = os.path.join(rootConverter.root, root)
415 mapper=rootConverter.mapper,
416 subset=rootConverter.subset)
418 converters.append(converter)
420 for root, collections
in reruns.items():
421 if not os.path.isabs(root):
422 root = os.path.join(rootConverter.root, root)
424 subset=rootConverter.subset)
426 converters.append(converter)
443 for converter
in converters:
444 converter.insertDimensionData()
455 for converter
in converters:
def makeSubtask(self, name, keyArgs)
daf::base::PropertySet * set
def isDatasetTypeIncluded
bool any(CoordinateExpr< N > const &expr) noexcept
Return true if any elements are true.