21 """Concrete implementations of `PathElementHandler`.
23 The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to
24 avoid a circular dependency between modules.
26 from __future__
import annotations
28 __all__ = [
"IgnoreHandler",
"SkipHandler",
"SubdirectoryHandler",
"TargetFileHandler"]
30 from abc
import abstractmethod
41 from lsst.daf.butler
import (
47 from ..translators
import Translator, makeCalibrationLabel
48 from .parser
import PathElementParser
49 from .scanner
import PathElementHandler, DirectoryScanner
52 from lsst.daf.butler
import FormatterParameter
56 """A `PathElementHandler` that matches via a regular expression, and does
59 An `IgnoreHandler` is used to ignore file or directory patterns that can
60 occur at any level in the directory tree, and have no relation to any
61 Gen2 filename template.
65 pattern : `re.Pattern`
66 A regular expression pattern.
68 Whether this handler should be applied to files (`True`) or
69 directories (`False`).
71 def __init__(self, pattern: re.Pattern, isForFiles: bool):
76 __slots__ = (
"_pattern",
"_isForFiles")
79 return f
"{type(self).__name__}({self._pattern}, isForFiles={self._isForFiles})"
90 def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
91 predicate: Callable[[DataCoordinate], bool]) -> bool:
100 """An intermediate base class for `PathElementHandler` classes that utilize
101 a `PathElementParser` to match a Gen2 filename template.
105 parser : `PathElementParser`
106 An object that matches the path element this handler is responsible for
107 and extracts a (partial) Gen2 data ID from it.
113 __slots__ = (
"_parser",)
116 return f
"{type(self).__name__}(parser={self._parser})"
118 def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
119 predicate: Callable[[DataCoordinate], bool]) -> bool:
122 if nextDataId2
is None:
124 self.
handle(path, nextDataId2, datasets, predicate=predicate)
133 def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
134 predicate: Callable[[DataCoordinate], bool]):
135 """Customization hook for ``__call__``.
137 Subclasses must override this method, while external callers (i.e.
138 `DirectoryScanner`) should instead invoke `__call__`.
143 Full path of the file or directory.
145 Gen2 data ID (usually partial) extracted from the path so far.
146 datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
147 Dictionary that found datasets should be added to.
148 predicate : `~collections.abc.Callable`
149 A callable taking a single `DataCoordinate` argument and returning
150 `bool`, indicating whether that (Gen3) data ID represents one
151 that should be included in the scan.
152 formatterMap : `dict`, optional
153 Map dataset type to specialist formatter.
155 raise NotImplementedError()
159 """A `ParsedPathElementHandler` that does nothing with an entry other
160 optionally logging a warning message.
162 A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
163 want to (or cannot) extract Gen3 datasets from, or other files/directories
164 that always appears at a fixed level in the directory tree.
168 parser : `PathElementParser`
169 An object that matches the path element this handler is responsible for
170 and extracts a (partial) Gen2 data ID from it.
172 Whether this handler should be applied to files (`True`) or
173 directories (`False`).
174 message : `str`, optional
175 A message to log at warning level when this handler matches a path
176 entry. If `None`, matched entries will be silently skipped.
178 def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
183 __slots__ = (
"_message",
"_isForFiles")
189 def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
190 predicate: Callable[[DataCoordinate], bool]):
197 """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.
201 parser : `PathElementParser`
202 An object that matches the path element this handler is responsible for
203 and extracts a (partial) Gen2 data ID from it.
207 The nested `DirectoryScanner` is default-constructed and should be
208 populated with child handlers after the `SubdirectoryHandler` is created.
215 __slots__ = (
"scanner",)
221 def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
222 predicate: Callable[[DataCoordinate], bool]):
230 dataId3 = self.
translate(nextDataId2, partial=
True)
231 if dataId3
is not None:
232 scan = predicate(dataId3)
237 handler.lastDataId2 = nextDataId2
238 self.
scanner.scan(path, datasets, predicate=predicate)
240 def translate(self, dataId2: dict, *, partial: bool =
False) -> Optional[DataCoordinate]:
246 result = handler.translate(dataId2, partial=
True)
247 if result
is not None:
251 scanner: DirectoryScanner
252 """Scanner object that holds handlers for the entries of the subdirectory
253 matched by this handler (`DirectoryScanner`).
258 """A `PathElementHandler` that matches files that correspond to target
259 datasets and outputs `FileDataset` instances for them.
263 parser : `PathElementParser`
264 An object that matches the path element this handler is responsible for
265 and extracts a (partial) Gen2 data ID from it.
266 translator : `Translator`
267 Object that translates data IDs from Gen2 to Gen3.
268 datasetType : `lsst.daf.butler.DatasetType`
269 Gen3 dataset type for the datasets this handler matches.
270 formatter : `lsst.daf.butler.Formatter` or `str`, optional
271 A Gen 3 formatter class or fully-qualified name.
273 def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType,
274 formatter: FormatterParameter =
None):
280 __slots__ = (
"_translator",
"_datasetType",
"_formatter")
283 return f
"{type(self).__name__}({self._translator}, {self._datasetType})"
289 def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
290 predicate: Callable[[DataCoordinate], bool]):
292 dataId3 = self.
translate(nextDataId2, partial=
False)
293 if predicate(dataId3):
297 def translate(self, dataId2: dict, *, partial: bool =
False) -> Optional[DataCoordinate]:
299 rawDataId3 = self.
_translator(dataId2, partial=partial)
301 return DataCoordinate.standardize(rawDataId3, universe=self.
_datasetType.dimensions.universe)
303 return DataCoordinate.standardize(rawDataId3, graph=self.
_datasetType.dimensions)
307 """Handler for FITS files that store image and metadata in multiple HDUs
308 per file, for example DECam raw and Community Pipeline calibrations.
312 For now, this is only used by DECam, and may need to be made more generic
313 (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
314 with other obs packages.
316 def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
317 predicate: Callable[[DataCoordinate], bool]):
318 dataId3 = self.
translate(nextDataId2, partial=
True)
320 def get_detectors(filename):
324 for i
in range(1, fitsData.countHdus()):
326 metadata = fitsData.readMetadata()
327 detectors.append(metadata[
'CCDNUM'])
330 if predicate(dataId3):
331 detectors = get_detectors(path)
333 for detector
in detectors:
335 ccd=detector, filter=nextDataId2.get(
"filter"))
336 newDataId3 = DataCoordinate.standardize(dataId3,
339 calibration_label=label)
def translate(self, dataId2: dict, *, partial: bool = False) -> Optional[DataCoordinate]:
    """Translate a Gen2 data ID into a partial Gen3 `DataCoordinate`.

    Parameters
    ----------
    dataId2 : `dict`
        Gen2 data ID to translate.
    partial : `bool`, optional
        Must be `True`; this handler only ever produces partial data
        IDs, because the per-HDU ``ccdnum`` key is deliberately left
        out of the translation.

    Returns
    -------
    dataId3 : `lsst.daf.butler.DataCoordinate` or `None`
        Standardized (partial) Gen3 data ID.
    """
    # A full (non-partial) translation would be a logic error in the
    # caller, since 'ccdnum' is intentionally ignored here.
    assert partial is True, "We always require partial, to ignore 'ccdnum'"
    gen3Keys = self._translator(dataId2, partial=partial)
    return DataCoordinate.standardize(
        gen3Keys,
        universe=self._datasetType.dimensions.universe,
    )