21 """High-level interface to the Gen2 repository-walking functionality defined
24 from __future__
import annotations
26 __all__ = [
"RepoWalker"]
28 from collections
import defaultdict
42 from lsst.daf.butler
import (
47 from .builders
import BuilderTargetInput, BuilderSkipInput, BuilderTree
48 from .scanner
import DirectoryScanner
52 """An object that recursively walks a Gen2 data repository tree, extracting
53 Gen3 `FileDataset` objects and warning about unrecognized or unconvertible
58 inputs : `~collections.abc.Iterable` of `Target` or `Skip`
59 Structs that indicate dataset types to be extracted (`Target`) or
60 explicitly skipped (`Skip`). Skips may include a warning message to
61 log when matching entries are encountered.
62 fileIgnoreRegEx : `re.Pattern`, optional
63 A regular expression pattern that identifies non-dataset files that
64 can be ignored, to be applied at all levels of the directory tree.
65 dirIgnoreRegEx : `re.Pattern`, optional
66 A regular expression pattern that identifies non-dataset subdirectories
67 that can be ignored, to be applied at all levels of the directory tree.
69 Logger for warnings and diagnostic information.
# Constructor. From the visible statements it: falls back to a named logger,
# records one type per key name across the inputs, prunes the builder tree,
# and hands a scanner to ``tree.fill``.
# NOTE(review): this excerpt omits several original lines (the embedded line
# numbers jump, e.g. 74 -> 77, 80 -> 83, 87 -> 90), so the loop header that
# binds ``leaf``, the creation of ``tree`` and ``self._scanner``, and any
# guard around the logger fallback are not visible here.
71 def __init__(self, inputs: Iterable[Union[Target, Skip]], *,
72 fileIgnoreRegEx: Optional[re.Pattern] =
None,
73 dirIgnoreRegEx: Optional[re.Pattern] =
None,
74 log: Optional[Log] =
None):
# NOTE(review): the logger name ends in "TranslatorFactory", while this
# module's ``__all__`` exports "RepoWalker" -- this looks like a name copied
# from another class; confirm before relying on it for log filtering.
77 log = Log.getLogger(
"obs.base.gen2to3.TranslatorFactory")
# Maps each key name to the single type recorded for it so far.
80 allKeys: Dict[str, type] = {}
83 for key, dtype
in leaf.keys.items():
# ``setdefault`` stores ``dtype`` the first time a key is seen and otherwise
# returns the previously stored type, so a mismatch here means two inputs
# declare the same key with different types.
84 if allKeys.setdefault(key, dtype) != dtype:
85 raise ValueError(f
"Multiple types for key '{key}': {dtype} "
86 f
"(from {leaf.template}) vs. {allKeys[key]}.")
# ``prune()`` returns a replacement tree plus ``messages`` and ``pruned``;
# how the latter two are consumed is not visible in this excerpt.
87 tree, messages, pruned = tree.prune()
# Passes the scanner, the collected key types, an empty dict, and both
# ignore patterns through to ``tree.fill``.
90 tree.fill(self.
_scanner, allKeys, {}, fileIgnoreRegEx=fileIgnoreRegEx,
91 dirIgnoreRegEx=dirIgnoreRegEx)
# Class-level names ``Target`` and ``Skip`` are bound to the builder input
# types ``BuilderTargetInput`` and ``BuilderSkipInput`` imported from
# ``.builders``; they are the names used in the constructor's ``inputs``
# annotation. (The attribute docstrings below are truncated in this excerpt.)
97 Target: ClassVar[type] = BuilderTargetInput
98 """An input struct type whose instances represent a dataset type to be
102 Skip: ClassVar[type] = BuilderSkipInput
103 """An input struct type whose instances represent a dataset type to be
# NOTE(review): ``predicate`` is documented below as optional and ``None`` is
# checked for explicitly, yet the signature gives it no default value, so
# every caller must still pass ``predicate=...``. Consider adding ``= None``.
# NOTE(review): this excerpt omits several original lines (the embedded line
# numbers jump, e.g. 129 -> 131 -> 133), including the fallback predicate's
# body and the return statement.
107 def walk(self, root: str, *, predicate: Optional[Callable[[DataCoordinate], bool]]
108 ) -> Mapping[DatasetType, List[FileDataset]]:
109 """Walk a Gen2 repository root to extract Gen3 `FileDataset` instances
115 Absolute path to the repository root.
116 predicate : `~collections.abc.Callable`, optional
117 If not `None`, a callable that returns `True` if a `DataCoordinate`
118 is consistent with what we want to extract. If ``predicate``
119 returns `False`, the file or directory that data ID was extracted
120 from will not be processed, even if it includes target dataset
125 datasets : `defaultdict` [`DatasetType`, `list`[`FileDataset`]]
126 Extracted datasets, grouped by Gen3 `DatasetType`.
# When the caller passes None, a locally defined predicate is substituted
# (its body is not visible in this excerpt).
128 if predicate
is None:
129 def predicate(dataId: DataCoordinate) -> bool:
# Results container: looking up a missing key yields a new empty list.
131 datasets = defaultdict(list)
# The scanner receives ``datasets`` as an argument; presumably it fills it in
# place -- confirm against ``DirectoryScanner.scan``.
133 self.
_scanner.scan(root, datasets, predicate=predicate)