LSSTApplications  19.0.0-14-gb0260a2+72efe9b372,20.0.0+7927753e06,20.0.0+8829bf0056,20.0.0+995114c5d2,20.0.0+b6f4b2abd1,20.0.0+bddc4f4cbe,20.0.0-1-g253301a+8829bf0056,20.0.0-1-g2b7511a+0d71a2d77f,20.0.0-1-g5b95a8c+7461dd0434,20.0.0-12-g321c96ea+23efe4bbff,20.0.0-16-gfab17e72e+fdf35455f6,20.0.0-2-g0070d88+ba3ffc8f0b,20.0.0-2-g4dae9ad+ee58a624b3,20.0.0-2-g61b8584+5d3db074ba,20.0.0-2-gb780d76+d529cf1a41,20.0.0-2-ged6426c+226a441f5f,20.0.0-2-gf072044+8829bf0056,20.0.0-2-gf1f7952+ee58a624b3,20.0.0-20-geae50cf+e37fec0aee,20.0.0-25-g3dcad98+544a109665,20.0.0-25-g5eafb0f+ee58a624b3,20.0.0-27-g64178ef+f1f297b00a,20.0.0-3-g4cc78c6+e0676b0dc8,20.0.0-3-g8f21e14+4fd2c12c9a,20.0.0-3-gbd60e8c+187b78b4b8,20.0.0-3-gbecbe05+48431fa087,20.0.0-38-ge4adf513+a12e1f8e37,20.0.0-4-g97dc21a+544a109665,20.0.0-4-gb4befbc+087873070b,20.0.0-4-gf910f65+5d3db074ba,20.0.0-5-gdfe0fee+199202a608,20.0.0-5-gfbfe500+d529cf1a41,20.0.0-6-g64f541c+d529cf1a41,20.0.0-6-g9a5b7a1+a1cd37312e,20.0.0-68-ga3f3dda+5fca18c6a4,20.0.0-9-g4aef684+e18322736b,w.2020.45
LSSTDataManagementBasePackage
walker.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (http://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 """High-level interface to the Gen2 repository-walking functionality defined
22 by this package.
23 """
24 from __future__ import annotations
25 
26 __all__ = ["RepoWalker"]
27 
28 from collections import defaultdict
29 import re
30 from typing import (
31  Callable,
32  ClassVar,
33  Dict,
34  Iterable,
35  List,
36  Mapping,
37  Optional,
38  Union,
39 )
40 
41 from lsst.log import Log
42 from lsst.daf.butler import (
43  DataCoordinate,
44  DatasetType,
45  FileDataset,
46 )
47 from .builders import BuilderTargetInput, BuilderSkipInput, BuilderTree
48 from .scanner import DirectoryScanner
49 
50 
class RepoWalker:
    """An object that recursively walks a Gen2 data repository tree, extracting
    Gen3 `FileDataset` objects and warning about unrecognized or unconvertible
    Gen2 datasets.

    Parameters
    ----------
    inputs : `~collections.abc.Iterable` of `Target` or `Skip`
        Structs that indicate dataset types to be extracted (`Target`) or
        explicitly skipped (`Skip`).  Skips may include a warning message to
        log when matching entries are encountered.
    fileIgnoreRegEx : `re.Pattern`, optional
        A regular expression pattern that identifies non-dataset files that
        can be ignored, to be applied at all levels of the directory tree.
    dirIgnoreRegEx : `re.Pattern`, optional
        A regular expression pattern that identifies non-dataset subdirectories
        that can be ignored, to be applied at all levels of the directory tree.
    log : `Log`, optional
        Logger for warnings and diagnostic information.  If `None`, a default
        logger is obtained via `Log.getLogger`.

    Raises
    ------
    ValueError
        Raised if two inputs declare the same key name with different types.
    """
    def __init__(self, inputs: Iterable[Union[Target, Skip]], *,
                 fileIgnoreRegEx: Optional[re.Pattern] = None,
                 dirIgnoreRegEx: Optional[re.Pattern] = None,
                 log: Optional[Log] = None):
        super().__init__()
        if log is None:
            # NOTE(review): the logger name mentions "TranslatorFactory" rather
            # than this class; possibly a copy/paste leftover.  Left unchanged
            # because existing logging configuration may depend on it.
            log = Log.getLogger("obs.base.gen2to3.TranslatorFactory")
        self.log = log
        tree = BuilderTree()
        # Union of the keys declared by all inputs, used to check that every
        # key name maps to a single type across inputs.
        allKeys: Dict[str, type] = {}
        for leaf in inputs:
            tree.insert(0, leaf)
            for key, dtype in leaf.keys.items():
                # setdefault() records the first type seen for a key and
                # returns the recorded one on later encounters.
                if allKeys.setdefault(key, dtype) != dtype:
                    raise ValueError(f"Multiple types for key '{key}': {dtype} "
                                     f"(from {leaf.template}) vs. {allKeys[key]}.")
        # prune() also returns a second element (messages) that is not used
        # here; the leading underscore marks it as deliberately ignored.
        tree, _messages, pruned = tree.prune()
        if not pruned:
            self._scanner = DirectoryScanner(log=self.log)
            tree.fill(self._scanner, allKeys, {}, fileIgnoreRegEx=fileIgnoreRegEx,
                      dirIgnoreRegEx=dirIgnoreRegEx)
        else:
            # Nothing to do; just remember this for later to avoid disturbing
            # higher-level code with the fact that walk() will be a no-op.
            self._scanner = None

    Target: ClassVar[type] = BuilderTargetInput
    """An input struct type whose instances represent a dataset type to be
    extracted (`type`).
    """

    Skip: ClassVar[type] = BuilderSkipInput
    """An input struct type whose instances represent a dataset type to be
    explicitly skipped.
    """

    def walk(self, root: str, *,
             predicate: Optional[Callable[[DataCoordinate], bool]] = None,
             ) -> Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]]:
        """Walk a Gen2 repository root to extract Gen3 `FileDataset` instances
        from it.

        Parameters
        ----------
        root : `str`
            Absolute path to the repository root.
        predicate : `~collections.abc.Callable`, optional
            If not `None`, a callable that returns `True` if a `DataCoordinate`
            is consistent with what we want to extract.  If ``predicate``
            returns `False`, the file or directory that data ID was extracted
            from will not be processed, even if it includes target dataset
            types.  If `None` (the default), every data ID is accepted.

        Returns
        -------
        datasets : `defaultdict` [`DatasetType`, `defaultdict` ]
            Extracted datasets, grouped by Gen3 `DatasetType`.  Nested dict
            keys are "CALIBDATE" strings (for calibration datasets) or `None`
            (otherwise).  Nested dict values are lists of `FileDataset`.
            The result is empty if the constructor found nothing to scan.
        """
        if predicate is None:
            # Accept-everything fallback so the scanner can always call the
            # predicate unconditionally.
            def predicate(dataId: DataCoordinate) -> bool:
                return True
        datasets = defaultdict(lambda: defaultdict(list))
        # The scanner is None when the builder tree was pruned away entirely in
        # __init__; in that case walking is a no-op.
        if self._scanner is not None:
            self._scanner.scan(root, datasets, predicate=predicate)
        return datasets
lsst.obs.base.gen2to3.repoWalker.scanner.DirectoryScanner
Definition: scanner.py:162
lsst.obs.base.gen2to3.repoWalker.builders.BuilderTree
Definition: builders.py:323
lsst.obs.base.gen2to3.repoWalker.walker.RepoWalker.walk
Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]] walk(self, str root, *Optional[Callable[[DataCoordinate], bool]] predicate)
Definition: walker.py:107
lsst.obs.base.gen2to3.repoWalker.walker.RepoWalker
Definition: walker.py:51
lsst::log
Definition: Log.h:706
lsst.obs.base.gen2to3.repoWalker.walker.RepoWalker.__init__
def __init__(self, Iterable[Union[Target, Skip]] inputs, *Optional[re.Pattern] fileIgnoreRegEx=None, Optional[re.Pattern] dirIgnoreRegEx=None, Optional[Log] log=None)
Definition: walker.py:71
lsst.obs.base.gen2to3.repoWalker.walker.RepoWalker._scanner
_scanner
Definition: walker.py:86
lsst.obs.base.gen2to3.repoWalker.walker.RepoWalker.log
log
Definition: walker.py:75