LSST Applications  21.0.0+75b29a8a7f,21.0.0+e70536a077,21.0.0-1-ga51b5d4+62c747d40b,21.0.0-10-gbfb87ad6+3307648ee3,21.0.0-15-gedb9d5423+47cba9fc36,21.0.0-2-g103fe59+fdf0863a2a,21.0.0-2-g1367e85+d38a93257c,21.0.0-2-g45278ab+e70536a077,21.0.0-2-g5242d73+d38a93257c,21.0.0-2-g7f82c8f+e682ffb718,21.0.0-2-g8dde007+d179fbfa6a,21.0.0-2-g8f08a60+9402881886,21.0.0-2-ga326454+e682ffb718,21.0.0-2-ga63a54e+08647d4b1b,21.0.0-2-gde069b7+26c92b3210,21.0.0-2-gecfae73+0445ed2f95,21.0.0-2-gfc62afb+d38a93257c,21.0.0-27-gbbd0d29+ae871e0f33,21.0.0-28-g5fc5e037+feb0e9397b,21.0.0-3-g21c7a62+f4b9c0ff5c,21.0.0-3-g357aad2+57b0bddf0b,21.0.0-3-g4be5c26+d38a93257c,21.0.0-3-g65f322c+3f454acf5d,21.0.0-3-g7d9da8d+75b29a8a7f,21.0.0-3-gaa929c8+9e4ef6332c,21.0.0-3-ge02ed75+4b120a55c4,21.0.0-4-g3300ddd+e70536a077,21.0.0-4-g591bb35+4b120a55c4,21.0.0-4-gc004bbf+4911b9cd27,21.0.0-4-gccdca77+f94adcd104,21.0.0-4-ge8fba5a+2b3a696ff9,21.0.0-5-gb155db7+2c5429117a,21.0.0-5-gdf36809+637e4641ee,21.0.0-6-g00874e7+c9fd7f7160,21.0.0-6-g4e60332+4b120a55c4,21.0.0-7-gc8ca178+40eb9cf840,21.0.0-8-gfbe0b4b+9e4ef6332c,21.0.0-9-g2fd488a+d83b7cd606,w.2021.05
LSST Data Management Base Package
scanner.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (http://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 """Interfaces and common code for recursively scanning directories for Gen2
22 dataset files.
23 
24 The `PathElementHandler` ABC is defined here instead of ``handlers.py`` for
25 dependency reasons: `DirectoryScanner` uses the ABC, while its concrete
26 implementations use `DirectoryScanner`.
27 """
28 from __future__ import annotations
29 
30 __all__ = ["PathElementHandler", "DirectoryScanner"]
31 
32 from abc import ABC, abstractmethod
33 import bisect
34 import os
35 from typing import (
36  Callable,
37  Iterator,
38  List,
39  Mapping,
40  Optional,
41  Tuple,
42 )
43 
44 from lsst.log import Log
45 from lsst.daf.butler import (
46  DataCoordinate,
47  DatasetType,
48  FileDataset,
49 )
50 
51 
class PathElementHandler(ABC):
    """An interface for objects that handle a single path element (directory or
    file) in a Gen2 data repository.

    Handlers are added to a `DirectoryScanner` instance, which then calls them
    until one succeeds when it processes each element in a directory.
    """
    def __init__(self):
        # Must match the name declared in ``__slots__``; any other attribute
        # name raises `AttributeError` on a slotted instance.
        self.lastDataId2 = {}

    __slots__ = ("lastDataId2", "log")

    @abstractmethod
    def isForFiles(self) -> bool:
        """Report what kind of path element this object handles.

        Returns
        -------
        isForFiles : `bool`
            `True` if this handler is for file entries, or `False` if it
            is for directories.
        """
        raise NotImplementedError()

    @abstractmethod
    def __call__(self, path: str, name: str,
                 datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
                 predicate: Callable[[DataCoordinate], bool]) -> bool:
        """Apply the handler to a file path.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        name : `str`
            Local name of the file or directory within its parent directory.
        datasets : `dict` [`DatasetType`, `dict` ]
            Dictionary that found datasets should be added to. Nested dicts
            are keyed by either `None` (for most datasets) or a `str`
            "CALIBDATE" for calibration datasets.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.

        Returns
        -------
        matched : `bool`
            `True` if this handler was a match for the given path and no other
            handlers need to be tried on it, `False` otherwise.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def rank(self) -> int:
        """Return a rough indication of how flexible this handler is in terms
        of the path element names it can match.

        Handlers that match a constant path element should always return zero.
        """
        raise NotImplementedError()

    def translate(self, dataId2: dict, *, partial: bool = False
                  ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        """Translate the given data ID from Gen2 to Gen3.

        The default implementation returns ``(None, None)``. Subclasses that
        are able to translate data IDs should override this method.

        Parameters
        ----------
        dataId2 : `dict`
            Gen2 data ID.
        partial : `bool`, optional
            If `True` (`False` is default) this is a partial data ID for some
            dataset, and missing keys are expected.

        Returns
        -------
        dataId3 : `lsst.daf.butler.DataCoordinate` or `None`
            A Gen3 data ID, or `None` if this handler cannot translate data
            IDs.
        calibDate : `str` or `None`
            A Gen2 calibration "CALIBDATE" value, or `None` if there was no
            such value in the template.
        """
        return None, None

    def __lt__(self, other: PathElementHandler) -> bool:
        """Order handlers by `rank`.

        Handlers are sorted by rank to reduce the possibility that more
        flexible handlers will have a chance to match something they
        shouldn't.

        Parameters
        ----------
        other : `PathElementHandler`
            The handler to compare against.

        Returns
        -------
        less : `bool`
            `True` if this handler's rank is strictly lower than ``other``'s.
        """
        return self.rank < other.rank

    lastDataId2: dict
    """The Gen2 data ID obtained by processing parent levels in the directory
    tree.

    This attribute should be reset by calling code whenever a new parent
    directory is entered, before invoking `__call__`.
    """

    log: Log
    """A logger to use for all diagnostic messages (`lsst.log.Log`).

    This attribute is set on a handler in `DirectoryScanner.add`; this avoids
    needing to forward one through all subclass constructors.
    """
160 
161 
class DirectoryScanner:
    """An object that uses `PathElementHandler` instances to process the files
    and subdirectories in a directory tree.

    Parameters
    ----------
    log : `Log`, optional
        Log to use to report warnings and debug information.
    """
    def __init__(self, log: Optional[Log] = None):
        # Attribute names must match ``__slots__`` exactly. Both handler lists
        # are kept sorted (see `add`).
        self._files = []
        self._subdirectories = []
        if log is None:
            log = Log.getLogger("obs.base.gen2to3.walker")
        self.log = log

    __slots__ = ("_files", "_subdirectories", "log")

    def add(self, handler: PathElementHandler):
        """Add a new handler to the scanner.

        The scanner's log is assigned to ``handler.log``, and the handler is
        inserted into the file or subdirectory handler list (chosen by
        ``handler.isForFiles()``) so that the list stays sorted according to
        the handlers' ``<`` ordering.

        Parameters
        ----------
        handler : `PathElementHandler`
            The handler to be added.
        """
        handler.log = self.log
        if handler.isForFiles():
            bisect.insort(self._files, handler)
        else:
            bisect.insort(self._subdirectories, handler)

    def __iter__(self) -> Iterator[PathElementHandler]:
        """Iterate over all handlers: file handlers first, then subdirectory
        handlers, each group in sorted order.
        """
        yield from self._files
        yield from self._subdirectories

    def scan(self, path: str, datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
             predicate: Callable[[DataCoordinate], bool]):
        """Process a directory.

        Each file or subdirectory entry directly inside ``path`` is offered to
        the matching handlers in sorted order until one returns a true value.
        Entries that are neither files nor directories are ignored; entries
        that no handler accepts are reported in a single warning.

        Parameters
        ----------
        path : `str`
            Full path to the directory to be processed.
        datasets : `dict` [`DatasetType`, `dict` ]
            Dictionary that found datasets should be added to; passed through
            unchanged to each handler. Per the type annotation, nested
            mappings are keyed by `None` or a `str` and hold lists of
            `FileDataset`.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        unrecognized = []
        # Use the iterator as a context manager so the underlying directory
        # handle is released promptly, even if a handler raises.
        with os.scandir(path) as entries:
            for entry in entries:
                if entry.is_file():
                    handlers = self._files
                elif entry.is_dir():
                    handlers = self._subdirectories
                else:
                    continue
                for handler in handlers:
                    if handler(entry.path, entry.name, datasets, predicate=predicate):
                        break
                else:
                    # No handler claimed this entry.
                    unrecognized.append(entry.name)
        if unrecognized:
            self.log.warn("Skipped unrecognized entries in %s: %s", path, unrecognized)
def scan(self, str path, Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]] datasets, *Callable[[DataCoordinate], bool] predicate)
Definition: scanner.py:201
def add(self, PathElementHandler handler)
Definition: scanner.py:180
def __init__(self, Optional[Log] log=None)
Definition: scanner.py:171
Iterator[PathElementHandler] __iter__(self)
Definition: scanner.py:194
Tuple[Optional[DataCoordinate], Optional[str]] translate(self, dict dataId2, *bool partial=False)
Definition: scanner.py:115
bool __call__(self, str path, str name, Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]] datasets, *Callable[[DataCoordinate], bool] predicate)
Definition: scanner.py:78
def __lt__(self, PathElementHandler other)
Definition: scanner.py:140
Definition: Log.h:706