LSSTApplications  20.0.0
LSSTDataManagementBasePackage
handlers.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (http://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 """Concrete implementations of `PathElementHandler`.
22 
23 The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to
24 avoid a circular dependency between modules.
25 """
26 from __future__ import annotations
27 
28 __all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"]
29 
30 from abc import abstractmethod
31 import re
32 from typing import (
33  Callable,
34  List,
35  Mapping,
36  Optional,
37  TYPE_CHECKING
38 )
39 
40 import lsst.afw.fits
41 from lsst.daf.butler import (
42  DataCoordinate,
43  DatasetRef,
44  DatasetType,
45  FileDataset,
46 )
47 from ..translators import Translator, makeCalibrationLabel
48 from .parser import PathElementParser
49 from .scanner import PathElementHandler, DirectoryScanner
50 
51 if TYPE_CHECKING:
52  from lsst.daf.butler import FormatterParameter
53 
54 
56  """A `PathElementHandler` that matches via a regular expression, and does
57  nothing.
58 
59  An `IgnoreHandler` is used to ignore file or directory patterns that can
60  occur at any level in the directory tree, and have no relation to any
61  Gen2 filename template.
62 
63  Parameters
64  ----------
65  pattern : `re.Pattern`
66  A regular expression pattern.
67  isForFiles : `bool`
68  Whether this handler should be applied to files (`True`) or
69  directories (`False`).
70  """
71  def __init__(self, pattern: re.Pattern, isForFiles: bool):
72  super().__init__()
73  self._pattern = pattern
74  self._isForFiles = isForFiles
75 
76  __slots__ = ("_pattern", "_isForFiles")
77 
78  def __str__(self):
79  return f"{type(self).__name__}({self._pattern}, isForFiles={self._isForFiles})"
80 
81  def isForFiles(self) -> bool:
82  # Docstring inherited from PathElementHandler.
83  return self._isForFiles
84 
85  @property
86  def rank(self) -> int:
87  # Docstring inherited from PathElementHandler.
88  return 0
89 
90  def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
91  predicate: Callable[[DataCoordinate], bool]) -> bool:
92  # Docstring inherited from PathElementHandler.
93  if self._pattern.fullmatch(name):
94  return True
95  else:
96  return False
97 
98 
100  """An intermediate base class for `PathElementHandler` classes that utilize
101  a `PathElementParser` to match a Gen2 filename template.
102 
103  Parameters
104  ----------
105  parser : `PathElementParser`
106  An object that matches the path element this handler is responsible for
107  and extracts a (partial) Gen2 data ID from it.
108  """
109  def __init__(self, parser: PathElementParser):
110  super().__init__()
111  self._parser = parser
112 
113  __slots__ = ("_parser",)
114 
115  def __str__(self):
116  return f"{type(self).__name__}(parser={self._parser})"
117 
118  def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
119  predicate: Callable[[DataCoordinate], bool]) -> bool:
120  # Docstring inherited from PathElementParser.
121  nextDataId2 = self._parser.parse(name, self.lastDataId2)
122  if nextDataId2 is None:
123  return False
124  self.handle(path, nextDataId2, datasets, predicate=predicate)
125  return True
126 
127  @property
128  def rank(self) -> int:
129  # Docstring inherited from PathElementParser.
130  return len(self._parser.keys)
131 
132  @abstractmethod
133  def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
134  predicate: Callable[[DataCoordinate], bool]):
135  """Customization hook for ``__call__``.
136 
137  Subclasses must override this method, while external callers (i.e.
138  `DirectoryScanner` should instead invoke `__call__`.
139 
140  Parameters
141  ----------
142  path : `str`
143  Full path of the file or directory.
144  nextDataId2 : `dict`
145  Gen2 data ID (usually partial) extracted from the path so far.
146  datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
147  Dictionary that found datasets should be added to.
148  predicate : `~collections.abc.Callable`
149  A callable taking a single `DataCoordinate` argument and returning
150  `bool`, indicating whether that (Gen3) data ID represents one
151  that should be included in the scan.
152  formatterMap : `dict`, optional
153  Map dataset type to specialist formatter.
154  """
155  raise NotImplementedError()
156 
157 
159  """A `ParsedPathElementHandler` that does nothing with an entry other
160  than optionally logging a warning message.
161 
162  A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
163  want to (or cannot) extract Gen3 datasets from, or other files/directories
164  that always appear at a fixed level in the directory tree.
165 
166  Parameters
167  ----------
168  parser : `PathElementParser`
169  An object that matches the path element this handler is responsible for
170  and extracts a (partial) Gen2 data ID from it.
171  isForFiles : `bool`
172  Whether this handler should be applied to files (`True`) or
173  directories (`False`).
174  message : `str`, optional
175  A message to log at warning level when this handler matches a path
176  entry. If `None`, matched entries will be silently skipped.
177  """
178  def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
179  super().__init__(parser=parser)
180  self._isForFiles = isForFiles
181  self._message = message
182 
183  __slots__ = ("_message", "_isForFiles")
184 
185  def isForFiles(self) -> bool:
186  # Docstring inherited from PathElementHandler.
187  return self._isForFiles
188 
189  def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
190  predicate: Callable[[DataCoordinate], bool]):
191  # Docstring inherited from ParsedPathElementHandler.
192  if self._message is not None:
193  self.log.warn("Skipping %s: %s", path, self._message)
194 
195 
197  """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.
198 
199  Parameters
200  ----------
201  parser : `PathElementParser`
202  An object that matches the path element this handler is responsible for
203  and extracts a (partial) Gen2 data ID from it.
204 
205  Notes
206  -----
207  The nested `DirectoryScanner` is default-constructed and should be
208  populated with child handlers after the `SubdirectoryHandler` is created.
209  """
210 
211  def __init__(self, parser: PathElementParser):
212  super().__init__(parser=parser)
214 
215  __slots__ = ("scanner",)
216 
217  def isForFiles(self) -> bool:
218  # Docstring inherited from PathElementHandler.
219  return False
220 
221  def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
222  predicate: Callable[[DataCoordinate], bool]):
223  # Docstring inherited from ParsedPathElementHandler.
224  if not nextDataId2:
225  # We matched, and there's no data ID at all yet. That means the
226  # full path so far is just a fixed string so we should descend
227  # and the match is exclusive.
228  scan = True
229  else:
230  dataId3 = self.translate(nextDataId2, partial=True)
231  if dataId3 is not None:
232  scan = predicate(dataId3)
233  else:
234  scan = True
235  if scan:
236  for handler in self.scanner:
237  handler.lastDataId2 = nextDataId2
238  self.scanner.scan(path, datasets, predicate=predicate)
239 
240  def translate(self, dataId2: dict, *, partial: bool = False) -> Optional[DataCoordinate]:
241  # Docstring inherited from PathElementHandler.
242  for handler in self.scanner:
243  # Since we're recursing, we're always asking for a partial match,
244  # because the data ID we have corresponds to different level than
245  # the one child handlers operate at.
246  result = handler.translate(dataId2, partial=True)
247  if result is not None:
248  return result
249  return None
250 
251  scanner: DirectoryScanner
252  """Scanner object that holds handlers for the entries of the subdirectory
253  matched by this handler (`DirectoryScanner`).
254  """
255 
256 
258  """A `PathElementHandler` that matches files that correspond to target
259  datasets and outputs `FileDataset` instances for them.
260 
261  Parameters
262  ----------
263  parser : `PathElementParser`
264  An object that matches the path element this handler is responsible for
265  and extracts a (partial) Gen2 data ID from it.
266  translator : `Translator`
267  Object that translates data IDs from Gen2 to Gen3.
268  datasetType : `lsst.daf.butler.DatasetType`
269  Gen3 dataset type for the datasets this handler matches.
270  formatter : `lsst.daf.butler.Formatter` or `str`, optional
271  A Gen 3 formatter class or fully-qualified name.
272  """
273  def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType,
274  formatter: FormatterParameter = None):
275  super().__init__(parser=parser)
276  self._translator = translator
277  self._datasetType = datasetType
278  self._formatter = formatter
279 
280  __slots__ = ("_translator", "_datasetType", "_formatter")
281 
282  def __str__(self):
283  return f"{type(self).__name__}({self._translator}, {self._datasetType})"
284 
285  def isForFiles(self) -> bool:
286  # Docstring inherited from PathElementHandler.
287  return True
288 
289  def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
290  predicate: Callable[[DataCoordinate], bool]):
291  # Docstring inherited from ParsedPathElementHandler.
292  dataId3 = self.translate(nextDataId2, partial=False)
293  if predicate(dataId3):
294  datasets[self._datasetType].append(FileDataset(refs=[DatasetRef(self._datasetType, dataId3)],
295  path=path, formatter=self._formatter))
296 
297  def translate(self, dataId2: dict, *, partial: bool = False) -> Optional[DataCoordinate]:
298  # Docstring inherited from PathElementHandler.
299  rawDataId3 = self._translator(dataId2, partial=partial)
300  if partial:
301  return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)
302  else:
303  return DataCoordinate.standardize(rawDataId3, graph=self._datasetType.dimensions)
304 
305 
307  """Handler for FITS files that store image and metadata in multiple HDUs
308  per file, for example DECam raw and Community Pipeline calibrations.
309 
310  Notes
311  -----
312  For now, this is only used by DECam, and may need to be made more generic
313  (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
314  with other obs packages.
315  """
316  def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
317  predicate: Callable[[DataCoordinate], bool]):
318  dataId3 = self.translate(nextDataId2, partial=True)
319 
320  def get_detectors(filename):
321  fitsData = lsst.afw.fits.Fits(filename, 'r')
322  # NOTE: The primary header (HDU=0) does not contain detector data.
323  detectors = []
324  for i in range(1, fitsData.countHdus()):
325  fitsData.setHdu(i)
326  metadata = fitsData.readMetadata()
327  detectors.append(metadata['CCDNUM'])
328  return detectors
329 
330  if predicate(dataId3):
331  detectors = get_detectors(path)
332  refs = []
333  for detector in detectors:
334  label = makeCalibrationLabel(self._datasetType.name, nextDataId2["calibDate"],
335  ccd=detector, filter=nextDataId2.get("filter"))
336  newDataId3 = DataCoordinate.standardize(dataId3,
337  graph=self._datasetType.dimensions,
338  detector=detector,
339  calibration_label=label)
340  refs.append(DatasetRef(self._datasetType, newDataId3))
341 
342  datasets[self._datasetType].append(FileDataset(refs=refs, path=path, formatter=self._formatter))
343 
344  def translate(self, dataId2: dict, *, partial: bool = False) -> Optional[DataCoordinate]:
345  assert partial is True, "We always require partial, to ignore 'ccdnum'"
346  rawDataId3 = self._translator(dataId2, partial=partial)
347  return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)
lsst.obs.base.gen2to3.repoWalker.handlers.SkipHandler
Definition: handlers.py:158
lsst::log.log.logContinued.warn
def warn(fmt, *args)
Definition: logContinued.py:202
lsst.obs.base.gen2to3.repoWalker.handlers.TargetFileHandler.__init__
def __init__(self, PathElementParser parser, Translator translator, DatasetType datasetType, FormatterParameter formatter=None)
Definition: handlers.py:273
lsst.obs.base.gen2to3.repoWalker.scanner.DirectoryScanner
Definition: scanner.py:154
lsst.obs.base.gen2to3.repoWalker.handlers.TargetFileHandler._formatter
_formatter
Definition: handlers.py:277
lsst.obs.base.gen2to3.repoWalker.handlers.IgnoreHandler.__init__
def __init__(self, re.Pattern pattern, bool isForFiles)
Definition: handlers.py:71
lsst.obs.base.gen2to3.repoWalker.handlers.SubdirectoryHandler
Definition: handlers.py:196
lsst.obs.base.gen2to3.repoWalker.handlers.SubdirectoryHandler.isForFiles
bool isForFiles(self)
Definition: handlers.py:217
lsst.obs.base.gen2to3.repoWalker.handlers.ParsedPathElementHandler._parser
_parser
Definition: handlers.py:111
lsst.obs.base.gen2to3.repoWalker.handlers.SkipHandler._message
_message
Definition: handlers.py:181
lsst.obs.base.gen2to3.repoWalker.scanner.PathElementHandler
Definition: scanner.py:51
lsst::afw::fits::Fits
A simple struct that combines the two arguments that must be passed to most cfitsio routines and cont...
Definition: fits.h:297
lsst.obs.base.gen2to3.repoWalker.handlers.SubdirectoryHandler.handle
def handle(self, str path, nextDataId2, Mapping[DatasetType, List[FileDataset]] datasets, *Callable[[DataCoordinate], bool] predicate)
Definition: handlers.py:221
lsst.obs.base.gen2to3.repoWalker.scanner.PathElementHandler.lastDataId2
lastDataId2
Definition: scanner.py:59
lsst.obs.base.gen2to3.repoWalker.handlers.IgnoreHandler.__call__
bool __call__(self, str path, str name, Mapping[DatasetType, List[FileDataset]] datasets, *Callable[[DataCoordinate], bool] predicate)
Definition: handlers.py:90
lsst.obs.base.gen2to3.repoWalker.handlers.TargetFileHandler.translate
Optional[DataCoordinate] translate(self, dict dataId2, *bool partial=False)
Definition: handlers.py:297
lsst.obs.base.gen2to3.repoWalker.handlers.ParsedPathElementHandler.__str__
def __str__(self)
Definition: handlers.py:115
lsst.obs.base.gen2to3.repoWalker.handlers.IgnoreHandler.isForFiles
bool isForFiles(self)
Definition: handlers.py:81
ast::append
std::shared_ptr< FrameSet > append(FrameSet const &first, FrameSet const &second)
Construct a FrameSet that performs two transformations in series.
Definition: functional.cc:33
lsst.obs.base.gen2to3.repoWalker.handlers.SkipHandler.__init__
def __init__(self, PathElementParser parser, bool isForFiles, Optional[str] message)
Definition: handlers.py:178
lsst.obs.base.gen2to3.repoWalker.handlers.ParsedPathElementHandler.handle
def handle(self, str path, dict nextDataId2, Mapping[DatasetType, List[FileDataset]] datasets, *Callable[[DataCoordinate], bool] predicate)
Definition: handlers.py:133
lsst.obs.base.gen2to3.repoWalker.handlers.TargetFileHandler
Definition: handlers.py:257
lsst.obs.base.gen2to3.repoWalker.handlers.MultiExtensionFileHandler.translate
Optional[DataCoordinate] translate(self, dict dataId2, *bool partial=False)
Definition: handlers.py:344
lsst.obs.base.gen2to3.repoWalker.handlers.MultiExtensionFileHandler.handle
def handle(self, str path, nextDataId2, Mapping[DatasetType, List[FileDataset]] datasets, *Callable[[DataCoordinate], bool] predicate)
Definition: handlers.py:316
lsst.obs.base.gen2to3.repoWalker.handlers.SkipHandler.handle
def handle(self, str path, dict nextDataId2, Mapping[DatasetType, List[FileDataset]] datasets, *Callable[[DataCoordinate], bool] predicate)
Definition: handlers.py:189
lsst.obs.base.gen2to3.repoWalker.handlers.TargetFileHandler.isForFiles
bool isForFiles(self)
Definition: handlers.py:285
lsst.obs.base.gen2to3.repoWalker.handlers.IgnoreHandler.rank
int rank(self)
Definition: handlers.py:86
lsst.obs.base.gen2to3.repoWalker.handlers.IgnoreHandler._pattern
_pattern
Definition: handlers.py:73
lsst.obs.base.gen2to3.translators.makeCalibrationLabel
str makeCalibrationLabel(str datasetTypeName, str calibDate, Optional[int] ccd=None, Optional[str] filter=None)
Definition: translators.py:36
lsst.obs.base.gen2to3.repoWalker.handlers.TargetFileHandler._datasetType
_datasetType
Definition: handlers.py:276
lsst.obs.base.gen2to3.repoWalker.handlers.IgnoreHandler.__str__
def __str__(self)
Definition: handlers.py:78
lsst.obs.base.gen2to3.repoWalker.handlers.TargetFileHandler.__str__
def __str__(self)
Definition: handlers.py:282
lsst.obs.base.gen2to3.repoWalker.handlers.SubdirectoryHandler.scanner
scanner
Definition: handlers.py:213
lsst.obs.base.gen2to3.repoWalker.handlers.SkipHandler.isForFiles
bool isForFiles(self)
Definition: handlers.py:185
lsst.obs.base.gen2to3.repoWalker.handlers.ParsedPathElementHandler.rank
int rank(self)
Definition: handlers.py:128
lsst.obs.base.gen2to3.repoWalker.handlers.IgnoreHandler
Definition: handlers.py:55
lsst.obs.base.gen2to3.repoWalker.handlers.ParsedPathElementHandler.__init__
def __init__(self, PathElementParser parser)
Definition: handlers.py:109
lsst.obs.base.gen2to3.repoWalker.handlers.SubdirectoryHandler.__init__
def __init__(self, PathElementParser parser)
Definition: handlers.py:211
lsst::afw::fits
Definition: fits.h:31
lsst.obs.base.gen2to3.repoWalker.handlers.SkipHandler._isForFiles
_isForFiles
Definition: handlers.py:180
lsst.obs.base.gen2to3.repoWalker.handlers.MultiExtensionFileHandler
Definition: handlers.py:306
lsst.obs.base.gen2to3.repoWalker.handlers.TargetFileHandler._translator
_translator
Definition: handlers.py:275
lsst.obs.base.gen2to3.repoWalker.handlers.IgnoreHandler._isForFiles
_isForFiles
Definition: handlers.py:74
lsst.obs.base.gen2to3.repoWalker.handlers.ParsedPathElementHandler.__call__
bool __call__(self, str path, str name, Mapping[DatasetType, List[FileDataset]] datasets, *Callable[[DataCoordinate], bool] predicate)
Definition: handlers.py:118
lsst.obs.base.gen2to3.repoWalker.handlers.SubdirectoryHandler.translate
Optional[DataCoordinate] translate(self, dict dataId2, *bool partial=False)
Definition: handlers.py:240
lsst.obs.base.gen2to3.repoWalker.handlers.ParsedPathElementHandler
Definition: handlers.py:99
lsst.obs.base.gen2to3.repoWalker.handlers.TargetFileHandler.handle
def handle(self, str path, nextDataId2, Mapping[DatasetType, List[FileDataset]] datasets, *Callable[[DataCoordinate], bool] predicate)
Definition: handlers.py:289