LSST Applications  21.0.0+75b29a8a7f,21.0.0+e70536a077,21.0.0-1-ga51b5d4+62c747d40b,21.0.0-10-gbfb87ad6+3307648ee3,21.0.0-15-gedb9d5423+47cba9fc36,21.0.0-2-g103fe59+fdf0863a2a,21.0.0-2-g1367e85+d38a93257c,21.0.0-2-g45278ab+e70536a077,21.0.0-2-g5242d73+d38a93257c,21.0.0-2-g7f82c8f+e682ffb718,21.0.0-2-g8dde007+d179fbfa6a,21.0.0-2-g8f08a60+9402881886,21.0.0-2-ga326454+e682ffb718,21.0.0-2-ga63a54e+08647d4b1b,21.0.0-2-gde069b7+26c92b3210,21.0.0-2-gecfae73+0445ed2f95,21.0.0-2-gfc62afb+d38a93257c,21.0.0-27-gbbd0d29+ae871e0f33,21.0.0-28-g5fc5e037+feb0e9397b,21.0.0-3-g21c7a62+f4b9c0ff5c,21.0.0-3-g357aad2+57b0bddf0b,21.0.0-3-g4be5c26+d38a93257c,21.0.0-3-g65f322c+3f454acf5d,21.0.0-3-g7d9da8d+75b29a8a7f,21.0.0-3-gaa929c8+9e4ef6332c,21.0.0-3-ge02ed75+4b120a55c4,21.0.0-4-g3300ddd+e70536a077,21.0.0-4-g591bb35+4b120a55c4,21.0.0-4-gc004bbf+4911b9cd27,21.0.0-4-gccdca77+f94adcd104,21.0.0-4-ge8fba5a+2b3a696ff9,21.0.0-5-gb155db7+2c5429117a,21.0.0-5-gdf36809+637e4641ee,21.0.0-6-g00874e7+c9fd7f7160,21.0.0-6-g4e60332+4b120a55c4,21.0.0-7-gc8ca178+40eb9cf840,21.0.0-8-gfbe0b4b+9e4ef6332c,21.0.0-9-g2fd488a+d83b7cd606,w.2021.05
LSST Data Management Base Package
handlers.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (http://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 """Concrete implementations of `PathElementHandler`.
22 
23 The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to
24 avoid a circular dependency between modules.
25 """
26 from __future__ import annotations
27 
28 __all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"]
29 
30 from abc import abstractmethod
31 import re
32 from typing import (
33  Callable,
34  List,
35  Mapping,
36  Optional,
37  Tuple,
38  TYPE_CHECKING
39 )
40 
41 import lsst.afw.fits
42 from lsst.daf.butler import (
43  DataCoordinate,
44  DatasetRef,
45  DatasetType,
46  FileDataset,
47 )
48 from ..translators import Translator
49 from .parser import PathElementParser
50 from .scanner import PathElementHandler, DirectoryScanner
51 
52 if TYPE_CHECKING:
53  from lsst.daf.butler import FormatterParameter
54 
55 
class IgnoreHandler(PathElementHandler):
    """A `PathElementHandler` that matches via a regular expression, and does
    nothing.

    An `IgnoreHandler` is used to ignore file or directory patterns that can
    occur at any level in the directory tree, and have no relation to any
    Gen2 filename template.

    Parameters
    ----------
    pattern : `re.Pattern`
        A regular expression pattern.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    """
    def __init__(self, pattern: re.Pattern, isForFiles: bool):
        super().__init__()
        self._pattern = pattern
        self._isForFiles = isForFiles

    __slots__ = ("_pattern", "_isForFiles")

    def __str__(self):
        return f"{type(self).__name__}({self._pattern}, isForFiles={self._isForFiles})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # Rank 0: an ignore pattern extracts no data ID keys.
        return 0

    def __call__(self, path: str, name: str,
                 datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
                 predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        # A match means "handled" (and deliberately ignored), so the scanner
        # stops trying other handlers for this entry.
        if self._pattern.fullmatch(name):
            return True
        else:
            return False
99 
100 
class ParsedPathElementHandler(PathElementHandler):
    """An intermediate base class for `PathElementHandler` classes that utilize
    a `PathElementParser` to match a Gen2 filename template.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    """
    def __init__(self, parser: PathElementParser):
        super().__init__()
        self._parser = parser

    __slots__ = ("_parser",)

    def __str__(self):
        return f"{type(self).__name__}(parser={self._parser})"

    def __call__(self, path: str, name: str,
                 datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
                 predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementParser.
        # Extend the data ID accumulated from parent directories with whatever
        # this path element contributes; no match means "not handled here".
        nextDataId2 = self._parser.parse(name, self.lastDataId2)
        if nextDataId2 is None:
            return False
        self.handle(path, nextDataId2, datasets, predicate=predicate)
        return True

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementParser.
        # More extracted keys -> more specific template -> higher rank.
        return len(self._parser.keys)

    @abstractmethod
    def handle(self, path: str, nextDataId2: dict,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
            Dictionary that found datasets should be added to.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()
160 
161 
class SkipHandler(ParsedPathElementHandler):
    """A `ParsedPathElementHandler` that does nothing with an entry other
    than optionally logging a warning message.

    A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
    want to (or cannot) extract Gen3 datasets from, or other files/directories
    that always appear at a fixed level in the directory tree.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    message : `str`, optional
        A message to log at warning level when this handler matches a path
        entry.  If `None`, matched entries will be silently skipped.
    """
    def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
        super().__init__(parser=parser)
        self._isForFiles = isForFiles
        self._message = message

    __slots__ = ("_message", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    def handle(self, path: str, nextDataId2: dict,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if self._message is not None:
            # warning() rather than the deprecated warn() alias.
            self.log.warning("Skipping %s: %s", path, self._message)
199 
200 
class SubdirectoryHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.

    Notes
    -----
    The nested `DirectoryScanner` is default-constructed and should be
    populated with child handlers after the `SubdirectoryHandler` is created.
    """

    def __init__(self, parser: PathElementParser):
        super().__init__(parser=parser)
        self.scanner = DirectoryScanner()

    __slots__ = ("scanner",)

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return False

    def handle(self, path: str, nextDataId2,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if not nextDataId2:
            # We matched, and there's no data ID at all yet.  That means the
            # full path so far is just a fixed string so we should descend
            # and the match is exclusive.
            scan = True
        else:
            dataId3, _ = self.translate(nextDataId2, partial=True)
            if dataId3 is not None:
                scan = predicate(dataId3)
            else:
                # Couldn't translate even partially; descend anyway and let
                # deeper handlers decide.
                scan = True
        if scan:
            for handler in self.scanner:
                handler.lastDataId2 = nextDataId2
            self.scanner.scan(path, datasets, predicate=predicate)

    def translate(self, dataId2: dict, *, partial: bool = False
                  ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        # Docstring inherited from PathElementHandler.
        for handler in self.scanner:
            # Since we're recursing, we're always asking for a partial match,
            # because the data ID we have corresponds to different level than
            # the one child handlers operate at.
            result, calibDate = handler.translate(dataId2, partial=True)
            if result is not None:
                return result, calibDate
        return None, None

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """
262 
263 
class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that matches files that correspond to target
    datasets and outputs `FileDataset` instances for them.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    formatter : `lsst.daf.butler.Formatter` or `str`, optional
        A Gen 3 formatter class or fully-qualified name.
    """
    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType,
                 formatter: FormatterParameter = None):
        super().__init__(parser=parser)
        self._translator = translator
        self._datasetType = datasetType
        self._formatter = formatter

    __slots__ = ("_translator", "_datasetType", "_formatter")

    def __str__(self):
        return f"{type(self).__name__}({self._translator}, {self._datasetType})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return True

    def handle(self, path: str, nextDataId2,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        dataId3, calibDate = self.translate(nextDataId2, partial=False)
        if predicate(dataId3):
            datasets[self._datasetType][calibDate].append(
                FileDataset(
                    refs=[DatasetRef(self._datasetType, dataId3)],
                    path=path, formatter=self._formatter
                )
            )

    def translate(self, dataId2: dict, *, partial: bool = False
                  ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        # Docstring inherited from PathElementHandler.
        rawDataId3, calibDate = self._translator(dataId2, partial=partial)
        if partial:
            # A partial data ID may be missing required dimensions, so only
            # standardize against the universe, not the full dimension graph.
            return (
                DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe),
                calibDate,
            )
        else:
            return (
                DataCoordinate.standardize(rawDataId3, graph=self._datasetType.dimensions),
                calibDate
            )
323 
324 
class MultiExtensionFileHandler(TargetFileHandler):
    """Handler for FITS files that store image and metadata in multiple HDUs
    per file, for example DECam raw and Community Pipeline calibrations.

    Notes
    -----
    For now, this is only used by DECam, and may need to be made more generic
    (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
    with other obs packages.
    """
    def handle(self, path: str, nextDataId2,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Partial translation: the per-detector part of the data ID comes from
        # the HDU metadata, not the path.
        dataId3, calibDate = self.translate(nextDataId2, partial=True)

        def get_detectors(filename):
            """Return the CCDNUM of every image HDU in ``filename``."""
            fitsData = lsst.afw.fits.Fits(filename, 'r')
            # NOTE: The primary header (HDU=0) does not contain detector data.
            detectors = []
            for i in range(1, fitsData.countHdus()):
                fitsData.setHdu(i)
                metadata = fitsData.readMetadata()
                detectors.append(metadata['CCDNUM'])
            return detectors

        if predicate(dataId3):
            detectors = get_detectors(path)
            refs = []
            for detector in detectors:
                newDataId3 = DataCoordinate.standardize(dataId3,
                                                        graph=self._datasetType.dimensions,
                                                        detector=detector)
                refs.append(DatasetRef(self._datasetType, newDataId3))

            datasets[self._datasetType][calibDate].append(
                FileDataset(refs=refs, path=path, formatter=self._formatter)
            )

    def translate(self, dataId2: dict, *, partial: bool = False
                  ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        # Docstring inherited from PathElementHandler.
        assert partial is True, "We always require partial, to ignore 'ccdnum'"
        rawDataId3, calibDate = self._translator(dataId2, partial=partial)
        return (
            DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe),
            calibDate,
        )
A simple struct that combines the two arguments that must be passed to most cfitsio routines and cont...
Definition: fits.h:297
bool __call__(self, str path, str name, Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]] datasets, *Callable[[DataCoordinate], bool] predicate)
Definition: handlers.py:93
def __init__(self, re.Pattern pattern, bool isForFiles)
Definition: handlers.py:72
def handle(self, str path, nextDataId2, Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]] datasets, *Callable[[DataCoordinate], bool] predicate)
Definition: handlers.py:337
Tuple[Optional[DataCoordinate], Optional[str]] translate(self, dict dataId2, *bool partial=False)
Definition: handlers.py:364
def handle(self, str path, dict nextDataId2, Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]] datasets, *Callable[[DataCoordinate], bool] predicate)
Definition: handlers.py:138
bool __call__(self, str path, str name, Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]] datasets, *Callable[[DataCoordinate], bool] predicate)
Definition: handlers.py:122
def handle(self, str path, dict nextDataId2, Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]] datasets, *Callable[[DataCoordinate], bool] predicate)
Definition: handlers.py:195
def __init__(self, PathElementParser parser, bool isForFiles, Optional[str] message)
Definition: handlers.py:182
def handle(self, str path, nextDataId2, Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]] datasets, *Callable[[DataCoordinate], bool] predicate)
Definition: handlers.py:228
Tuple[Optional[DataCoordinate], Optional[str]] translate(self, dict dataId2, *bool partial=False)
Definition: handlers.py:247
Tuple[Optional[DataCoordinate], Optional[str]] translate(self, dict dataId2, *bool partial=False)
Definition: handlers.py:310
def handle(self, str path, nextDataId2, Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]] datasets, *Callable[[DataCoordinate], bool] predicate)
Definition: handlers.py:298
def __init__(self, PathElementParser parser, Translator translator, DatasetType datasetType, FormatterParameter formatter=None)
Definition: handlers.py:281
Tuple[Optional[DataCoordinate], Optional[str]] translate(self, dict dataId2, *bool partial=False)
Definition: scanner.py:115
std::shared_ptr< FrameSet > append(FrameSet const &first, FrameSet const &second)
Construct a FrameSet that performs two transformations in series.
Definition: functional.cc:33