LSST Data Management Base Package
builders.py
# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Classes used in `RepoWalker` construction.

The objects here form a temporary tree that is pruned and then transformed
into a similar tree of `PathElementHandler` instances. See `BuilderNode`
method documentation for more information.
"""
from __future__ import annotations

__all__ = ["BuilderSkipInput", "BuilderTargetInput", "BuilderTree"]

from abc import ABC, abstractmethod
import os
import re
from typing import (
    Any,
    Dict,
    List,
    Optional,
    Tuple,
)

from lsst.daf.butler import DatasetType, DimensionUniverse, StorageClass, FormatterParameter
from ..translators import TranslatorFactory
from .parser import PathElementParser
from .scanner import PathElementHandler, DirectoryScanner
from .handlers import (IgnoreHandler, SubdirectoryHandler, SkipHandler,
                       TargetFileHandler)

class BuilderNode(ABC):
    """Abstract interface for nodes in the temporary tree that is used to
    construct a `RepoWalker`.
    """

    @abstractmethod
    def prune(self) -> Tuple[BuilderNode, List[str], bool]:
        """Attempt to prune this node and its children from the tree.

        Returns
        -------
        replacement : `BuilderNode`
            The result of recursively pruning child nodes; often just ``self``.
        messages : `list` [`str`]
            Warning messages that should be logged by a parent node when a
            matching path element is encountered, if this node is pruned.
        prune : `bool`
            If `True`, this node may be pruned from the tree (but will not
            necessarily be - it may correspond to a path element that should
            be skipped with siblings that should not be).
        """
        raise NotImplementedError()

    @abstractmethod
    def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
              fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
              ) -> PathElementHandler:
        """Transform this node in the build tree into a corresponding
        `PathElementHandler`, recursing to any children.

        Must be called after `prune`.

        Parameters
        ----------
        parser : `PathElementParser`
            An object that matches the path element the new handler is
            responsible for and extracts a (partial) Gen2 data ID from it.
        allKeys : `dict` [`str`, `type`]
            A mapping from Gen2 data ID key to the type of its value. Will
            contain all keys that may be extracted by the given parser, and
            possibly others.
        cumulativeKeys : `dict` [`str`, `type`], optional
            A dictionary containing key strings and types for Gen2 data ID keys
            that have been extracted from previous path elements for this
            template, including those extracted by ``parser``.

        Returns
        -------
        handler : `PathElementHandler`
            A new handler object.
        """
        raise NotImplementedError()

class BuilderInput(BuilderNode):
    """An intermediate base for `BuilderNode` classes that are provided as
    direct inputs to a `RepoWalker`, and generally correspond to exactly one
    Gen2 dataset type.

    Parameters
    ----------
    template : `str`
        The complete Gen2 template to be matched (not just the template for
        one path element).
    keys : `dict` [`str`, `type`]
        A mapping from Gen2 data ID key to the type of its value.
    """
    def __init__(self, template: str, keys: Dict[str, type]):
        self.template = template
        self.keys = keys
        self.elements = self.template.split(os.path.sep)

    template: str
    """The complete Gen2 template to be matched (`str`).
    """

    keys: Dict[str, type]
    """A mapping from Gen2 data ID key to the type of its value
    (`dict` [`str`, `type`]).
    """

    elements: List[str]
    """The path elements (file or directory levels) of `template`
    (`list` of `str`).
    """

class BuilderSkipInput(BuilderInput):
    """An input to a `RepoWalker` that indicates that matched files should be
    skipped, possibly with a warning message.

    BuilderSkipInputs can be pruned. When they are not pruned, they build
    `SkipHandler` instances.

    Parameters
    ----------
    template : `str`
        The complete Gen2 template to be matched (not just the template for
        one path element).
    keys : `dict` [`str`, `type`]
        A mapping from Gen2 data ID key to the type of its value.
    message : `str`, optional
        If not `None`, a warning message that should be printed either when a
        matching file is encountered or a directory that may contain such files
        is skipped.
    isForFiles : `bool`, optional
        If `True` (default), this handler should be run on files. Otherwise it
        should be run on directories.
    """
    def __init__(self, template: str, keys: Dict[str, type], message: Optional[str] = None, *,
                 isForFiles: bool = True):
        super().__init__(template=template, keys=keys)
        self._message = message
        self._isForFiles = isForFiles

    def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
              fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
              ) -> PathElementHandler:
        # Docstring inherited from BuilderNode.
        return SkipHandler(parser=parser, isForFiles=self._isForFiles, message=self._message)

    def prune(self) -> Tuple[BuilderNode, List[str], bool]:
        # Docstring inherited from BuilderNode.
        return self, [self._message] if self._message is not None else [], True

class BuilderTargetInput(BuilderInput):
    """An input to a `RepoWalker` that matches files that correspond to
    datasets that we want to extract.

    BuilderTargetInputs can never be pruned, and always build
    `TargetFileHandler` instances.

    Parameters
    ----------
    datasetTypeName : `str`
        Name of the dataset type.
    template : `str`
        Full Gen2 filename template.
    keys : `dict` [`str`, `type`]
        Dictionary that maps Gen2 data ID key to the type of its value.
    storageClass : `StorageClass`
        `StorageClass` for the Gen3 dataset type.
    universe : `DimensionUniverse`
        All candidate dimensions for the Gen3 dataset type.
    formatter : `lsst.daf.butler.Formatter` or `str`, optional
        A Gen 3 formatter class or fully-qualified name.
    translatorFactory : `TranslatorFactory`
        Object that can be used to construct data ID translators.
    targetHandler : `PathElementHandler`, optional
        Override target handler for this dataset type.
    **kwargs:
        Additional keyword arguments are passed to `Translator.makeMatching`,
        along with ``datasetTypeName`` and ``keys``.
    """
    def __init__(self, *, datasetTypeName: str, template: str, keys: Dict[str, type],
                 storageClass: StorageClass, universe: DimensionUniverse,
                 formatter: FormatterParameter, translatorFactory: TranslatorFactory,
                 targetHandler: Optional[PathElementHandler] = None,
                 **kwargs: Any):
        # Strip off [%HDU] identifiers from e.g. DECam Community Pipeline
        # products.
        template = template.split('[%(')[0]
        super().__init__(template=template, keys=keys)
        self._translator = translatorFactory.makeMatching(datasetTypeName, keys, **kwargs)
        self.datasetType = DatasetType(datasetTypeName, dimensions=self._translator.dimensionNames,
                                       storageClass=storageClass, universe=universe,
                                       isCalibration=("calibDate" in keys))
        self._formatter = formatter
        if targetHandler is None:
            targetHandler = TargetFileHandler
        self._handler = targetHandler

    def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
              fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
              ) -> PathElementHandler:
        # Docstring inherited from BuilderNode.
        return self._handler(parser=parser, translator=self._translator, datasetType=self.datasetType,
                             formatter=self._formatter)

    def prune(self) -> Tuple[BuilderNode, List[str], bool]:
        # Docstring inherited from BuilderNode.
        return self, [], False

    datasetType: DatasetType
    """The Gen3 dataset type extracted by the handler this object builds
    (`lsst.daf.butler.DatasetType`).
    """

class BuilderPrunedTree(BuilderNode):
    """A `BuilderNode` that represents a subdirectory to be skipped,
    created by pruning a `BuilderTree` that contained only `BuilderSkipInput`
    instances.

    BuilderPrunedTrees can be pruned. When they are not pruned, they
    build `SkipHandler` instances.

    Parameters
    ----------
    messages : `list` [`str`]
        A list of warning messages to be printed when the handler produced by
        this builder matches a subdirectory.
    """

    def __init__(self, messages: List[str]):
        self._messages = messages

    def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
              fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
              ) -> PathElementHandler:
        # Docstring inherited from BuilderNode.
        message = "; ".join(self._messages) if self._messages else None
        return SkipHandler(parser=parser, isForFiles=False, message=message)

    def prune(self) -> Tuple[BuilderNode, List[str], bool]:
        # Docstring inherited from BuilderNode.
        return self, self._messages, True

class BuilderDuplicateInputs(BuilderNode):
    """A `BuilderNode` that represents a collection of `BuilderInput` instances
    that all have the same template.
    """
    def __init__(self, old: BuilderInput, new: BuilderInput):
        self._children = []
        if isinstance(old, BuilderDuplicateInputs):
            self._children.extend(old._children)
        else:
            self._children.append(old)
        self._children.append(new)
        self._messages = []  # populated in prune()

    def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
              fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
              ) -> PathElementHandler:
        # Docstring inherited from BuilderNode.
        message = "; ".join(self._messages) if self._messages else None
        return SkipHandler(parser=parser, isForFiles=False, message=message)

    def prune(self) -> Tuple[BuilderNode, List[str], bool]:
        # Docstring inherited from BuilderNode.
        unprunable = []
        newChildren = []
        for child in self._children:
            newChild, childMessages, toPruneChild = child.prune()
            if toPruneChild:
                self._messages.extend(childMessages)
            else:
                unprunable.append(newChild)
            newChildren.append(newChild)
        self._children = newChildren
        if len(unprunable) == 0:
            # All children are just skips, so we can prune this node if we
            # remember their messages.
            return self, self._messages, True
        elif len(unprunable) == 1 and not self._messages:
            # Exactly one child is a target, and the others were ignored with
            # no warning messages. Tell the parent node to just use that child,
            # so if we see any matching files, we just assume they're for that
            # target.
            return unprunable[0], [], False
        else:
            # Multiple targets or skips with messages, which means we won't
            # know how to handle any matching files. Replace any messages we
            # have with a single message that combines them all as well as
            # any target dataset types that they are ambiguous with.
            nested = [f"{c.datasetType.name} (target)" for c in unprunable]
            nested.extend(self._messages)
            self._messages = [f"ambiguous match: [{', '.join(nested)}]"]
            return self, self._messages, True

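# A minimal sketch of how duplicate templates are resolved: when two inputs
# share a template, `BuilderTree.insert` wraps them in a
# `BuilderDuplicateInputs`, whose `prune` merges their messages or reports an
# ambiguity. The templates, keys, messages, and helper name below are
# hypothetical examples, not real Gen2 dataset types.
def _exampleDuplicateTemplate() -> Tuple[BuilderNode, List[str], bool]:
    # Two skip inputs that use the exact same Gen2 template.
    first = BuilderSkipInput("raw/%(visit)d.fits", {"visit": int},
                             "raw data is ingested by a dedicated task")
    second = BuilderSkipInput("raw/%(visit)d.fits", {"visit": int},
                              "duplicate template; also skipped")
    merged = BuilderDuplicateInputs(first, second)
    # Both children can themselves be pruned, so the merged node can be
    # pruned as well, carrying both warning messages for its parent.
    return merged.prune()
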
class BuilderTree(BuilderNode):
    """A `BuilderNode` that represents a directory.

    This is the only `BuilderNode` class that is not a leaf node. If all
    of its children can be pruned, it is replaced by a `BuilderPrunedTree`
    (which can then be pruned itself). It builds `SubdirectoryHandler`
    instances when not pruned.
    """
    def __init__(self):
        self._children = {}  # Maps template path element to BuilderNode

    def insert(self, level: int, leaf: BuilderInput):
        """Insert an input leaf node into the tree, recursively constructing
        intermediate parents in order to put it at the right level.

        Parameters
        ----------
        level : `int`
            The level ``self`` is at in the larger tree, with zero the
            repository root. The right level for the leaf is given by the
            length of ``leaf.elements``.
        leaf : `BuilderInput`
            The leaf node to insert.
        """
        nextLevel = level + 1
        element = leaf.elements[level]
        if nextLevel == len(leaf.elements):
            conflict = self._children.get(element)
            if conflict is not None:
                # Sadly, the Gen2 butler has some actual dataset types that
                # use the exact same template.
                leaf = BuilderDuplicateInputs(conflict, leaf)
            self._children[element] = leaf
        else:
            child = self._children.setdefault(element, BuilderTree())
            child.insert(nextLevel, leaf)

    def fill(self, scanner: DirectoryScanner, allKeys: Dict[str, type], previousKeys: Dict[str, type], *,
             fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]):
        """Fill a `DirectoryScanner` instance by recursively building all
        child nodes.

        Parameters
        ----------
        scanner : `DirectoryScanner`
            Object to populate.
        allKeys : `dict` [`str`, `type`]
            Mapping from Gen2 data ID key to its value type, covering all keys
            that could be used in any child template.
        previousKeys : `dict` [`str`, `type`], optional
            A dictionary containing key strings and types for Gen2 data ID keys
            that have been extracted from previous path elements of the same
            template.
        fileIgnoreRegEx : `re.Pattern`, optional
            A regular expression pattern that identifies non-dataset files that
            can be ignored, to be applied at all levels of the directory tree.
        dirIgnoreRegEx : `re.Pattern`, optional
            A regular expression pattern that identifies non-dataset
            subdirectories that can be ignored, to be applied at all levels of
            the directory tree.
        """
        if fileIgnoreRegEx is not None:
            scanner.add(IgnoreHandler(fileIgnoreRegEx, isForFiles=True))
        if dirIgnoreRegEx is not None:
            scanner.add(IgnoreHandler(dirIgnoreRegEx, isForFiles=False))
        for template, child in self._children.items():
            parser = PathElementParser(template, allKeys, previousKeys=previousKeys)
            cumulativeKeys = previousKeys.copy()
            cumulativeKeys.update(parser.keys)
            scanner.add(child.build(parser, allKeys, cumulativeKeys, fileIgnoreRegEx=fileIgnoreRegEx,
                                    dirIgnoreRegEx=dirIgnoreRegEx))

    def prune(self) -> Tuple[BuilderNode, List[str], bool]:
        # Docstring inherited from BuilderNode.
        toPruneThis = True
        newChildren = {}
        messages = []
        # Recursively prune children.
        for template, child in list(self._children.items()):
            newChild, childMessages, toPruneChild = child.prune()
            newChildren[template] = newChild
            messages.extend(childMessages)
            if not toPruneChild:
                toPruneThis = False
        self._children = newChildren
        if toPruneThis:
            return BuilderPrunedTree(messages), messages, True
        else:
            return self, [], False

    def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
              fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
              ) -> PathElementHandler:
        # Docstring inherited from BuilderNode.
        built = SubdirectoryHandler(parser)
        self.fill(built.scanner, allKeys, cumulativeKeys, fileIgnoreRegEx=fileIgnoreRegEx,
                  dirIgnoreRegEx=dirIgnoreRegEx)
        return built
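

# A minimal end-to-end sketch of how a caller (in practice, `RepoWalker`) is
# expected to assemble and prune this temporary tree. The template, keys,
# message, and helper name below are hypothetical examples.
def _exampleTreeAssembly() -> Tuple[BuilderNode, List[str], bool]:
    tree = BuilderTree()
    skip = BuilderSkipInput(
        os.path.join("defects", "%(calibDate)s", "defects.fits"),
        {"calibDate": str},
        "defects are handled separately",
    )
    # Leaves are inserted starting at the repository root (level 0); one
    # intermediate BuilderTree is created for each directory-level path
    # element of the template.
    tree.insert(0, skip)
    # Because this tree contains only skip inputs, pruning collapses it into
    # a BuilderPrunedTree that carries the warning message; unpruned trees
    # would instead be used to `fill` DirectoryScanner instances.
    return tree.prune()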