connectionTypes.py
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining connection types to be used within a
`PipelineTaskConnections` class.

An illustrative sketch of these connection types in use appears at the end
of this module.
"""

__all__ = ["InitInput", "InitOutput", "Input", "PrerequisiteInput",
           "Output", "BaseConnection"]

import dataclasses
import typing
from typing import Callable, Iterable, Optional

from lsst.daf.butler import (
    CollectionSearch,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    DimensionUniverse,
    Registry,
    StorageClass,
)


@dataclasses.dataclass(frozen=True)
class BaseConnection:
    """Base class used for declaring PipelineTask connections.

    Parameters
    ----------
    name : `str`
        The name used to identify the dataset type.
    storageClass : `str`
        The storage class used when (un)persisting the dataset type.
    doc : `str`
        Documentation for this connection.
    multiple : `bool`
        Indicates whether this connection should expect to contain multiple
        objects of the given dataset type.
    """
    name: str
    storageClass: str
    doc: str = ""
    multiple: bool = False

    def __get__(self, inst, klass):
        """Descriptor method.

        This method makes a connection behave as a descriptor. When a
        connection is added to a connection class, it is a class-level
        variable. This method makes accessing the connection on an instance
        of the connection class that owns it return a result specialized for
        that instance. For connections this specifically means that names
        specified in a config instance will be visible instead of the
        default names for the connection.
        """
        # If inst is None, this is being accessed by the class and not an
        # instance; return this connection itself.
        if inst is None:
            return self
        # If no object cache exists, create one to track the instances this
        # connection has been accessed by.
        if not hasattr(inst, '_connectionCache'):
            object.__setattr__(inst, '_connectionCache', {})
        # Look up an existing cached instance.
        idSelf = id(self)
        if idSelf in inst._connectionCache:
            return inst._connectionCache[idSelf]
        # Accumulate the parameters that define this connection.
        params = {}
        for field in dataclasses.fields(self):
            params[field.name] = getattr(self, field.name)
        # Get the name override defined by the instance of the connection
        # class.
        params['name'] = inst._nameOverrides[self.varName]
        # Return a new instance of this connection specialized with the
        # information provided by the connection class instance.
        return inst._connectionCache.setdefault(idSelf, self.__class__(**params))

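    # Illustrative sketch of the descriptor behavior above; ``MyConnections``
    # and the config values are hypothetical, not part of this module:
    #
    #     MyConnections.exposure.name       # class access returns the
    #                                       # connection itself: "calexp"
    #     config.connections.exposure = "fakes_calexp"
    #     connections = MyConnections(config=config)
    #     connections.exposure.name         # instance access returns a copy
    #                                       # with the override applied:
    #                                       # "fakes_calexp"
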
    def makeDatasetType(self, universe: DimensionUniverse,
                        parentStorageClass: Optional[StorageClass] = None):
        """Construct a true `DatasetType` instance with normalized dimensions.

        Parameters
        ----------
        universe : `lsst.daf.butler.DimensionUniverse`
            Set of all known dimensions to be used to normalize the dimension
            names specified in config.
        parentStorageClass : `lsst.daf.butler.StorageClass`, optional
            Parent storage class for component datasets; `None` otherwise.

        Returns
        -------
        datasetType : `DatasetType`
            The `DatasetType` defined by this connection.
        """
        return DatasetType(self.name,
                           universe.empty,
                           self.storageClass,
                           parentStorageClass=parentStorageClass)


@dataclasses.dataclass(frozen=True)
class DimensionedConnection(BaseConnection):
    """Class used for declaring PipelineTask connections that include
    dimensions.

    Parameters
    ----------
    name : `str`
        The name used to identify the dataset type.
    storageClass : `str`
        The storage class used when (un)persisting the dataset type.
    multiple : `bool`
        Indicates whether this connection should expect to contain multiple
        objects of the given dataset type.
    dimensions : iterable of `str`
        The `lsst.daf.butler.Registry` dimensions used to identify the
        dataset type with the specified name.
    isCalibration : `bool`, optional
        `True` if this dataset type may be included in CALIBRATION-type
        collections to associate it with a validity range, `False` (default)
        otherwise.
    """
    dimensions: typing.Iterable[str] = ()
    isCalibration: bool = False

    def __post_init__(self):
        if isinstance(self.dimensions, str):
            raise TypeError("Dimensions must be iterable of dimensions, got str, "
                            "possibly omitted trailing comma")
        if not isinstance(self.dimensions, typing.Iterable):
            raise TypeError("Dimensions must be iterable of dimensions")

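    # Illustrative sketch of the mistake guarded against above:
    # ``("visit")`` is just the string "visit", while ``("visit",)`` is a
    # one-element tuple, so the first call raises and the second is accepted
    # (the connection values here are assumptions):
    #
    #     DimensionedConnection(name="calexp", storageClass="ExposureF",
    #                           dimensions=("visit"))   # raises TypeError
    #     DimensionedConnection(name="calexp", storageClass="ExposureF",
    #                           dimensions=("visit",))  # OK
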
    def makeDatasetType(self, universe: DimensionUniverse,
                        parentStorageClass: Optional[StorageClass] = None):
        """Construct a true `DatasetType` instance with normalized dimensions.

        Parameters
        ----------
        universe : `lsst.daf.butler.DimensionUniverse`
            Set of all known dimensions to be used to normalize the dimension
            names specified in config.
        parentStorageClass : `lsst.daf.butler.StorageClass`, optional
            Parent storage class for component datasets; `None` otherwise.

        Returns
        -------
        datasetType : `DatasetType`
            The `DatasetType` defined by this connection.
        """
        return DatasetType(self.name,
                           universe.extract(self.dimensions),
                           self.storageClass, isCalibration=self.isCalibration,
                           parentStorageClass=parentStorageClass)


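# Illustrative sketch of turning a connection into a normalized
# `DatasetType`; the connection values and the ``butler`` object are
# assumptions, not part of this module:
#
#     connection = DimensionedConnection(
#         name="calexp",
#         storageClass="ExposureF",
#         dimensions=("instrument", "visit", "detector"),
#     )
#     datasetType = connection.makeDatasetType(butler.registry.dimensions)
#     # datasetType.dimensions is now normalized against the butler's
#     # DimensionUniverse.
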
@dataclasses.dataclass(frozen=True)
class BaseInput(DimensionedConnection):
    """Class used for declaring PipelineTask input connections.

    Parameters
    ----------
    name : `str`
        The default name used to identify the dataset type.
    storageClass : `str`
        The storage class used when (un)persisting the dataset type.
    multiple : `bool`
        Indicates whether this connection should expect to contain multiple
        objects of the given dataset type.
    dimensions : iterable of `str`
        The `lsst.daf.butler.Registry` dimensions used to identify the
        dataset type with the specified name.
    deferLoad : `bool`
        Indicates that this dataset type will be loaded as a
        `lsst.daf.butler.DeferredDatasetHandle`. Tasks can use this handle
        to load the object at a later time.
    """
    deferLoad: bool = False

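
# Illustrative sketch of ``deferLoad`` as seen from a task's ``runQuantum``;
# the connection name "exposure" and the bounding box are assumptions:
#
#     inputs = butlerQC.get(inputRefs)
#     handle = inputs["exposure"]     # a DeferredDatasetHandle, because the
#                                     # connection set deferLoad=True
#     cutout = handle.get(parameters={"bbox": bbox})  # load a subset later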

@dataclasses.dataclass(frozen=True)
class Input(BaseInput):
    pass


@dataclasses.dataclass(frozen=True)
class PrerequisiteInput(BaseInput):
    """Class used for declaring PipelineTask prerequisite connections.

    Parameters
    ----------
    name : `str`
        The default name used to identify the dataset type.
    storageClass : `str`
        The storage class used when (un)persisting the dataset type.
    multiple : `bool`
        Indicates whether this connection should expect to contain multiple
        objects of the given dataset type.
    dimensions : iterable of `str`
        The `lsst.daf.butler.Registry` dimensions used to identify the
        dataset type with the specified name.
    deferLoad : `bool`
        Indicates that this dataset type will be loaded as a
        `lsst.daf.butler.DeferredDatasetHandle`. Tasks can use this handle
        to load the object at a later time.
    lookupFunction : `typing.Callable`, optional
        An optional callable that looks up prerequisite inputs from the
        `DatasetType`, registry, quantum data ID, and input collections
        passed to it. If no function is specified, the default temporal and
        spatial lookup is used.

    Notes
    -----
    Prerequisite inputs are used for datasets that must exist in the data
    repository before a pipeline including this task is run; they cannot be
    produced by another task in the same pipeline.

    In exchange for this limitation, they have a number of advantages
    relative to regular `Input` connections:

    - The query used to find them during `QuantumGraph` generation can be
      fully customized by providing a ``lookupFunction``.
    - Failed searches for prerequisites during `QuantumGraph` generation
      will usually generate more helpful diagnostics than those for regular
      `Input` connections.
    - The default query for prerequisite inputs relates the quantum
      dimensions directly to the dimensions of its dataset type, without
      being constrained by any of the other dimensions in the pipeline.
      This allows them to be used for temporal calibration lookups (which
      regular `Input` connections cannot do at present) and to work around
      `QuantumGraph` generation limitations involving cases where naive
      spatial overlap relationships between dimensions are not desired
      (e.g. a task that wants all detectors in each visit for which the
      visit overlaps a tract, not just those where that detector+visit
      combination overlaps the tract).
    """
    lookupFunction: Optional[Callable[[DatasetType, Registry, DataCoordinate, CollectionSearch],
                                      Iterable[DatasetRef]]] = None

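# Illustrative sketch of a custom ``lookupFunction`` for a
# `PrerequisiteInput`; the function body, dataset type name, and dimensions
# are assumptions, not part of this module:
#
#     def lookupRefCat(datasetType, registry, quantumDataId, collections):
#         # Find all shards of the reference catalog in the given
#         # collections, ignoring any spatial relationship to the quantum.
#         return registry.queryDatasets(datasetType, collections=collections)
#
#     refCat = PrerequisiteInput(
#         name="gaia_dr2_20200414",
#         storageClass="SimpleCatalog",
#         dimensions=("htm7",),
#         multiple=True,
#         deferLoad=True,
#         lookupFunction=lookupRefCat,
#     )
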

@dataclasses.dataclass(frozen=True)
class Output(DimensionedConnection):
    pass


@dataclasses.dataclass(frozen=True)
class InitInput(BaseConnection):
    pass


@dataclasses.dataclass(frozen=True)
class InitOutput(BaseConnection):
    pass
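

# Illustrative sketch of these connection types in use, as referenced in the
# module docstring; the task, dataset type names, and dimensions below are
# assumptions, not part of this module:
#
#     import lsst.pipe.base as pipeBase
#     import lsst.pipe.base.connectionTypes as cT
#
#     class CalibrateConnections(pipeBase.PipelineTaskConnections,
#                                dimensions=("instrument", "visit",
#                                            "detector")):
#         exposure = cT.Input(
#             name="postISRCCD",
#             doc="Input post-ISR exposure to calibrate.",
#             storageClass="Exposure",
#             dimensions=("instrument", "exposure", "detector"),
#         )
#         camera = cT.PrerequisiteInput(
#             name="camera",
#             doc="Camera geometry.",
#             storageClass="Camera",
#             dimensions=("instrument",),
#             isCalibration=True,
#         )
#         outputCatalog = cT.Output(
#             name="src",
#             doc="Output source catalog.",
#             storageClass="SourceCatalog",
#             dimensions=("instrument", "visit", "detector"),
#         )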