doxygen/x_masterDoxyDoc/pipeline_8py_source.html

 # This file is part of pipe_base.

 #

 # Developed for the LSST Data Management System.

 # This product includes software developed by the LSST Project

 # (http://www.lsst.org).

 # See the COPYRIGHT file at the top-level directory of this distribution

 # for details of code ownership.

 #

 # This program is free software: you can redistribute it and/or modify

 # it under the terms of the GNU General Public License as published by

 # the Free Software Foundation, either version 3 of the License, or

 # (at your option) any later version.

 #

 # This program is distributed in the hope that it will be useful,

 # but WITHOUT ANY WARRANTY; without even the implied warranty of

 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

 # GNU General Public License for more details.

 #

 # You should have received a copy of the GNU General Public License

 # along with this program.  If not, see <http://www.gnu.org/licenses/>.

 from __future__ import annotations


 """Module defining Pipeline class and related methods.

 """


 __all__ = ["Pipeline", "TaskDef", "TaskDatasetTypes", "PipelineDatasetTypes", "LabelSpecifier"]


 # -------------------------------

 #  Imports of standard modules --

 # -------------------------------

 from dataclasses import dataclass

 from types import MappingProxyType

 from typing import Dict, Mapping, Set, Union, Generator, TYPE_CHECKING, Optional, Tuple


 import copy

 import re

 import os

 import urllib.parse

 import warnings


 # -----------------------------

 #  Imports for other modules --

 from lsst.daf.butler import DatasetType, NamedValueSet, Registry, SkyPixDimension, ButlerURI

 from lsst.utils import doImport

 from .configOverrides import ConfigOverrides

 from .connections import iterConnections

 from .pipelineTask import PipelineTask


 from . import pipelineIR

 from . import pipeTools


 if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.

     from lsst.obs.base import Instrument


 # ----------------------------------

 #  Local non-exported definitions --

 # ----------------------------------


 # ------------------------

 #  Exported definitions --

 # ------------------------


 @dataclass

 class LabelSpecifier:

     """A structure to specify a subset of labels to load


     This structure may contain a set of labels to be used in subsetting a

     pipeline, or a beginning and end point. Beginning or end may be empty,

     in which case the range will be a half open interval. Unlike python

     iteration bounds, end bounds are *INCLUDED*. Note that range based

     selection is not well defined for pipelines that are not linear in nature,

     and correct behavior is not guaranteed, or may vary from run to run.

     """

     labels: Optional[Set[str]] = None

     begin: Optional[str] = None

     end: Optional[str] = None


     def __post_init__(self):

         if self.labels is not None and (self.begin or self.end):

             raise ValueError("This struct can only be initialized with a labels set or "

                              "a begin (and/or) end specifier")


 class TaskDef:

     """TaskDef is a collection of information about task needed by Pipeline.


     The information includes task name, configuration object and optional

     task class. This class is just a collection of attributes and it exposes

     all of them so that attributes could potentially be modified in place

     (e.g. if configuration needs extra overrides).


     Attributes

     ----------

     taskName : `str`

         `PipelineTask` class name, currently it is not specified whether this

         is a fully-qualified name or partial name (e.g. ``module.TaskClass``).

         Framework should be prepared to handle all cases.

     config : `lsst.pex.config.Config`

         Instance of the configuration class corresponding to this task class,

         usually with all overrides applied. This config will be frozen.

     taskClass : `type` or ``None``

         `PipelineTask` class object, can be ``None``. If ``None`` then

         framework will have to locate and load class.

     label : `str`, optional

         Task label, usually a short string unique in a pipeline.

     """

     def __init__(self, taskName, config, taskClass=None, label=""):

         self.taskNametaskName = taskName

         config.freeze()

         self.configconfig = config

         self.taskClasstaskClass = taskClass

         self.labellabel = label

         self.connectionsconnections = config.connections.ConnectionsClass(config=config)


     @property

     def configDatasetName(self) -> str:

         """Name of a dataset type for configuration of this task (`str`)

         """

         return self.labellabel + "_config"


     @property

     def metadataDatasetName(self) -> Optional[str]:

         """Name of a dataset type for metadata of this task, `None` if

         metadata is not to be saved (`str`)

         """

         if self.configconfig.saveMetadata:

             return self.labellabel + "_metadata"

         else:

             return None


     def __str__(self):

         rep = "TaskDef(" + self.taskNametaskName

         if self.labellabel:

             rep += ", label=" + self.labellabel

         rep += ")"

         return rep


     def __eq__(self, other: object) -> bool:

         if not isinstance(other, TaskDef):

             return False

         # This does not consider equality of configs when determining equality

         # as config equality is a difficult thing to define. Should be updated

         # after DM-27847

         return self.taskClasstaskClass == other.taskClass and self.labellabel == other.label


     def __hash__(self):

         return hash((self.taskClasstaskClass, self.labellabel))


 class Pipeline:

     """A `Pipeline` is a representation of a series of tasks to run, and the

     configuration for those tasks.


     Parameters

     ----------

     description : `str`

         A description of that this pipeline does.

     """

     def __init__(self, description: str):

         pipeline_dict = {"description": description, "tasks": {}}

         self._pipelineIR_pipelineIR = pipelineIR.PipelineIR(pipeline_dict)


     @classmethod

     def fromFile(cls, filename: str) -> Pipeline:

         """Load a pipeline defined in a pipeline yaml file.


         Parameters

         ----------

         filename: `str`

            A path that points to a pipeline defined in yaml format. This

            filename may also supply additional labels to be used in

            subsetting the loaded Pipeline. These labels are separated from

            the path by a \\#, and may be specified as a comma separated

            list, or a range denoted as beginning..end. Beginning or end may

            be empty, in which case the range will be a half open interval.

            Unlike python iteration bounds, end bounds are *INCLUDED*. Note

            that range based selection is not well defined for pipelines that

            are not linear in nature, and correct behavior is not guaranteed,

            or may vary from run to run.


         Returns

         -------

         pipeline: `Pipeline`

             The pipeline loaded from specified location with appropriate (if

             any) subsetting


         Notes

         -----

         This method attempts to prune any contracts that contain labels which

         are not in the declared subset of labels. This pruning is done using a

         string based matching due to the nature of contracts and may prune more

         than it should.

         """

         return cls.from_urifrom_uri(filename)


     @classmethod

     def from_uri(cls, uri: Union[str, ButlerURI]) -> Pipeline:

         """Load a pipeline defined in a pipeline yaml file at a location

         specified by a URI.


         Parameters

         ----------

         uri: `str` or `ButlerURI`

            If a string is supplied this should be a URI path that points to a

            pipeline defined in yaml format. This uri may also supply

            additional labels to be used in subsetting the loaded Pipeline.

            These labels are separated from the path by a \\#, and may be

            specified as a comma separated list, or a range denoted as

            beginning..end. Beginning or end may be empty, in which case the

            range will be a half open interval. Unlike python iteration

            bounds, end bounds are *INCLUDED*. Note that range based selection

            is not well defined for pipelines that are not linear in nature,

            and correct behavior is not guaranteed, or may vary from run to

            run. The same specifiers can be used with a ButlerURI object, by

            being the sole contents in the fragments attribute.


         Returns

         -------

         pipeline: `Pipeline`

             The pipeline loaded from specified location with appropriate (if

             any) subsetting


         Notes

         -----

         This method attempts to prune any contracts that contain labels which

         are not in the declared subset of labels. This pruning is done using a

         string based matching due to the nature of contracts and may prune more

         than it should.

         """

         # Split up the uri and any labels that were supplied

         uri, label_specifier = cls._parse_file_specifier_parse_file_specifier(uri)

         pipeline: Pipeline = cls.fromIRfromIR(pipelineIR.PipelineIR.from_uri(uri))


         # If there are labels supplied, only keep those

         if label_specifier is not None:

             pipeline = pipeline.subsetFromLabels(label_specifier)

         return pipeline


     def subsetFromLabels(self, labelSpecifier: LabelSpecifier) -> Pipeline:

         """Subset a pipeline to contain only labels specified in labelSpecifier


         Parameters

         ----------

         labelSpecifier : `labelSpecifier`

             Object containing labels that describes how to subset a pipeline.


         Returns

         -------

         pipeline : `Pipeline`

             A new pipeline object that is a subset of the old pipeline


         Raises

         ------

         ValueError

             Raised if there is an issue with specified labels


         Notes

         -----

         This method attempts to prune any contracts that contain labels which

         are not in the declared subset of labels. This pruning is done using a

         string based matching due to the nature of contracts and may prune more

         than it should.

         """

         # Labels supplied as a set

         if labelSpecifier.labels:

             labelSet = labelSpecifier.labels

         # Labels supplied as a range, first create a list of all the labels

         # in the pipeline sorted according to task dependency. Then only

         # keep labels that lie between the supplied bounds

         else:

             # Create a copy of the pipeline to use when assessing the label

             # ordering. Use a dict for fast searching while preserving order.

             # Remove contracts so they do not fail in the expansion step. This

             # is needed because a user may only configure the tasks they intend

             # to run, which may cause some contracts to fail if they will later

             # be dropped

             pipeline = copy.deepcopy(self)

             pipeline._pipelineIR.contracts = []

             labels = {taskdef.label: True for taskdef in pipeline.toExpandedPipeline()}


             # Verify the bounds are in the labels

             if labelSpecifier.begin is not None:

                 if labelSpecifier.begin not in labels:

                     raise ValueError(f"Beginning of range subset, {labelSpecifier.begin}, not found in "

                                      "pipeline definition")

             if labelSpecifier.end is not None:

                 if labelSpecifier.end not in labels:

                     raise ValueError(f"End of range subset, {labelSpecifier.end}, not found in pipeline "

                                      "definition")


             labelSet = set()

             for label in labels:

                 if labelSpecifier.begin is not None:

                     if label != labelSpecifier.begin:

                         continue

                     else:

                         labelSpecifier.begin = None

                 labelSet.add(label)

                 if labelSpecifier.end is not None and label == labelSpecifier.end:

                     break

         return Pipeline.fromIR(self._pipelineIR_pipelineIR.subset_from_labels(labelSet))


     @staticmethod

     def _parse_file_specifier(uri: Union[str, ButlerURI]

                               ) -> Tuple[ButlerURI, Optional[LabelSpecifier]]:

         """Split appart a uri and any possible label subsets

         """

         if isinstance(uri, str):

             # This is to support legacy pipelines during transition

             uri, num_replace = re.subn("[:](?!\\/\\/)", "#", uri)

             if num_replace:

                 warnings.warn(f"The pipeline file {uri} seems to use the legacy : to separate "

                               "labels, this is deprecated and will be removed after June 2021, please use "

                               "# instead.",

                               category=FutureWarning)

             if uri.count("#") > 1:

                 raise ValueError("Only one set of labels is allowed when specifying a pipeline to load")

             uri = ButlerURI(uri)

         label_subset = uri.fragment or None


         specifier: Optional[LabelSpecifier]

         if label_subset is not None:

             label_subset = urllib.parse.unquote(label_subset)

             args: Dict[str, Union[Set[str], str, None]]

             # labels supplied as a list

             if ',' in label_subset:

                 if '..' in label_subset:

                     raise ValueError("Can only specify a list of labels or a range"

                                      "when loading a Pipline not both")

                 args = {"labels": set(label_subset.split(","))}

             # labels supplied as a range

             elif '..' in label_subset:

                 # Try to de-structure the labelSubset, this will fail if more

                 # than one range is specified

                 begin, end, *rest = label_subset.split("..")

                 if rest:

                     raise ValueError("Only one range can be specified when loading a pipeline")

                 args = {"begin": begin if begin else None, "end": end if end else None}

             # Assume anything else is a single label

             else:

                 args = {"labels": {label_subset}}


             specifier = LabelSpecifier(**args)

         else:

             specifier = None


         return uri, specifier


     @classmethod

     def fromString(cls, pipeline_string: str) -> Pipeline:

         """Create a pipeline from string formatted as a pipeline document.


         Parameters

         ----------

         pipeline_string : `str`

             A string that is formatted according like a pipeline document


         Returns

         -------

         pipeline: `Pipeline`

         """

         pipeline = cls.fromIRfromIR(pipelineIR.PipelineIR.from_string(pipeline_string))

         return pipeline


     @classmethod

     def fromIR(cls, deserialized_pipeline: pipelineIR.PipelineIR) -> Pipeline:

         """Create a pipeline from an already created `PipelineIR` object.


         Parameters

         ----------

         deserialized_pipeline: `PipelineIR`

             An already created pipeline intermediate representation object


         Returns

         -------

         pipeline: `Pipeline`

         """

         pipeline = cls.__new__(cls)

         pipeline._pipelineIR = deserialized_pipeline

         return pipeline


     @classmethod

     def fromPipeline(cls, pipeline: pipelineIR.PipelineIR) -> Pipeline:

         """Create a new pipeline by copying an already existing `Pipeline`.


         Parameters

         ----------

         pipeline: `Pipeline`

             An already created pipeline intermediate representation object


         Returns

         -------

         pipeline: `Pipeline`

         """

         return cls.fromIRfromIR(copy.deepcopy(pipeline._pipelineIR))


     def __str__(self) -> str:

         return str(self._pipelineIR_pipelineIR)


     def addInstrument(self, instrument: Union[Instrument, str]) -> None:

         """Add an instrument to the pipeline, or replace an instrument that is

         already defined.


         Parameters

         ----------

         instrument : `~lsst.daf.butler.instrument.Instrument` or `str`

             Either a derived class object of a `lsst.daf.butler.instrument` or

             a string corresponding to a fully qualified

             `lsst.daf.butler.instrument` name.

         """

         if isinstance(instrument, str):

             pass

         else:

             # TODO: assume that this is a subclass of Instrument, no type

             # checking

             instrument = f"{instrument.__module__}.{instrument.__qualname__}"

         self._pipelineIR_pipelineIR.instrument = instrument


     def getInstrument(self) -> Instrument:

         """Get the instrument from the pipeline.


         Returns

         -------

         instrument : `~lsst.daf.butler.instrument.Instrument`, `str`, or None

             A derived class object of a `lsst.daf.butler.instrument`, a string

             corresponding to a fully qualified `lsst.daf.butler.instrument`

             name, or None if the pipeline does not have an instrument.

         """

         return self._pipelineIR_pipelineIR.instrument


     def addTask(self, task: Union[PipelineTask, str], label: str) -> None:

         """Add a new task to the pipeline, or replace a task that is already

         associated with the supplied label.


         Parameters

         ----------

         task: `PipelineTask` or `str`

             Either a derived class object of a `PipelineTask` or a string

             corresponding to a fully qualified `PipelineTask` name.

         label: `str`

             A label that is used to identify the `PipelineTask` being added

         """

         if isinstance(task, str):

             taskName = task

         elif issubclass(task, PipelineTask):

             taskName = f"{task.__module__}.{task.__qualname__}"

         else:

             raise ValueError("task must be either a child class of PipelineTask or a string containing"

                              " a fully qualified name to one")

         if not label:

             # in some cases (with command line-generated pipeline) tasks can

             # be defined without label which is not acceptable, use task

             # _DefaultName in that case

             if isinstance(task, str):

                 task = doImport(task)

             label = task._DefaultName

         self._pipelineIR_pipelineIR.tasks[label] = pipelineIR.TaskIR(label, taskName)


     def removeTask(self, label: str) -> None:

         """Remove a task from the pipeline.


         Parameters

         ----------

         label : `str`

             The label used to identify the task that is to be removed


         Raises

         ------

         KeyError

             If no task with that label exists in the pipeline


         """

         self._pipelineIR_pipelineIR.tasks.pop(label)


     def addConfigOverride(self, label: str, key: str, value: object) -> None:

         """Apply single config override.


         Parameters

         ----------

         label : `str`

             Label of the task.

         key: `str`

             Fully-qualified field name.

         value : object

             Value to be given to a field.

         """

         self._addConfigImpl_addConfigImpl(label, pipelineIR.ConfigIR(rest={key: value}))


     def addConfigFile(self, label: str, filename: str) -> None:

         """Add overrides from a specified file.


         Parameters

         ----------

         label : `str`

             The label used to identify the task associated with config to

             modify

         filename : `str`

             Path to the override file.

         """

         self._addConfigImpl_addConfigImpl(label, pipelineIR.ConfigIR(file=[filename]))


     def addConfigPython(self, label: str, pythonString: str) -> None:

         """Add Overrides by running a snippet of python code against a config.


         Parameters

         ----------

         label : `str`

             The label used to identity the task associated with config to

             modify.

         pythonString: `str`

             A string which is valid python code to be executed. This is done

             with config as the only local accessible value.

         """

         self._addConfigImpl_addConfigImpl(label, pipelineIR.ConfigIR(python=pythonString))


     def _addConfigImpl(self, label: str, newConfig: pipelineIR.ConfigIR) -> None:

         if label == "parameters":

             if newConfig.rest.keys() - self._pipelineIR_pipelineIR.parameters.mapping.keys():

                 raise ValueError("Cannot override parameters that are not defined in pipeline")

             self._pipelineIR_pipelineIR.parameters.mapping.update(newConfig.rest)

             if newConfig.file:

                 raise ValueError("Setting parameters section with config file is not supported")

             if newConfig.python:

                 raise ValueError("Setting parameters section using python block in unsupported")

             return

         if label not in self._pipelineIR_pipelineIR.tasks:

             raise LookupError(f"There are no tasks labeled '{label}' in the pipeline")

         self._pipelineIR_pipelineIR.tasks[label].add_or_update_config(newConfig)


     def toFile(self, filename: str) -> None:

         self._pipelineIR_pipelineIR.to_file(filename)


     def write_to_uri(self, uri: Union[str, ButlerURI]) -> None:

         self._pipelineIR_pipelineIR.write_to_uri(uri)


     def toExpandedPipeline(self) -> Generator[TaskDef, None, None]:

         """Returns a generator of TaskDefs which can be used to create quantum

         graphs.


         Returns

         -------

         generator : generator of `TaskDef`

             The generator returned will be the sorted iterator of tasks which

             are to be used in constructing a quantum graph.


         Raises

         ------

         NotImplementedError

             If a dataId is supplied in a config block. This is in place for

             future use

         """

         taskDefs = []

         for label, taskIR in self._pipelineIR_pipelineIR.tasks.items():

             taskClass = doImport(taskIR.klass)

             taskName = taskClass.__qualname__

             config = taskClass.ConfigClass()

             overrides = ConfigOverrides()

             if self._pipelineIR_pipelineIR.instrument is not None:

                 overrides.addInstrumentOverride(self._pipelineIR_pipelineIR.instrument, taskClass._DefaultName)

             if taskIR.config is not None:

                 for configIR in (configIr.formatted(self._pipelineIR_pipelineIR.parameters)

                                  for configIr in taskIR.config):

                     if configIR.dataId is not None:

                         raise NotImplementedError("Specializing a config on a partial data id is not yet "

                                                   "supported in Pipeline definition")

                     # only apply override if it applies to everything

                     if configIR.dataId is None:

                         if configIR.file:

                             for configFile in configIR.file:

                                 overrides.addFileOverride(os.path.expandvars(configFile))

                         if configIR.python is not None:

                             overrides.addPythonOverride(configIR.python)

                         for key, value in configIR.rest.items():

                             overrides.addValueOverride(key, value)

             overrides.applyTo(config)

             # This may need to be revisited

             config.validate()

             taskDefs.append(TaskDef(taskName=taskName, config=config, taskClass=taskClass, label=label))


         # lets evaluate the contracts

         if self._pipelineIR_pipelineIR.contracts is not None:

             label_to_config = {x.label: x.config for x in taskDefs}

             for contract in self._pipelineIR_pipelineIR.contracts:

                 # execute this in its own line so it can raise a good error

                 # message if there was problems with the eval

                 success = eval(contract.contract, None, label_to_config)

                 if not success:

                     extra_info = f": {contract.msg}" if contract.msg is not None else ""

                     raise pipelineIR.ContractError(f"Contract(s) '{contract.contract}' were not "

                                                    f"satisfied{extra_info}")


         yield from pipeTools.orderPipeline(taskDefs)


     def __len__(self):

         return len(self._pipelineIR_pipelineIR.tasks)


     def __eq__(self, other: object):

         if not isinstance(other, Pipeline):

             return False

         return self._pipelineIR_pipelineIR == other._pipelineIR


 @dataclass(frozen=True)

 class TaskDatasetTypes:

     """An immutable struct that extracts and classifies the dataset types used

     by a `PipelineTask`

     """


     initInputs: NamedValueSet[DatasetType]

     """Dataset types that are needed as inputs in order to construct this Task.


     Task-level `initInputs` may be classified as either

     `~PipelineDatasetTypes.initInputs` or

     `~PipelineDatasetTypes.initIntermediates` at the Pipeline level.

     """


     initOutputs: NamedValueSet[DatasetType]

     """Dataset types that may be written after constructing this Task.


     Task-level `initOutputs` may be classified as either

     `~PipelineDatasetTypes.initOutputs` or

     `~PipelineDatasetTypes.initIntermediates` at the Pipeline level.

     """


     inputs: NamedValueSet[DatasetType]

     """Dataset types that are regular inputs to this Task.


     If an input dataset needed for a Quantum cannot be found in the input

     collection(s) or produced by another Task in the Pipeline, that Quantum

     (and all dependent Quanta) will not be produced.


     Task-level `inputs` may be classified as either

     `~PipelineDatasetTypes.inputs` or `~PipelineDatasetTypes.intermediates`

     at the Pipeline level.

     """


     prerequisites: NamedValueSet[DatasetType]

     """Dataset types that are prerequisite inputs to this Task.


     Prerequisite inputs must exist in the input collection(s) before the

     pipeline is run, but do not constrain the graph - if a prerequisite is

     missing for a Quantum, `PrerequisiteMissingError` is raised.


     Prerequisite inputs are not resolved until the second stage of

     QuantumGraph generation.

     """


     outputs: NamedValueSet[DatasetType]

     """Dataset types that are produced by this Task.


     Task-level `outputs` may be classified as either

     `~PipelineDatasetTypes.outputs` or `~PipelineDatasetTypes.intermediates`

     at the Pipeline level.

     """


     @classmethod

     def fromTaskDef(cls, taskDef: TaskDef, *, registry: Registry) -> TaskDatasetTypes:

         """Extract and classify the dataset types from a single `PipelineTask`.


         Parameters

         ----------

         taskDef: `TaskDef`

             An instance of a `TaskDef` class for a particular `PipelineTask`.

         registry: `Registry`

             Registry used to construct normalized `DatasetType` objects and

             retrieve those that are incomplete.


         Returns

         -------

         types: `TaskDatasetTypes`

             The dataset types used by this task.

         """

         def makeDatasetTypesSet(connectionType: str, freeze: bool = True) -> NamedValueSet[DatasetType]:

             """Constructs a set of true `DatasetType` objects


             Parameters

             ----------

             connectionType : `str`

                 Name of the connection type to produce a set for, corresponds

                 to an attribute of type `list` on the connection class instance

             freeze : `bool`, optional

                 If `True`, call `NamedValueSet.freeze` on the object returned.


             Returns

             -------

             datasetTypes : `NamedValueSet`

                 A set of all datasetTypes which correspond to the input

                 connection type specified in the connection class of this

                 `PipelineTask`


             Notes

             -----

             This function is a closure over the variables ``registry`` and

             ``taskDef``.

             """

             datasetTypes = NamedValueSet()

             for c in iterConnections(taskDef.connections, connectionType):

                 dimensions = set(getattr(c, 'dimensions', set()))

                 if "skypix" in dimensions:

                     try:

                         datasetType = registry.getDatasetType(c.name)

                     except LookupError as err:

                         raise LookupError(

                             f"DatasetType '{c.name}' referenced by "

                             f"{type(taskDef.connections).__name__} uses 'skypix' as a dimension "

                             f"placeholder, but does not already exist in the registry.  "

                             f"Note that reference catalog names are now used as the dataset "

                             f"type name instead of 'ref_cat'."

                         ) from err

                     rest1 = set(registry.dimensions.extract(dimensions - set(["skypix"])).names)

                     rest2 = set(dim.name for dim in datasetType.dimensions

                                 if not isinstance(dim, SkyPixDimension))

                     if rest1 != rest2:

                         raise ValueError(f"Non-skypix dimensions for dataset type {c.name} declared in "

                                          f"connections ({rest1}) are inconsistent with those in "

                                          f"registry's version of this dataset ({rest2}).")

                 else:

                     # Component dataset types are not explicitly in the

                     # registry.  This complicates consistency checks with

                     # registry and requires we work out the composite storage

                     # class.

                     registryDatasetType = None

                     try:

                         registryDatasetType = registry.getDatasetType(c.name)

                     except KeyError:

                         compositeName, componentName = DatasetType.splitDatasetTypeName(c.name)

                         parentStorageClass = DatasetType.PlaceholderParentStorageClass \

                             if componentName else None

                         datasetType = c.makeDatasetType(

                             registry.dimensions,

                             parentStorageClass=parentStorageClass

                         )

                         registryDatasetType = datasetType

                     else:

                         datasetType = c.makeDatasetType(

                             registry.dimensions,

                             parentStorageClass=registryDatasetType.parentStorageClass

                         )


                     if registryDatasetType and datasetType != registryDatasetType:

                         raise ValueError(f"Supplied dataset type ({datasetType}) inconsistent with "

                                          f"registry definition ({registryDatasetType}) "

                                          f"for {taskDef.label}.")

                 datasetTypes.add(datasetType)

             if freeze:

                 datasetTypes.freeze()

             return datasetTypes


         # optionally add output dataset for metadata

         outputs = makeDatasetTypesSet("outputs", freeze=False)

         if taskDef.metadataDatasetName is not None:

             # Metadata is supposed to be of the PropertySet type, its

             # dimensions correspond to a task quantum

             dimensions = registry.dimensions.extract(taskDef.connections.dimensions)

             outputs |= {DatasetType(taskDef.metadataDatasetName, dimensions, "PropertySet")}

         outputs.freeze()


         return cls(

             initInputs=makeDatasetTypesSet("initInputs"),

             initOutputs=makeDatasetTypesSet("initOutputs"),

             inputs=makeDatasetTypesSet("inputs"),

             prerequisites=makeDatasetTypesSet("prerequisiteInputs"),

             outputs=outputs,

         )


 @dataclass(frozen=True)

 class PipelineDatasetTypes:

     """An immutable struct that classifies the dataset types used in a

     `Pipeline`.

     """


     initInputs: NamedValueSet[DatasetType]

     """Dataset types that are needed as inputs in order to construct the Tasks

     in this Pipeline.


     This does not include dataset types that are produced when constructing

     other Tasks in the Pipeline (these are classified as `initIntermediates`).

     """


     initOutputs: NamedValueSet[DatasetType]

     """Dataset types that may be written after constructing the Tasks in this

     Pipeline.


     This does not include dataset types that are also used as inputs when

     constructing other Tasks in the Pipeline (these are classified as

     `initIntermediates`).

     """


     initIntermediates: NamedValueSet[DatasetType]

     """Dataset types that are both used when constructing one or more Tasks

     in the Pipeline and produced as a side-effect of constructing another

     Task in the Pipeline.

     """


     inputs: NamedValueSet[DatasetType]

     """Dataset types that are regular inputs for the full pipeline.


     If an input dataset needed for a Quantum cannot be found in the input

     collection(s), that Quantum (and all dependent Quanta) will not be

     produced.

     """


     prerequisites: NamedValueSet[DatasetType]

     """Dataset types that are prerequisite inputs for the full Pipeline.


     Prerequisite inputs must exist in the input collection(s) before the

     pipeline is run, but do not constrain the graph - if a prerequisite is

     missing for a Quantum, `PrerequisiteMissingError` is raised.


     Prerequisite inputs are not resolved until the second stage of

     QuantumGraph generation.

     """


     intermediates: NamedValueSet[DatasetType]

     """Dataset types that are output by one Task in the Pipeline and consumed

     as inputs by one or more other Tasks in the Pipeline.

     """


     outputs: NamedValueSet[DatasetType]

     """Dataset types that are output by a Task in the Pipeline and not consumed

     by any other Task in the Pipeline.

     """


     byTask: Mapping[str, TaskDatasetTypes]

     """Per-Task dataset types, keyed by label in the `Pipeline`.


     This is guaranteed to be zip-iterable with the `Pipeline` itself (assuming

     neither has been modified since the dataset types were extracted, of

     course).

     """


     @classmethod

     def fromPipeline(cls, pipeline, *, registry: Registry) -> PipelineDatasetTypes:

         """Extract and classify the dataset types from all tasks in a

         `Pipeline`.


         Parameters

         ----------

         pipeline: `Pipeline`

             An ordered collection of tasks that can be run together.

         registry: `Registry`

             Registry used to construct normalized `DatasetType` objects and

             retrieve those that are incomplete.


         Returns

         -------

         types: `PipelineDatasetTypes`

             The dataset types used by this `Pipeline`.


         Raises

         ------

         ValueError

             Raised if Tasks are inconsistent about which datasets are marked

             prerequisite.  This indicates that the Tasks cannot be run as part

             of the same `Pipeline`.

         """

         allInputs = NamedValueSet()

         allOutputs = NamedValueSet()

         allInitInputs = NamedValueSet()

         allInitOutputs = NamedValueSet()

         prerequisites = NamedValueSet()

         byTask = dict()

         if isinstance(pipeline, Pipeline):

             pipeline = pipeline.toExpandedPipeline()

         for taskDef in pipeline:

             thisTask = TaskDatasetTypes.fromTaskDef(taskDef, registry=registry)

             allInitInputs |= thisTask.initInputs

             allInitOutputs |= thisTask.initOutputs

             allInputs |= thisTask.inputs

             prerequisites |= thisTask.prerequisites

             allOutputs |= thisTask.outputs

             byTask[taskDef.label] = thisTask

         if not prerequisites.isdisjoint(allInputs):

             raise ValueError("{} marked as both prerequisites and regular inputs".format(

                 {dt.name for dt in allInputs & prerequisites}

             ))

         if not prerequisites.isdisjoint(allOutputs):

             raise ValueError("{} marked as both prerequisites and outputs".format(

                 {dt.name for dt in allOutputs & prerequisites}

             ))

         # Make sure that components which are marked as inputs get treated as

         # intermediates if there is an output which produces the composite

         # containing the component

         intermediateComponents = NamedValueSet()

         intermediateComposites = NamedValueSet()

         outputNameMapping = {dsType.name: dsType for dsType in allOutputs}

         for dsType in allInputs:

             # get the name of a possible component

             name, component = dsType.nameAndComponent()

             # if there is a component name, that means this is a component

             # DatasetType, if there is an output which produces the parent of

             # this component, treat this input as an intermediate

             if component is not None:

                 if name in outputNameMapping:

                     if outputNameMapping[name].dimensions != dsType.dimensions:

                         raise ValueError(f"Component dataset type {dsType.name} has different "

                                          f"dimensions ({dsType.dimensions}) than its parent "

                                          f"({outputNameMapping[name].dimensions}).")

                     composite = DatasetType(name, dsType.dimensions, outputNameMapping[name].storageClass,

                                             universe=registry.dimensions)

                     intermediateComponents.add(dsType)

                     intermediateComposites.add(composite)


         def checkConsistency(a: NamedValueSet, b: NamedValueSet):

             common = a.names & b.names

             for name in common:

                 if a[name] != b[name]:

                     raise ValueError(f"Conflicting definitions for dataset type: {a[name]} != {b[name]}.")


         checkConsistency(allInitInputs, allInitOutputs)

         checkConsistency(allInputs, allOutputs)

         checkConsistency(allInputs, intermediateComposites)

         checkConsistency(allOutputs, intermediateComposites)


         def frozen(s: NamedValueSet) -> NamedValueSet:

             s.freeze()

             return s


         return cls(

             initInputs=frozen(allInitInputs - allInitOutputs),

             initIntermediates=frozen(allInitInputs & allInitOutputs),

             initOutputs=frozen(allInitOutputs - allInitInputs),

             inputs=frozen(allInputs - allOutputs - intermediateComponents),

             intermediates=frozen(allInputs & allOutputs | intermediateComponents),

             outputs=frozen(allOutputs - allInputs - intermediateComposites),

             prerequisites=frozen(prerequisites),

             byTask=MappingProxyType(byTask),  # MappingProxyType -> frozen view of dict for immutability

         )

lsst.pipe.base.configOverrides.ConfigOverrides
Definition: configOverrides.py:121

lsst.pipe.base.pipeline.LabelSpecifier
Definition: pipeline.py:65

lsst.pipe.base.pipeline.LabelSpecifier.__post_init__
def __post_init__(self)
Definition: pipeline.py:79

lsst.pipe.base.pipeline.PipelineDatasetTypes
Definition: pipeline.py:769

lsst.pipe.base.pipeline.PipelineDatasetTypes.fromPipeline
PipelineDatasetTypes fromPipeline(cls, pipeline, *Registry registry)
Definition: pipeline.py:835

lsst.pipe.base.pipeline.Pipeline
Definition: pipeline.py:151

lsst.pipe.base.pipeline.Pipeline.__len__
def __len__(self)
Definition: pipeline.py:595

lsst.pipe.base.pipeline.Pipeline.toFile
None toFile(self, str filename)
Definition: pipeline.py:531

lsst.pipe.base.pipeline.Pipeline.__init__
def __init__(self, str description)
Definition: pipeline.py:160

lsst.pipe.base.pipeline.Pipeline.subsetFromLabels
Pipeline subsetFromLabels(self, LabelSpecifier labelSpecifier)
Definition: pipeline.py:240

lsst.pipe.base.pipeline.Pipeline._addConfigImpl
None _addConfigImpl(self, str label, pipelineIR.ConfigIR newConfig)
Definition: pipeline.py:517

lsst.pipe.base.pipeline.Pipeline.addInstrument
None addInstrument(self, Union[Instrument, str] instrument)
Definition: pipeline.py:401

lsst.pipe.base.pipeline.Pipeline.from_uri
Pipeline from_uri(cls, Union[str, ButlerURI] uri)
Definition: pipeline.py:198

lsst.pipe.base.pipeline.Pipeline.addTask
None addTask(self, Union[PipelineTask, str] task, str label)
Definition: pipeline.py:432

lsst.pipe.base.pipeline.Pipeline.addConfigFile
None addConfigFile(self, str label, str filename)
Definition: pipeline.py:490

lsst.pipe.base.pipeline.Pipeline._pipelineIR
_pipelineIR
Definition: pipeline.py:162

lsst.pipe.base.pipeline.Pipeline.fromPipeline
Pipeline fromPipeline(cls, pipelineIR.PipelineIR pipeline)
Definition: pipeline.py:384

lsst.pipe.base.pipeline.Pipeline.addConfigOverride
None addConfigOverride(self, str label, str key, object value)
Definition: pipeline.py:476

lsst.pipe.base.pipeline.Pipeline.toExpandedPipeline
Generator[TaskDef, None, None] toExpandedPipeline(self)
Definition: pipeline.py:537

lsst.pipe.base.pipeline.Pipeline.addConfigPython
None addConfigPython(self, str label, str pythonString)
Definition: pipeline.py:503

lsst.pipe.base.pipeline.Pipeline.write_to_uri
None write_to_uri(self, Union[str, ButlerURI] uri)
Definition: pipeline.py:534

lsst.pipe.base.pipeline.Pipeline.fromString
Pipeline fromString(cls, str pipeline_string)
Definition: pipeline.py:351

lsst.pipe.base.pipeline.Pipeline.__eq__
def __eq__(self, object other)
Definition: pipeline.py:598

lsst.pipe.base.pipeline.Pipeline.fromFile
Pipeline fromFile(cls, str filename)
Definition: pipeline.py:165

lsst.pipe.base.pipeline.Pipeline.removeTask
None removeTask(self, str label)
Definition: pipeline.py:460

lsst.pipe.base.pipeline.Pipeline.getInstrument
Instrument getInstrument(self)
Definition: pipeline.py:420

lsst.pipe.base.pipeline.Pipeline.fromIR
Pipeline fromIR(cls, pipelineIR.PipelineIR deserialized_pipeline)
Definition: pipeline.py:367

lsst.pipe.base.pipeline.Pipeline._parse_file_specifier
Tuple[ButlerURI, Optional[LabelSpecifier]] _parse_file_specifier(Union[str, ButlerURI] uri)
Definition: pipeline.py:306

lsst.pipe.base.pipeline.Pipeline.__str__
str __str__(self)
Definition: pipeline.py:398

lsst.pipe.base.pipeline.TaskDatasetTypes
Definition: pipeline.py:605

lsst.pipe.base.pipeline.TaskDatasetTypes.fromTaskDef
TaskDatasetTypes fromTaskDef(cls, TaskDef taskDef, *Registry registry)
Definition: pipeline.py:658

lsst.pipe.base.pipeline.TaskDef
Definition: pipeline.py:85

lsst.pipe.base.pipeline.TaskDef.config
config
Definition: pipeline.py:111

lsst.pipe.base.pipeline.TaskDef.__str__
def __str__(self)
Definition: pipeline.py:132

lsst.pipe.base.pipeline.TaskDef.label
label
Definition: pipeline.py:113

lsst.pipe.base.pipeline.TaskDef.__init__
def __init__(self, taskName, config, taskClass=None, label="")
Definition: pipeline.py:108

lsst.pipe.base.pipeline.TaskDef.metadataDatasetName
Optional[str] metadataDatasetName(self)
Definition: pipeline.py:123

lsst.pipe.base.pipeline.TaskDef.configDatasetName
str configDatasetName(self)
Definition: pipeline.py:117

lsst.pipe.base.pipeline.TaskDef.__hash__
def __hash__(self)
Definition: pipeline.py:147

lsst.pipe.base.pipeline.TaskDef.__eq__
bool __eq__(self, object other)
Definition: pipeline.py:139

lsst.pipe.base.pipeline.TaskDef.taskName
taskName
Definition: pipeline.py:109

lsst.pipe.base.pipeline.TaskDef.taskClass
taskClass
Definition: pipeline.py:112

lsst.pipe.base.pipeline.TaskDef.connections
connections
Definition: pipeline.py:114

lsst.pipe.base.pipelineIR.ConfigIR
Definition: pipelineIR.py:211

lsst.pipe.base.pipelineIR.ContractError
Definition: pipelineIR.py:66

lsst.pipe.base.pipelineIR.PipelineIR
Definition: pipelineIR.py:459

lsst.pipe.base.pipelineIR.TaskIR
Definition: pipelineIR.py:327

set
daf::base::PropertySet * set
Definition: fits.cc:912

lsst.obs.base

lsst.pex.config.config.doImport
doImport
Definition: config.py:48

lsst.pex.config.history.format
def format(config, name=None, writeSourceLine=True, prefix="", verbose=False)
Definition: history.py:174

lsst.pipe.base.connections.iterConnections
typing.Generator[BaseConnection, None, None] iterConnections(PipelineTaskConnections connections, Union[str, Iterable[str]] connectionType)
Definition: connections.py:503

lsst::utils
Definition: Backtrace.h:29