from __future__ import annotations

__all__ = ("ConfigIR", "ContractError", "ContractIR", "ImportIR", "PipelineIR", "TaskIR", "LabeledSubset")

import copy
import os
import re
import warnings

import yaml

from collections import Counter
from collections.abc import Iterable as abcIterable
from dataclasses import dataclass, field
from deprecated.sphinx import deprecated
from typing import Any, List, Set, Union, Generator, MutableMapping, Optional, Dict, Type

from lsst.daf.butler import ButlerURI
45 """This is a specialized version of yaml's SafeLoader. It checks and raises
46 an exception if it finds that there are multiple instances of the same key
47 found inside a pipeline file at a given scope.
58 all_keys = Counter(key_node.value
for key_node, _
in node.value)
59 duplicates = {k
for k, i
in all_keys.items()
if i != 1}
61 raise KeyError(
"Pipeline files must not have duplicated keys, "
62 f
"{duplicates} appeared multiple times")
67 """An exception that is raised when a pipeline contract is not satisfied
74 """Intermediate representation of contracts read from a pipeline yaml file.
77 """A string of python code representing one or more conditions on configs
78 in a pipeline. This code-as-string should, once evaluated, should be True
79 if the configs are fine, and False otherwise.
81 msg: Union[str,
None] =
None
82 """An optional message to be shown to the user if a contract fails
86 """Convert to a representation used in yaml serialization
88 accumulate = {
"contract": self.
contractcontract}
89 if self.
msgmsg
is not None:
90 accumulate[
'msg'] = self.
msgmsg
94 if not isinstance(other, ContractIR):
96 elif self.
contractcontract == other.contract
and self.
msgmsg == other.msg:
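

# Illustrative sketch (hypothetical task labels): a contract is a python
# expression over task configs, with an optional failure message.
#
#     >>> c = ContractIR(contract="taskA.threshold <= taskB.threshold",
#     ...                msg="taskA and taskB thresholds are inconsistent")
#     >>> c.to_primitives()["msg"]
#     'taskA and taskB thresholds are inconsistent'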
104 """Intermediate representation of named subset of task labels read from
105 a pipeline yaml file.
108 """The label used to identify the subset of task labels.
111 """A set of task labels contained in this subset.
113 description: Optional[str]
114 """A description of what this subset of tasks is intended to do
119 """Generate `LabeledSubset` objects given a properly formatted object
120 that as been created by a yaml loader.
125 The label that will be used to identify this labeled subset.
126 value : `list` of `str` or `dict`
127 Object returned from loading a labeled subset section from a yaml
132 labeledSubset : `LabeledSubset`
133 A `LabeledSubset` object build from the inputs.
138 Raised if the value input is not properly formatted for parsing
140 if isinstance(value, MutableMapping):
141 subset = value.pop(
"subset",
None)
143 raise ValueError(
"If a labeled subset is specified as a mapping, it must contain the key "
145 description = value.pop(
"description",
None)
146 elif isinstance(value, abcIterable):
150 raise ValueError(f
"There was a problem parsing the labeled subset {label}, make sure the "
151 "definition is either a valid yaml list, or a mapping with keys "
152 "(subset, description) where subset points to a yaml list, and description is "
153 "associated with a string")
157 """Convert to a representation used in yaml serialization
159 accumulate: Dict[str, Union[List[str], str]] = {
"subset":
list(self.subset)}
160 if self.description
is not None:
161 accumulate[
"description"] = self.description
167 """Intermediate representation of parameters that are global to a pipeline
169 These parameters are specified under a top level key named `parameters`
170 and are declared as a yaml mapping. These entries can then be used inside
171 task configuration blocks to specify configuration values. They may not be
172 used in the special ``file`` or ``python`` blocks.
181 field1: parameters.shared_value
185 field2: parameters.shared_value
187 mapping: MutableMapping[str, str]
188 """A mutable mapping of identifiers as keys, and shared configuration
191 def update(self, other: Optional[ParametersIR]):
192 if other
is not None:
193 self.mapping.
update(other.mapping)
196 """Convert to a representation used in yaml serialization
201 return value
in self.mapping
204 return self.mapping[item]
207 return bool(self.mapping)
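

# Illustrative sketch: ParametersIR behaves like a mapping, and update()
# merges another instance in place.
#
#     >>> p = ParametersIR({"shared_value": "14"})
#     >>> "shared_value" in p, p["shared_value"]
#     (True, '14')
#     >>> p.update(ParametersIR({"other_value": "15"}))
#     >>> p["other_value"]
#     '15'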
212 """Intermediate representation of configurations read from a pipeline yaml
215 python: Union[str,
None] =
None
216 """A string of python code that is used to modify a configuration. This can
217 also be None if there are no modifications to do.
219 dataId: Union[dict,
None] =
None
220 """A dataId that is used to constrain these config overrides to only quanta
221 with matching dataIds. This field can be None if there is no constraint.
222 This is currently an unimplemented feature, and is placed here for future
225 file: List[str] =
field(default_factory=list)
226 """A list of paths which points to a file containing config overrides to be
227 applied. This value may be an empty list if there are no overrides to
230 rest: dict =
field(default_factory=dict)
231 """This is a dictionary of key value pairs, where the keys are strings
232 corresponding to qualified fields on a config to override, and the values
233 are strings representing the values to apply.
237 """Convert to a representation used in yaml serialization
240 for name
in (
"python",
"dataId",
"file"):
243 if getattr(self, name):
244 accumulate[name] = getattr(self, name)
247 accumulate.update(self.rest)

    def formatted(self, parameters: ParametersIR) -> ConfigIR:
        """Returns a new ConfigIR object that is formatted according to the
        specified parameters.

        Parameters
        ----------
        parameters : `ParametersIR`
            Object that contains variable mappings used in substitution.

        Returns
        -------
        config : `ConfigIR`
            A new ConfigIR object formatted with the input parameters.
        """
        new_config = copy.deepcopy(self)
        for key, value in new_config.rest.items():
            if not isinstance(value, str):
                continue
            match = re.match("parameters[.](.*)", value)
            if match and match.group(1) in parameters:
                new_config.rest[key] = parameters[match.group(1)]
            if match and match.group(1) not in parameters:
                warnings.warn(f"config {key} contains value {match.group(0)} which is formatted like a "
                              "Pipeline parameter but was not found within the Pipeline, if this was not "
                              "intentional, check for a typo")
        return new_config

    def maybe_merge(self, other_config: "ConfigIR") -> Generator["ConfigIR", None, None]:
        """Merges another instance of a `ConfigIR` into this instance if
        possible. This function returns a generator that yields either self
        if the configs were merged, or self and other_config if they could
        not be merged.

        Parameters
        ----------
        other_config : `ConfigIR`
            An instance of `ConfigIR` to merge into this instance.

        Returns
        -------
        generator : `Generator` of `ConfigIR`
            A generator containing either self, or self and other_config,
            depending on whether the configs could be merged.
        """
        # Configs can only be merged if they share a dataId and neither has a
        # python block or override files.
        if self.dataId != other_config.dataId or self.python or other_config.python or\
                self.file or other_config.file:
            yield from (self, other_config)
            return

        # Collect all shared keys, and verify that no key maps to two
        # different values.
        key_union = self.rest.keys() & other_config.rest.keys()
        for key in key_union:
            if self.rest[key] != other_config.rest[key]:
                yield from (self, other_config)
                return
        self.rest.update(other_config.rest)

        # Combine the lists of override files to load.
        self_file_set = set(self.file)
        other_file_set = set(other_config.file)
        self.file = list(self_file_set.union(other_file_set))

        yield self
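
    # Illustrative sketch: two config blocks with the same (default) dataId
    # and no python or file overrides merge into a single block.
    #
    #     >>> a = ConfigIR(rest={"field1": "1"})
    #     >>> merged = list(a.maybe_merge(ConfigIR(rest={"field2": "2"})))
    #     >>> len(merged), merged[0].rest
    #     (1, {'field1': '1', 'field2': '2'})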

    def __eq__(self, other: object):
        if not isinstance(other, ConfigIR):
            return False
        elif all(getattr(self, attr) == getattr(other, attr) for attr in
                 ("python", "dataId", "file", "rest")):
            return True
        else:
            return False
328 """Intermediate representation of tasks read from a pipeline yaml file.
331 """An identifier used to refer to a task.
334 """A string containing a fully qualified python class to be run in a
337 config: Union[List[ConfigIR],
None] =
None
338 """List of all configs overrides associated with this task, and may be
339 `None` if there are no config overrides.
343 """Convert to a representation used in yaml serialization
345 accumulate: Dict[str, Union[str, List[dict]]] = {
'class': self.klass}
347 accumulate[
'config'] = [c.to_primitives()
for c
in self.
configconfig]
351 """Adds a `ConfigIR` to this task if one is not present. Merges configs
352 if there is a `ConfigIR` present and the dataId keys of both configs
353 match, otherwise adds a new entry to the config list. The exception to
354 the above is that if either the last config or other_config has a
355 python block, then other_config is always added, as python blocks can
356 modify configs in ways that cannot be predicted.
360 other_config : `ConfigIR`
361 A `ConfigIR` instance to add or merge into the config attribute of
367 self.
configconfig.extend(self.
configconfig.pop().maybe_merge(other_config))
370 if not isinstance(other, TaskIR):
372 elif all(getattr(self, attr) == getattr(other, attr)
for attr
in
373 (
"label",
"klass",
"config")):
381 """An intermediate representation of imported pipelines
384 """This is the location of the pipeline to inherit. The path should be
385 specified as an absolute path. Environment variables may be used in the
386 path and should be specified as a python string template, with the name of
387 the environment variable inside braces.
389 include: Union[List[str],
None] =
None
390 """List of tasks that should be included when inheriting this pipeline.
391 Either the include or exclude attributes may be specified, but not both.
393 exclude: Union[List[str],
None] =
None
394 """List of tasks that should be excluded when inheriting this pipeline.
395 Either the include or exclude attributes may be specified, but not both.
397 importContracts: bool =
True
398 """Boolean attribute to dictate if contracts should be inherited with the
401 instrument: Union[Type[KeepInstrument], str,
None] = KeepInstrument
402 """Instrument to assign to the Pipeline at import. The default value of
403 KEEP_INSTRUMENT indicates that whatever instrument the pipeline is declared
404 with will not be modified. Setting this value to None will drop any
405 declared instrument prior to import.
409 """Load in the Pipeline specified by this object, and turn it into a
414 pipeline : `PipelineIR`
415 A pipeline generated from the imported pipeline file
417 if self.include
and self.exclude:
418 raise ValueError(
"Both an include and an exclude list cant be specified"
419 " when declaring a pipeline import")
420 tmp_pipeline = PipelineIR.from_uri(os.path.expandvars(self.location))
421 if self.instrument
is not KeepInstrument:
422 tmp_pipeline.instrument = self.instrument
424 included_labels =
set()
425 for label
in tmp_pipeline.tasks:
426 if (self.include
and label
in self.include)
or (self.exclude
and label
not in self.exclude)\
427 or (self.include
is None and self.exclude
is None):
428 included_labels.add(label)
432 if self.include
is not None:
433 subsets_in_include = tmp_pipeline.labeled_subsets.keys() & self.include
434 for label
in subsets_in_include:
435 included_labels.update(tmp_pipeline.labeled_subsets[label].subset)
437 elif self.exclude
is not None:
438 subsets_in_exclude = tmp_pipeline.labeled_subsets.keys() & self.exclude
439 for label
in subsets_in_exclude:
440 included_labels.difference_update(tmp_pipeline.labeled_subsets[label].subset)
442 tmp_pipeline = tmp_pipeline.subset_from_labels(included_labels)
444 if not self.importContracts:
445 tmp_pipeline.contracts = []
450 if not isinstance(other, ImportIR):
452 elif all(getattr(self, attr) == getattr(other, attr)
for attr
in
453 (
"location",
"include",
"exclude",
"importContracts")):
460 """Intermediate representation of a pipeline definition
465 A dictionary which matches the structure that would be produced by a
466 yaml reader which parses a pipeline definition document
471 - If a pipeline is declared without a description
472 - If no tasks are declared in a pipeline, and no pipelines are to be
474 - If more than one instrument is specified
475 - If more than one inherited pipeline share a label
479 if "description" not in loaded_yaml:
480 raise ValueError(
"A pipeline must be declared with a description")
481 if "tasks" not in loaded_yaml
and len({
"imports",
"inherits"} - loaded_yaml.keys()) == 2:
482 raise ValueError(
"A pipeline must be declared with one or more tasks")
493 inst = loaded_yaml.pop(
"instrument",
None)
494 if isinstance(inst, list):
495 raise ValueError(
"Only one top level instrument can be defined in a pipeline")

    def _read_contracts(self, loaded_yaml):
        """Process the contracts portion of the loaded yaml document.

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document.
        """
        loaded_contracts = loaded_yaml.pop("contracts", [])
        if isinstance(loaded_contracts, str):
            loaded_contracts = [loaded_contracts]
        self.contracts = []
        for contract in loaded_contracts:
            if isinstance(contract, dict):
                self.contracts.append(ContractIR(**contract))
            if isinstance(contract, str):
                self.contracts.append(ContractIR(contract=contract))

    def _read_parameters(self, loaded_yaml):
        """Process the parameters portion of the loaded yaml document.

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document.
        """
        loaded_parameters = loaded_yaml.pop("parameters", {})
        if not isinstance(loaded_parameters, dict):
            raise ValueError("The parameters section must be a yaml mapping")
        self.parameters = ParametersIR(loaded_parameters)

    def _read_labeled_subsets(self, loaded_yaml: dict):
        """Process the subsets portion of the loaded yaml document.

        Parameters
        ----------
        loaded_yaml : `MutableMapping`
            A dictionary which matches the structure that would be produced
            by a yaml reader which parses a pipeline definition document.
        """
        loaded_subsets = loaded_yaml.pop("subsets", {})
        self.labeled_subsets = {}
        if not loaded_subsets and "subset" in loaded_yaml:
            raise ValueError("Top level key should be subsets and not subset, add an s")
        for key, value in loaded_subsets.items():
            self.labeled_subsets[key] = LabeledSubset.from_primitives(key, value)
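
    # Illustrative sketch of a subsets section accepted by this method
    # (hypothetical labels); both value forms are handled by
    # LabeledSubset.from_primitives:
    #
    #     subsets:
    #       step1: [taskA, taskB]
    #       step2:
    #         subset: [taskC]
    #         description: second processing step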

    def _verify_labeled_subsets(self):
        """Verifies that all the labels in each named subset exist within the
        pipeline.
        """
        # Verify that all labels defined in a labeled subset are in the
        # pipeline.
        for labeled_subset in self.labeled_subsets.values():
            if not labeled_subset.subset.issubset(self.tasks.keys()):
                raise ValueError(f"Labels {labeled_subset.subset - self.tasks.keys()} were not found in the "
                                 "declared pipeline")
        # Verify that no subset label clashes with a task label.
        label_intersection = self.labeled_subsets.keys() & self.tasks.keys()
        if label_intersection:
            raise ValueError(f"Labeled subsets can not use the same label as a task: {label_intersection}")

    def _read_imports(self, loaded_yaml):
        """Process the inherits portion of the loaded yaml document.

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document.
        """
        def process_args(argument: Union[str, dict]) -> dict:
            if isinstance(argument, str):
                return {"location": argument}
            elif isinstance(argument, dict):
                if "exclude" in argument and isinstance(argument["exclude"], str):
                    argument["exclude"] = [argument["exclude"]]
                if "include" in argument and isinstance(argument["include"], str):
                    argument["include"] = [argument["include"]]
                if "instrument" in argument and argument["instrument"] == "None":
                    argument["instrument"] = None
                return argument

        if not {"inherits", "imports"} - loaded_yaml.keys():
            raise ValueError("Cannot define both inherits and imports sections, use imports")
        tmp_import = loaded_yaml.pop("inherits", None)
        if tmp_import is None:
            tmp_import = loaded_yaml.pop("imports", None)
        else:
            warnings.warn("The 'inherits' key is deprecated, and will be "
                          "removed around June 2021. Please use the key "
                          "'imports' instead", FutureWarning)
        if tmp_import is None:
            self.imports = []
        elif isinstance(tmp_import, list):
            self.imports = [ImportIR(**process_args(args)) for args in tmp_import]
        else:
            self.imports = [ImportIR(**process_args(tmp_import))]

        # Accumulate the tasks, labeled subsets, and parameters from each
        # imported pipeline.
        accumulate_tasks = {}
        accumulate_labeled_subsets = {}
        accumulated_parameters = ParametersIR({})
        for other_pipeline in self.imports:
            tmp_IR = other_pipeline.toPipelineIR()
            if self.instrument is None:
                self.instrument = tmp_IR.instrument
            elif self.instrument != tmp_IR.instrument and tmp_IR.instrument is not None:
                raise ValueError("Only one instrument can be declared in a pipeline or it's imports")
            if accumulate_tasks.keys() & tmp_IR.tasks.keys():
                raise ValueError("Task labels in the imported pipelines must be unique")
            accumulate_tasks.update(tmp_IR.tasks)
            self.contracts.extend(tmp_IR.contracts)
            # Verify that tmp_IR has unique labels for named subsets, both
            # among existing labeled subsets and existing task labels.
            overlapping_subsets = accumulate_labeled_subsets.keys() & tmp_IR.labeled_subsets.keys()
            task_subset_overlap = ((accumulate_labeled_subsets.keys() | tmp_IR.labeled_subsets.keys())
                                   & accumulate_tasks.keys())
            if overlapping_subsets or task_subset_overlap:
                raise ValueError("Labeled subset names must be unique amongst imports in both labels and "
                                 f"named Subsets. Duplicate: {overlapping_subsets | task_subset_overlap}")
            accumulate_labeled_subsets.update(tmp_IR.labeled_subsets)
            accumulated_parameters.update(tmp_IR.parameters)

        # Verify that the accumulated labeled subsets do not clash with any
        # task label from this pipeline.
        if accumulate_labeled_subsets.keys() & self.tasks.keys():
            raise ValueError("Labeled subset names must be unique amongst imports in both labels and "
                             "named Subsets")
        # Merge in the labeled subsets declared in this document so they can
        # override any that were imported.
        accumulate_labeled_subsets.update(self.labeled_subsets)
        self.labeled_subsets = accumulate_labeled_subsets

        # Merge the task definitions, preserving imported configs when a label
        # points at the same class in both pipelines.
        for label, task in self.tasks.items():
            if label not in accumulate_tasks:
                accumulate_tasks[label] = task
            elif accumulate_tasks[label].klass == task.klass:
                if task.config is not None:
                    for config in task.config:
                        accumulate_tasks[label].add_or_update_config(config)
            else:
                accumulate_tasks[label] = task
        self.tasks = accumulate_tasks
        self.parameters.update(accumulated_parameters)

    def _read_tasks(self, loaded_yaml):
        """Process the tasks portion of the loaded yaml document.

        Parameters
        ----------
        loaded_yaml : `dict`
            A dictionary which matches the structure that would be produced by
            a yaml reader which parses a pipeline definition document.
        """
        self.tasks = {}
        tmp_tasks = loaded_yaml.pop("tasks", None)
        if tmp_tasks is None:
            tmp_tasks = {}

        if "parameters" in tmp_tasks:
            raise ValueError("parameters is a reserved word and cannot be used as a task label")

        for label, definition in tmp_tasks.items():
            if isinstance(definition, str):
                definition = {"class": definition}
            config = definition.get('config', None)
            if config is None:
                task_config_ir = None
            else:
                if isinstance(config, dict):
                    config = [config]
                task_config_ir = []
                for c in config:
                    file = c.pop("file", None)
                    if file is None:
                        file = []
                    elif not isinstance(file, list):
                        file = [file]
                    task_config_ir.append(ConfigIR(python=c.pop("python", None),
                                                   dataId=c.pop("dataId", None),
                                                   file=file,
                                                   rest=c))
            self.tasks[label] = TaskIR(label, definition["class"], task_config_ir)

    def _remove_contracts(self, label: str):
        """Remove any contracts that contain the given label.

        String comparison used in this way is not the most elegant and may
        have issues, but it is the only feasible way when users can specify
        contracts with generic strings.
        """
        new_contracts = []
        for contract in self.contracts:
            # Match the label when it is not preceded by an identifier
            # character (or is at the start) and is followed by a dot.
            if re.match(f".*([^A-Za-z0-9_]|^){label}[.]", contract.contract):
                continue
            new_contracts.append(contract)
        self.contracts = new_contracts
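
    # Illustrative sketch: the pattern only matches the label when it appears
    # as a standalone identifier followed by a dot.
    #
    #     >>> bool(re.match(".*([^A-Za-z0-9_]|^)taskA[.]", "taskA.field == 1"))
    #     True
    #     >>> bool(re.match(".*([^A-Za-z0-9_]|^)taskA[.]", "mytaskA.field == 1"))
    #     False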
719 """Subset a pipelineIR to contain only labels specified in
724 labelSpecifier : `set` of `str`
725 Set containing labels that describes how to subset a pipeline.
729 pipeline : `PipelineIR`
730 A new pipelineIR object that is a subset of the old pipelineIR
735 Raised if there is an issue with specified labels
739 This method attempts to prune any contracts that contain labels which
740 are not in the declared subset of labels. This pruning is done using a
741 string based matching due to the nature of contracts and may prune more
742 than it should. Any labeled subsets defined that no longer have all
743 members of the subset present in the pipeline will be removed from the
747 pipeline = copy.deepcopy(self)
752 for label
in labelSpecifier:
753 if label
in pipeline.labeled_subsets:
755 toAdd.update(pipeline.labeled_subsets[label].subset)
756 labelSpecifier.difference_update(toRemove)
757 labelSpecifier.update(toAdd)
759 if not labelSpecifier.issubset(pipeline.tasks.keys()
760 | pipeline.labeled_subsets):
761 difference = labelSpecifier.difference(pipeline.tasks.keys())
762 raise ValueError(
"Not all supplied labels (specified or named subsets) are in the pipeline "
763 f
"definition, extra labels: {difference}")
765 pipeline_labels =
set(pipeline.tasks.keys())
769 for label
in pipeline_labels:
770 if label
not in labelSpecifier:
771 pipeline.tasks.pop(label)
772 pipeline._remove_contracts(label)
775 labeled_subsets = copy.copy(pipeline.labeled_subsets)
777 for label, labeled_subset
in labeled_subsets.items():
778 if labeled_subset.subset - pipeline.tasks.keys():
779 pipeline.labeled_subsets.pop(label)
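
    # Illustrative usage (hypothetical labels): named subsets in the specifier
    # expand to their member labels before pruning.
    #
    #     subset_ir = pipeline_ir.subset_from_labels({"taskA", "step2"})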
785 """Create a `PipelineIR` object from a string formatted like a pipeline
790 pipeline_string : `str`
791 A string that is formatted according like a pipeline document
793 loaded_yaml = yaml.load(pipeline_string, Loader=PipelineYamlLoader)
794 return cls(loaded_yaml)

    @classmethod
    @deprecated(reason="This has been replaced with `from_uri` and will be removed after v23",
                version="v21.0", category=FutureWarning)
    def from_file(cls, filename: str) -> PipelineIR:
        """Create a `PipelineIR` object from the document specified by the
        input path.

        Parameters
        ----------
        filename : `str`
            Location of document to use in creating a `PipelineIR` object.

        Returns
        -------
        pipelineIR : `PipelineIR`
            The loaded pipeline.

        Notes
        -----
        This method is deprecated, please use `from_uri`.
        """
        return cls.from_uri(filename)

    @classmethod
    def from_uri(cls, uri: Union[str, ButlerURI]) -> PipelineIR:
        """Create a `PipelineIR` object from the document specified by the
        input uri.

        Parameters
        ----------
        uri : `str` or `ButlerURI`
            Location of document to use in creating a `PipelineIR` object.

        Returns
        -------
        pipelineIR : `PipelineIR`
            The loaded pipeline.
        """
        loaded_uri = ButlerURI(uri)
        # Use a local copy so parse errors can reference a real file.
        with loaded_uri.as_local() as local:
            # Read explicitly; yaml handles a ButlerURI's bytes differently.
            loaded_yaml = yaml.load(local.read(), Loader=PipelineYamlLoader)
        return cls(loaded_yaml)
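
    # Illustrative usage (hypothetical path):
    #
    #     pipeline_ir = PipelineIR.from_uri("/path/to/pipeline.yaml")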

    @deprecated(reason="This has been replaced with `write_to_uri` and will be removed after v23",
                version="v21.0", category=FutureWarning)
    def to_file(self, filename: str):
        """Serialize this `PipelineIR` object into a yaml formatted string and
        write the output to a file at the specified path.

        Parameters
        ----------
        filename : `str`
            Location of document to write a `PipelineIR` object.
        """
        self.write_to_uri(filename)
859 """Serialize this `PipelineIR` object into a yaml formatted string and
860 write the output to a file at the specified uri.
864 uri: `str` or `ButlerURI`
865 Location of document to write a `PipelineIR` object.
867 butlerUri = ButlerURI(uri)
868 butlerUri.write(yaml.dump(self.
to_primitivesto_primitives(), sort_keys=
False).encode())
871 """Convert to a representation used in yaml serialization
873 accumulate = {
"description": self.
descriptiondescription}
875 accumulate[
'instrument'] = self.
instrumentinstrument
878 accumulate[
'tasks'] = {m: t.to_primitives()
for m, t
in self.
taskstasks.
items()}
880 accumulate[
'contracts'] = [c.to_primitives()
for c
in self.
contractscontracts]
882 accumulate[
'subsets'] = {k: v.to_primitives()
for k, v
in self.
labeled_subsetslabeled_subsets.
items()}
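
    # Illustrative sketch: to_primitives() mirrors the yaml document, so a
    # loaded pipeline can be re-serialized.
    #
    #     >>> ir = PipelineIR.from_string("description: demo\ntasks:\n  taskA: mod.TaskA")
    #     >>> ir.to_primitives()["tasks"]["taskA"]["class"]
    #     'mod.TaskA'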
886 """Instance formatting as how it would look in yaml representation
888 return yaml.dump(self.
to_primitivesto_primitives(), sort_keys=
False)
891 """Instance formatting as how it would look in yaml representation
896 if not isinstance(other, PipelineIR):
898 elif all(getattr(self, attr) == getattr(other, attr)
for attr
in
899 (
"contracts",
"tasks",
"instrument")):