LSST Applications g0f08755f38+82efc23009,g12f32b3c4e+e7bdf1200e,g1653933729+a8ce1bb630,g1a0ca8cf93+50eff2b06f,g28da252d5a+52db39f6a5,g2bbee38e9b+37c5a29d61,g2bc492864f+37c5a29d61,g2cdde0e794+c05ff076ad,g3156d2b45e+41e33cbcdc,g347aa1857d+37c5a29d61,g35bb328faa+a8ce1bb630,g3a166c0a6a+37c5a29d61,g3e281a1b8c+fb992f5633,g414038480c+7f03dfc1b0,g41af890bb2+11b950c980,g5fbc88fb19+17cd334064,g6b1c1869cb+12dd639c9a,g781aacb6e4+a8ce1bb630,g80478fca09+72e9651da0,g82479be7b0+04c31367b4,g858d7b2824+82efc23009,g9125e01d80+a8ce1bb630,g9726552aa6+8047e3811d,ga5288a1d22+e532dc0a0b,gae0086650b+a8ce1bb630,gb58c049af0+d64f4d3760,gc28159a63d+37c5a29d61,gcf0d15dbbd+2acd6d4d48,gd7358e8bfb+778a810b6e,gda3e153d99+82efc23009,gda6a2b7d83+2acd6d4d48,gdaeeff99f8+1711a396fd,ge2409df99d+6b12de1076,ge79ae78c31+37c5a29d61,gf0baf85859+d0a5978c5a,gf3967379c6+4954f8c433,gfb92a5be7c+82efc23009,gfec2e1e490+2aaed99252,w.2024.46
LSST Data Management Base Package
Loading...
Searching...
No Matches
diff_matched_tract_catalog.py
Go to the documentation of this file.
1# This file is part of pipe_tasks.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21
# Public API of this module.
__all__ = [
    'DiffMatchedTractCatalogConfig', 'DiffMatchedTractCatalogTask', 'MatchedCatalogFluxesConfig',
    'MatchType', 'MeasurementType', 'SourceType',
    'Statistic', 'Median', 'SigmaIQR', 'SigmaMAD', 'Percentile',
]
27
import lsst.afw.geom as afwGeom
# NOTE(review): the two `lsst.meas.astrom` import lines below were dropped by
# the HTML extraction (they were hyperlinks); restored from upstream pipe_tasks.
# Verify the module paths against the current repository.
from lsst.meas.astrom.matcher_probabilistic import (
    ComparableCatalog, ConvertCatalogCoordinatesConfig,
)
from lsst.meas.astrom.match_probabilistic_task import radec_to_xy
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.pipe.base.connectionTypes as cT
from lsst.skymap import BaseSkyMap

from abc import ABCMeta, abstractmethod
from astropy.stats import mad_std
import astropy.table
import astropy.units as u
from dataclasses import dataclass
from decimal import Decimal
from deprecated.sphinx import deprecated
from enum import Enum
import numpy as np
import pandas as pd
from scipy.stats import iqr
from smatch.matcher import sphdist
from types import SimpleNamespace
from typing import Sequence
import warnings
53
54
def is_sequence_set(x: Sequence) -> bool:
    """Return True if every element of `x` is unique."""
    return len(x) == len(set(x))
57
58
@deprecated(reason="This method is no longer being used and will be removed after v28.",
            version="v28.0", category=FutureWarning)
def is_percentile(x: str) -> bool:
    """Return True if the string `x` parses as a valid percentile (0-100)."""
    return 0 <= Decimal(x) <= 100
63
64
# Default dataset-type name templates shared by the connections/config classes.
DiffMatchedTractCatalogBaseTemplates = {
    "name_input_cat_ref": "truth_summary",
    "name_input_cat_target": "objectTable_tract",
    "name_skymap": BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
}
70
71
# NOTE(review): the `class DiffMatchedTractCatalogConnections(` declaration
# line was dropped by the extraction; reconstructed from the
# `pipelineConnections=DiffMatchedTractCatalogConnections` reference below.
class DiffMatchedTractCatalogConnections(
    pipeBase.PipelineTaskConnections,
    dimensions=("tract", "skymap"),
    defaultTemplates=DiffMatchedTractCatalogBaseTemplates,
):
    """Connections for DiffMatchedTractCatalogTask.

    Inputs are the reference/target catalogs, the match catalogs produced by
    a prior matching task, and the skymap; outputs are the merged matched
    catalog and (optionally) a table of difference statistics.
    """
    cat_ref = cT.Input(
        doc="Reference object catalog to match from",
        name="{name_input_cat_ref}",
        storageClass="ArrowAstropy",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    cat_target = cT.Input(
        doc="Target object catalog to match",
        name="{name_input_cat_target}",
        storageClass="ArrowAstropy",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    skymap = cT.Input(
        doc="Input definition of geometry/bbox and projection/wcs for coadded exposures",
        name="{name_skymap}",
        storageClass="SkyMap",
        dimensions=("skymap",),
    )
    cat_match_ref = cT.Input(
        doc="Reference match catalog with indices of target matches",
        name="match_ref_{name_input_cat_ref}_{name_input_cat_target}",
        storageClass="ArrowAstropy",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    cat_match_target = cT.Input(
        doc="Target match catalog with indices of references matches",
        name="match_target_{name_input_cat_ref}_{name_input_cat_target}",
        storageClass="ArrowAstropy",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    columns_match_target = cT.Input(
        doc="Target match catalog columns",
        name="match_target_{name_input_cat_ref}_{name_input_cat_target}.columns",
        storageClass="ArrowColumnList",
        dimensions=("tract", "skymap"),
    )
    cat_matched = cT.Output(
        doc="Catalog with reference and target columns for joined sources",
        name="matched_{name_input_cat_ref}_{name_input_cat_target}",
        storageClass="ArrowAstropy",
        dimensions=("tract", "skymap"),
    )
    diff_matched = cT.Output(
        doc="Table with aggregated counts, difference and chi statistics",
        name="diff_matched_{name_input_cat_ref}_{name_input_cat_target}",
        storageClass="ArrowAstropy",
        dimensions=("tract", "skymap"),
    )

    def __init__(self, *, config=None):
        # If the reference catalog is not sharded by tract, re-declare the
        # cat_ref input with no dimensions so the butler can resolve it.
        if config.refcat_sharding_type != "tract":
            if config.refcat_sharding_type == "none":
                old = self.cat_ref
                del self.cat_ref
                self.cat_ref = cT.Input(
                    doc=old.doc,
                    name=old.name,
                    storageClass=old.storageClass,
                    dimensions=(),
                    deferLoad=old.deferLoad,
                )
        # Drop the stats output entirely when stats are disabled or there are
        # no flux columns to compute them for.
        if not (config.compute_stats and len(config.columns_flux) > 0):
            del self.diff_matched
144
145
class MatchedCatalogFluxesConfig(pexConfig.Config):
    """Config for flux columns to compare between a matched pair of catalogs."""
    column_ref_flux = pexConfig.Field(
        dtype=str,
        doc='Reference catalog flux column name',
    )
    columns_target_flux = pexConfig.ListField(
        dtype=str,
        listCheck=is_sequence_set,
        doc="List of target catalog flux column names",
    )
    columns_target_flux_err = pexConfig.ListField(
        dtype=str,
        listCheck=is_sequence_set,
        doc="List of target catalog flux error column names",
    )

    # this should be an orderedset
    @property
    def columns_in_ref(self) -> list[str]:
        """Reference catalog columns required by this config."""
        return [self.column_ref_flux]

    # this should also be an orderedset
    @property
    def columns_in_target(self) -> list[str]:
        """Target catalog columns required by this config (de-duplicated,
        order-preserving union of flux and flux-error columns)."""
        columns = [col for col in self.columns_target_flux]
        columns.extend(col for col in self.columns_target_flux_err if col not in columns)
        return columns
173
174
# NOTE(review): the `class DiffMatchedTractCatalogConfig(` declaration line
# was dropped by the extraction; reconstructed from the ConfigClass reference.
class DiffMatchedTractCatalogConfig(
    pipeBase.PipelineTaskConfig,
    pipelineConnections=DiffMatchedTractCatalogConnections,
):
    """Config for DiffMatchedTractCatalogTask.

    Many statistics-related fields are deprecated and scheduled for removal
    after v28; the task now primarily produces the merged matched catalog.
    """
    column_matched_prefix_ref = pexConfig.Field[str](
        default='refcat_',
        doc='The prefix for matched columns copied from the reference catalog',
    )
    column_ref_extended = pexConfig.Field[str](
        default='is_pointsource',
        deprecated='This field is no longer being used and will be removed after v28.',
        doc='The boolean reference table column specifying if the target is extended',
    )
    column_ref_extended_inverted = pexConfig.Field[bool](
        default=True,
        deprecated='This field is no longer being used and will be removed after v28.',
        doc='Whether column_ref_extended specifies if the object is compact, not extended',
    )
    column_target_extended = pexConfig.Field[str](
        default='refExtendedness',
        deprecated='This field is no longer being used and will be removed after v28.',
        doc='The target table column estimating the extendedness of the object (0 <= x <= 1)',
    )
    compute_stats = pexConfig.Field[bool](
        default=False,
        deprecated='This field is no longer being used and will be removed after v28.',
        doc='Whether to compute matched difference statistics',
    )
    include_unmatched = pexConfig.Field[bool](
        default=False,
        doc='Whether to include unmatched rows in the matched table',
    )

    @property
    def columns_in_ref(self) -> list[str]:
        """Ordered, de-duplicated reference catalog columns needed by the task."""
        columns_all = [self.coord_format.column_ref_coord1, self.coord_format.column_ref_coord2]
        if self.compute_stats:
            columns_all.append(self.column_ref_extended)
        for column_lists in (
            (
                self.columns_ref_copy,
            ),
            (x.columns_in_ref for x in self.columns_flux.values()),
        ):
            for column_list in column_lists:
                columns_all.extend(column_list)

        # dict preserves insertion order; using it as an ordered set
        return list({column: None for column in columns_all}.keys())

    @property
    def columns_in_target(self) -> list[str]:
        """Ordered, de-duplicated target catalog columns needed by the task."""
        columns_all = [self.coord_format.column_target_coord1, self.coord_format.column_target_coord2]
        if self.compute_stats:
            columns_all.append(self.column_target_extended)
        if self.coord_format.coords_ref_to_convert is not None:
            columns_all.extend(col for col in self.coord_format.coords_ref_to_convert.values()
                               if col not in columns_all)
        for column_lists in (
            (
                # NOTE(review): these tuple entries were dropped by the
                # extraction; restored from upstream pipe_tasks — verify.
                self.columns_target_coord_err,
                self.columns_target_select_false,
                self.columns_target_select_true,
                self.columns_target_copy,
            ),
            (x.columns_in_target for x in self.columns_flux.values()),
        ):
            for column_list in column_lists:
                columns_all.extend(col for col in column_list if col not in columns_all)
        return columns_all

    columns_flux = pexConfig.ConfigDictField(
        doc="Configs for flux columns for each band",
        keytype=str,
        itemtype=MatchedCatalogFluxesConfig,
        default={},
    )
    columns_ref_mag_to_nJy = pexConfig.DictField[str, str](
        doc='Reference table AB mag columns to convert to nJy flux columns with new names',
        default={},
    )
    columns_ref_copy = pexConfig.ListField[str](
        doc='Reference table columns to copy into cat_matched',
        default=[],
        listCheck=is_sequence_set,
    )
    columns_target_coord_err = pexConfig.ListField[str](
        doc='Target table coordinate columns with standard errors (sigma)',
        listCheck=lambda x: (len(x) == 2) and (x[0] != x[1]),
    )
    columns_target_copy = pexConfig.ListField[str](
        doc='Target table columns to copy into cat_matched',
        default=('patch',),
        listCheck=is_sequence_set,
    )
    columns_target_mag_to_nJy = pexConfig.DictField[str, str](
        doc='Target table AB mag columns to convert to nJy flux columns with new names',
        default={},
    )
    columns_target_select_true = pexConfig.ListField[str](
        doc='Target table columns to require to be True for selecting sources',
        default=('detect_isPrimary',),
        listCheck=is_sequence_set,
    )
    columns_target_select_false = pexConfig.ListField[str](
        doc='Target table columns to require to be False for selecting sources',
        default=('merge_peak_sky',),
        listCheck=is_sequence_set,
    )
    coord_format = pexConfig.ConfigField[ConvertCatalogCoordinatesConfig](
        doc="Configuration for coordinate conversion",
    )
    extendedness_cut = pexConfig.Field[float](
        deprecated="This field is no longer being used and will be removed after v28.",
        default=0.5,
        doc='Minimum extendedness for a measured source to be considered extended',
    )
    mag_num_bins = pexConfig.Field[int](
        deprecated="This field is no longer being used and will be removed after v28.",
        doc='Number of magnitude bins',
        default=15,
    )
    mag_brightest_ref = pexConfig.Field[float](
        deprecated="This field is no longer being used and will be removed after v28.",
        doc='Brightest magnitude cutoff for binning',
        default=15,
    )
    mag_ceiling_target = pexConfig.Field[float](
        deprecated="This field is no longer being used and will be removed after v28.",
        doc='Ceiling (maximum/faint) magnitude for target sources',
        default=None,
        optional=True,
    )
    mag_faintest_ref = pexConfig.Field[float](
        deprecated="This field is no longer being used and will be removed after v28.",
        doc='Faintest magnitude cutoff for binning',
        default=30,
    )
    mag_zeropoint_ref = pexConfig.Field[float](
        deprecated="This field is no longer being used and will be removed after v28.",
        doc='Magnitude zeropoint for reference sources',
        default=31.4,
    )
    mag_zeropoint_target = pexConfig.Field[float](
        deprecated="This field is no longer being used and will be removed after v28.",
        doc='Magnitude zeropoint for target sources',
        default=31.4,
    )
    percentiles = pexConfig.ListField[str](
        deprecated="This field is no longer being used and will be removed after v28.",
        doc='Percentiles to compute for diff/chi values',
        # -2, -1, +1, +2 sigma percentiles for normal distribution
        default=('2.275', '15.866', '84.134', '97.725'),
        itemCheck=lambda x: 0 <= Decimal(x) <= 100,
        listCheck=is_sequence_set,
    )
    refcat_sharding_type = pexConfig.ChoiceField[str](
        doc="The type of sharding (spatial splitting) for the reference catalog",
        allowed={"tract": "Tract-based shards", "none": "No sharding at all"},
        default="tract",
    )

    def validate(self):
        """Validate mag-to-nJy conversion configs; raise ValueError listing
        every problem found rather than stopping at the first."""
        super().validate()

        errors = []

        for columns_mag, columns_in, name_columns_copy in (
            # Fixed garbled token: was `self.columns_in_refcolumns_in_ref`.
            (self.columns_ref_mag_to_nJy, self.columns_in_ref, "columns_ref_copy"),
            # NOTE(review): this entry was dropped by the extraction; restored
            # from upstream pipe_tasks — verify.
            (self.columns_target_mag_to_nJy, self.columns_in_target, "columns_target_copy"),
        ):
            columns_copy = getattr(self, name_columns_copy)
            for column_old, column_new in columns_mag.items():
                if column_old not in columns_in:
                    errors.append(
                        f"{column_old=} key in self.columns_mag_to_nJy not found in {columns_in=}; did you"
                        f" forget to add it to self.{name_columns_copy}={columns_copy}?"
                    )
                if column_new in columns_copy:
                    errors.append(
                        f"{column_new=} value found in self.{name_columns_copy}={columns_copy}"
                        f" this will cause a collision. Please choose a different name."
                    )
        if errors:
            raise ValueError("\n".join(errors))
359
360
@deprecated(reason="This class is no longer being used and will be removed after v28.",
            version="v28.0", category=FutureWarning)
@dataclass(frozen=True)
# NOTE(review): the class declaration line was dropped by the extraction;
# name reconstructed from upstream pipe_tasks — verify.
class MeasurementTypeInfo:
    """Description and short name of a measurement type."""
    doc: str
    name: str
367
368
@deprecated(reason="This class is no longer being used and will be removed after v28.",
            version="v28.0", category=FutureWarning)
class MeasurementType(Enum):
    """Kinds of difference measurements (raw difference vs. error-scaled chi)."""
    DIFF = SimpleNamespace(
        doc="difference (measured - reference)",
        name="diff",
    )
    CHI = SimpleNamespace(
        doc="scaled difference (measured - reference)/error",
        name="chi",
    )
380
381
@deprecated(reason="This class is no longer being used and will be removed after v28.",
            version="v28.0", category=FutureWarning)
class Statistic(metaclass=ABCMeta):
    """A statistic that can be applied to a set of values.
    """
    @abstractmethod
    def doc(self) -> str:
        """A description of the statistic"""
        raise NotImplementedError('Subclasses must implement this method')

    @abstractmethod
    def name_short(self) -> str:
        """A short name for the statistic, e.g. for a table column name"""
        raise NotImplementedError('Subclasses must implement this method')

    @abstractmethod
    def value(self, values):
        """The value of the statistic for a set of input values.

        Parameters
        ----------
        values : `Collection` [`float`]
            A set of values to compute the statistic for.

        Returns
        -------
        statistic : `float`
            The value of the statistic.
        """
        raise NotImplementedError('Subclasses must implement this method')
412
413
@deprecated(reason="This class is no longer being used and will be removed after v28.",
            version="v28.0", category=FutureWarning)
# NOTE(review): class declaration line restored (dropped by extraction);
# base class inferred from the Statistic interface it implements.
class Median(Statistic):
    """The median of a set of values."""
    @classmethod
    def doc(cls) -> str:
        return "Median"

    @classmethod
    def name_short(cls) -> str:
        return "median"

    def value(self, values):
        return np.median(values)
428
429
@deprecated(reason="This class is no longer being used and will be removed after v28.",
            version="v28.0", category=FutureWarning)
# NOTE(review): class declaration line restored (dropped by extraction).
class SigmaIQR(Statistic):
    """The re-scaled interquartile range (sigma equivalent)."""
    @classmethod
    def doc(cls) -> str:
        return "Interquartile range divided by ~1.349 (sigma-equivalent)"

    @classmethod
    def name_short(cls) -> str:
        return "sig_iqr"

    def value(self, values):
        # scale='normal' converts the IQR to a normal-sigma equivalent
        return iqr(values, scale='normal')
444
445
@deprecated(reason="This class is no longer being used and will be removed after v28.",
            version="v28.0", category=FutureWarning)
# NOTE(review): class declaration line restored (dropped by extraction).
class SigmaMAD(Statistic):
    """The re-scaled median absolute deviation (sigma equivalent)."""
    @classmethod
    def doc(cls) -> str:
        return "Median absolute deviation multiplied by ~1.4826 (sigma-equivalent)"

    @classmethod
    def name_short(cls) -> str:
        return "sig_mad"

    def value(self, values):
        return mad_std(values)
460
461
@deprecated(reason="This class is no longer being used and will be removed after v28.",
            version="v28.0", category=FutureWarning)
@dataclass(frozen=True)
# NOTE(review): class declaration line restored (dropped by extraction).
class Percentile(Statistic):
    """An arbitrary percentile.

    Parameters
    ----------
    percentile : `float`
        A valid percentile (0 <= p <= 100).
    """
    percentile: float

    def doc(self) -> str:
        # Fixed copy-paste bug: previously returned the SigmaMAD description.
        return f"The {self.percentile}th percentile"

    def name_short(self) -> str:
        # e.g. percentile=2.275 -> 'pctl02275' (fraction digits, no dot)
        return f"pctl{f'{self.percentile/100:.5f}'[2:]}"

    def value(self, values):
        # Fixed garbled token: was `self.percentilepercentile`.
        return np.percentile(values, self.percentile)

    def __post_init__(self):
        if not ((self.percentile >= 0) and (self.percentile <= 100)):
            raise ValueError(f'percentile={self.percentile} not >=0 and <= 100')
487
488
@deprecated(reason="This method is no longer being used and will be removed after v28.",
            version="v28.0", category=FutureWarning)
def _get_stat_name(*args):
    """Join name components with underscores into a statistic column name."""
    return '_'.join(args)
493
494
@deprecated(reason="This method is no longer being used and will be removed after v28.",
            version="v28.0", category=FutureWarning)
def _get_column_name(band, *args):
    """Prefix a statistic column name with its band."""
    return f"{band}_{_get_stat_name(*args)}"
499
500
@deprecated(reason="This method is no longer being used and will be removed after v28.",
            version="v28.0", category=FutureWarning)
def compute_stats(values_ref, values_target, errors_target, row, stats, suffixes, prefix, skip_diff=False):
    """Compute statistics on differences and store results in a row.

    Parameters
    ----------
    values_ref : `numpy.ndarray`, (N,)
        Reference values.
    values_target : `numpy.ndarray`, (N,)
        Measured values.
    errors_target : `numpy.ndarray`, (N,)
        Errors (standard deviations) on `values_target`.
    row : `numpy.ndarray`, (1, C)
        A numpy array with pre-assigned column names.
    stats : `Dict` [`str`, `Statistic`]
        A dict of `Statistic` values to measure, keyed by their column suffix.
    suffixes : `Dict` [`MeasurementType`, `str`]
        A dict of measurement type column suffixes, keyed by the measurement type.
    prefix : `str`
        A prefix for all column names (e.g. band).
    skip_diff : `bool`
        Whether to skip computing statistics on differences. Note that
        differences will still be computed for chi statistics.

    Returns
    -------
    row_with_stats : `numpy.ndarray`, (1, C)
        The original `row` with statistic values assigned.
    """
    n_ref = len(values_ref)
    if n_ref > 0:
        n_target = len(values_target)
        n_target_err = len(errors_target) if errors_target is not None else n_ref
        if (n_target != n_ref) or (n_target_err != n_ref):
            raise ValueError(f'lengths of values_ref={n_ref}, values_target={n_target}'
                             f', error_target={n_target_err} must match')

        do_chi = errors_target is not None
        diff = values_target - values_ref
        chi = diff/errors_target if do_chi else diff
        # Could make this configurable, but non-finite values/errors are not really usable
        valid = np.isfinite(chi)
        values_type = {} if skip_diff else {MeasurementType.DIFF: diff[valid]}
        if do_chi:
            values_type[MeasurementType.CHI] = chi[valid]

        for suffix_type, suffix in suffixes.items():
            values = values_type.get(suffix_type)
            if values is not None and len(values) > 0:
                for stat_name, stat in stats.items():
                    row[_get_stat_name(prefix, suffix, stat_name)] = stat.value(values)
    return row
554
555
@deprecated(reason="This class is no longer being used and will be removed after v28.",
            version="v28.0", category=FutureWarning)
@dataclass(frozen=True)
# NOTE(review): the class declaration line was dropped by the extraction;
# name reconstructed from upstream pipe_tasks — verify.
class SourceTypeInfo:
    """Extendedness flag (None = all sources) and label for a source type."""
    is_extended: bool | None
    label: str
562
563
@deprecated(reason="This class is no longer being used and will be removed after v28.",
            version="v28.0", category=FutureWarning)
class SourceType(Enum):
    """Source categories for statistics: all, resolved, or unresolved."""
    ALL = SimpleNamespace(is_extended=None, label='all')
    RESOLVED = SimpleNamespace(is_extended=True, label='resolved')
    UNRESOLVED = SimpleNamespace(is_extended=False, label='unresolved')
570
571
@deprecated(reason="This class is no longer being used and will be removed after v28.",
            version="v28.0", category=FutureWarning)
class MatchType(Enum):
    """Match categories: any match, correctly-classified, or misclassified."""
    ALL = 'all'
    MATCH_RIGHT = 'match_right'
    MATCH_WRONG = 'match_wrong'
578
579
@deprecated(reason="This method is no longer being used and will be removed after v28.",
            version="v28.0", category=FutureWarning)
def _get_columns(bands_columns: dict, suffixes: dict, suffixes_flux: dict, suffixes_mag: dict,
                 stats: dict, target: ComparableCatalog, column_dist: str):
    """Get column names for a table of difference statistics.

    Parameters
    ----------
    bands_columns : `Dict` [`str`,`MatchedCatalogFluxesConfig`]
        Dict keyed by band of flux column configuration.
    suffixes, suffixes_flux, suffixes_mag : `Dict` [`MeasurementType`, `str`]
        Dict of suffixes for each `MeasurementType` type, for general columns (e.g.
        coordinates), fluxes and magnitudes, respectively.
    stats : `Dict` [`str`, `Statistic`]
        Dict of suffixes for each `Statistic` type.
    target : `ComparableCatalog`
        A target catalog with coordinate column names.
    column_dist : `str`
        The name of the distance column.

    Returns
    -------
    columns : `Dict` [`str`, `type`]
        Dictionary of column types keyed by name.
    n_models : `int`
        The number of models measurements will be made for.

    Notes
    -----
    Presently, models must be identical for each band.
    """
    # Initial columns
    columns = {
        "bin": int,
        "mag_min": float,
        "mag_max": float,
    }

    # pre-assign all of the columns with appropriate types
    n_models = 0

    bands = list(bands_columns.keys())
    n_bands = len(bands)

    for idx, (band, config_flux) in enumerate(bands_columns.items()):
        columns_suffix = [
            ('flux', suffixes_flux),
            ('mag', suffixes_mag),
        ]
        if idx == 0:
            n_models = len(config_flux.columns_target_flux)
        # Add a color column relative to the previous band where applicable
        if (idx > 0) or (n_bands > 2):
            columns_suffix.append((f'color_{bands[idx - 1]}_m_{band}', suffixes))
        n_models_flux = len(config_flux.columns_target_flux)
        n_models_err = len(config_flux.columns_target_flux_err)

        # TODO: Do equivalent validation earlier, in the config
        if (n_models_flux != n_models) or (n_models_err != n_models):
            raise RuntimeError(f'{config_flux} len(columns_target_flux)={n_models_flux} and'
                               f' len(columns_target_flux_err)={n_models_err} must equal {n_models}')

        for sourcetype in SourceType:
            label = sourcetype.value.label
            # Totals would be redundant
            if sourcetype != SourceType.ALL:
                for item in (f'n_{itype}_{mtype.value}' for itype in ('ref', 'target')
                             for mtype in MatchType):
                    columns[_get_column_name(band, label, item)] = int

            for item in (target.column_coord1, target.column_coord2, column_dist):
                for suffix in suffixes.values():
                    for stat in stats.keys():
                        columns[_get_column_name(band, label, item, suffix, stat)] = float

            for item in config_flux.columns_target_flux:
                for prefix_item, suffixes_col in columns_suffix:
                    for suffix in suffixes_col.values():
                        for stat in stats.keys():
                            columns[_get_column_name(band, label, prefix_item, item, suffix, stat)] = float

    return columns, n_models
661
662
663class DiffMatchedTractCatalogTask(pipeBase.PipelineTask):
664 """Load subsets of matched catalogs and output a merged catalog of matched sources.
665 """
666 ConfigClass = DiffMatchedTractCatalogConfig
667 _DefaultName = "DiffMatchedTractCatalog"
668
669 def runQuantum(self, butlerQC, inputRefs, outputRefs):
670 inputs = butlerQC.get(inputRefs)
671 skymap = inputs.pop("skymap")
672
673 columns_match_target = ['match_row']
674 if 'match_candidate' in inputs['columns_match_target']:
675 columns_match_target.append('match_candidate')
676
677 outputs = self.run(
678 catalog_ref=inputs['cat_ref'].get(parameters={'columns': self.config.columns_in_ref}),
679 catalog_target=inputs['cat_target'].get(parameters={'columns': self.config.columns_in_target}),
680 catalog_match_ref=inputs['cat_match_ref'].get(
681 parameters={'columns': ['match_candidate', 'match_row']},
682 ),
683 catalog_match_target=inputs['cat_match_target'].get(
684 parameters={'columns': columns_match_target},
685 ),
686 wcs=skymap[butlerQC.quantum.dataId["tract"]].wcs,
687 )
688 butlerQC.put(outputs, outputRefs)
689
690 def run(
691 self,
692 catalog_ref: pd.DataFrame | astropy.table.Table,
693 catalog_target: pd.DataFrame | astropy.table.Table,
694 catalog_match_ref: pd.DataFrame | astropy.table.Table,
695 catalog_match_target: pd.DataFrame | astropy.table.Table,
696 wcs: afwGeom.SkyWcs = None,
697 ) -> pipeBase.Struct:
698 """Load matched reference and target (measured) catalogs, measure summary statistics, and output
699 a combined matched catalog with columns from both inputs.
700
701 Parameters
702 ----------
703 catalog_ref : `pandas.DataFrame` | `astropy.table.Table`
704 A reference catalog to diff objects/sources from.
705 catalog_target : `pandas.DataFrame` | `astropy.table.Table`
706 A target catalog to diff reference objects/sources to.
707 catalog_match_ref : `pandas.DataFrame` | `astropy.table.Table`
708 A catalog with match indices of target sources and selection flags
709 for each reference source.
710 catalog_match_target : `pandas.DataFrame` | `astropy.table.Table`
711 A catalog with selection flags for each target source.
712 wcs : `lsst.afw.image.SkyWcs`
713 A coordinate system to convert catalog positions to sky coordinates,
714 if necessary.
715
716 Returns
717 -------
718 retStruct : `lsst.pipe.base.Struct`
719 A struct with output_ref and output_target attribute containing the
720 output matched catalogs.
721 """
722 # Would be nice if this could refer directly to ConfigClass
723 config: DiffMatchedTractCatalogConfig = self.config
724
725 is_ref_pd = isinstance(catalog_ref, pd.DataFrame)
726 is_target_pd = isinstance(catalog_target, pd.DataFrame)
727 is_match_ref_pd = isinstance(catalog_match_ref, pd.DataFrame)
728 is_match_target_pd = isinstance(catalog_match_target, pd.DataFrame)
729 if is_ref_pd:
730 catalog_ref = astropy.table.Table.from_pandas(catalog_ref)
731 if is_target_pd:
732 catalog_target = astropy.table.Table.from_pandas(catalog_target)
733 if is_match_ref_pd:
734 catalog_match_ref = astropy.table.Table.from_pandas(catalog_match_ref)
735 if is_match_target_pd:
736 catalog_match_target = astropy.table.Table.from_pandas(catalog_match_target)
737 # TODO: Remove pandas support in DM-46523
738 if is_ref_pd or is_target_pd or is_match_ref_pd or is_match_target_pd:
739 warnings.warn("pandas usage in MatchProbabilisticTask is deprecated; it will be removed "
740 " in favour of astropy.table after release 28.0.0", category=FutureWarning)
741
742 select_ref = catalog_match_ref['match_candidate']
743 # Add additional selection criteria for target sources beyond those for matching
744 # (not recommended, but can be done anyway)
745 select_target = (catalog_match_target['match_candidate']
746 if 'match_candidate' in catalog_match_target.columns
747 else np.ones(len(catalog_match_target), dtype=bool))
748 for column in config.columns_target_select_true:
749 select_target &= catalog_target[column]
750 for column in config.columns_target_select_false:
751 select_target &= ~catalog_target[column]
752
753 ref, target = config.coord_format.format_catalogs(
754 catalog_ref=catalog_ref, catalog_target=catalog_target,
755 select_ref=None, select_target=select_target, wcs=wcs, radec_to_xy_func=radec_to_xy,
756 )
757 cat_ref = ref.catalog
758 cat_target = target.catalog
759 n_target = len(cat_target)
760
761 if config.include_unmatched:
762 for cat_add, cat_match in ((cat_ref, catalog_match_ref), (cat_target, catalog_match_target)):
763 cat_add['match_candidate'] = cat_match['match_candidate']
764
765 match_row = catalog_match_ref['match_row']
766 matched_ref = match_row >= 0
767 matched_row = match_row[matched_ref]
768 matched_target = np.zeros(n_target, dtype=bool)
769 matched_target[matched_row] = True
770
771 # Add/compute distance columns
772 coord1_target_err, coord2_target_err = config.columns_target_coord_err
773 column_dist, column_dist_err = 'match_distance', 'match_distanceErr'
774 dist = np.full(n_target, np.nan)
775
776 target_match_c1, target_match_c2 = (coord[matched_row] for coord in (target.coord1, target.coord2))
777 target_ref_c1, target_ref_c2 = (coord[matched_ref] for coord in (ref.coord1, ref.coord2))
778
779 dist_err = np.full(n_target, np.nan)
780 dist[matched_row] = sphdist(
781 target_match_c1, target_match_c2, target_ref_c1, target_ref_c2
782 ) if config.coord_format.coords_spherical else np.hypot(
783 target_match_c1 - target_ref_c1, target_match_c2 - target_ref_c2,
784 )
785 cat_target_matched = cat_target[matched_row]
786 # This will convert a masked array to an array filled with nans
787 # wherever there are bad values (otherwise sphdist can raise)
788 c1_err, c2_err = (
789 np.ma.getdata(cat_target_matched[c_err]) for c_err in (coord1_target_err, coord2_target_err)
790 )
791 # Should probably explicitly add cosine terms if ref has errors too
792 dist_err[matched_row] = sphdist(
793 target_match_c1, target_match_c2, target_match_c1 + c1_err, target_match_c2 + c2_err
794 ) if config.coord_format.coords_spherical else np.hypot(c1_err, c2_err)
795 cat_target[column_dist], cat_target[column_dist_err] = dist, dist_err
796
797 # Create a matched table, preserving the target catalog's named index (if it has one)
798 cat_left = cat_target[matched_row]
799 cat_right = cat_ref[matched_ref]
800 cat_right.rename_columns(
801 list(cat_right.columns),
802 new_names=[f'{config.column_matched_prefix_ref}{col}' for col in cat_right.columns],
803 )
804 cat_matched = astropy.table.hstack((cat_left, cat_right))
805
806 if config.include_unmatched:
807 # Create an unmatched table with the same schema as the matched one
808 # ... but only for objects with no matches (for completeness/purity)
809 # and that were selected for matching (or inclusion via config)
810 cat_right = astropy.table.Table(
811 cat_ref[~matched_ref & select_ref]
812 )
813 cat_right.rename_columns(
814 cat_right.colnames,
815 [f"{config.column_matched_prefix_ref}{col}" for col in cat_right.colnames],
816 )
817 match_row_target = catalog_match_target['match_row']
818 cat_left = cat_target[~(match_row_target >= 0) & select_target]
819 # This may be slower than pandas but will, for example, create
820 # masked columns for booleans, which pandas does not support.
821 # See https://github.com/pandas-dev/pandas/issues/46662
822 cat_unmatched = astropy.table.vstack([cat_left, cat_right])
823
824 for columns_convert_base, prefix in (
825 (config.columns_ref_mag_to_nJy, config.column_matched_prefix_ref),
826 (config.columns_target_mag_to_nJy, ""),
827 ):
828 if columns_convert_base:
829 columns_convert = {
830 f"{prefix}{k}": f"{prefix}{v}" for k, v in columns_convert_base.items()
831 } if prefix else columns_convert_base
832 to_convert = [cat_matched]
833 if config.include_unmatched:
834 to_convert.append(cat_unmatched)
835 for cat_convert in to_convert:
836 cat_convert.rename_columns(
837 tuple(columns_convert.keys()),
838 tuple(columns_convert.values()),
839 )
840 for column_flux in columns_convert.values():
841 cat_convert[column_flux] = u.ABmag.to(u.nJy, cat_convert[column_flux])
842
843 data = None
844 band_fluxes = [(band, config_flux) for (band, config_flux) in config.columns_flux.items()]
845 n_bands = len(band_fluxes)
846
847 # TODO: Deprecated by RFC-1017 and to be removed in DM-44988
848 do_stats = self.config.compute_stats and (n_bands > 0)
849 if do_stats:
850 # Slightly smelly hack for when a column (like distance) is already relative to truth
851 column_dummy = 'dummy'
852 cat_ref[column_dummy] = np.zeros_like(ref.coord1)
853
854 # Add a boolean column for whether a match is classified correctly
855 # TODO: remove the assumption of a boolean column
856 extended_ref = cat_ref[config.column_ref_extended] == (not config.column_ref_extended_inverted)
857
858 extended_target = cat_target[config.column_target_extended] >= config.extendedness_cut
859
860 # Define difference/chi columns and statistics thereof
861 suffixes = {MeasurementType.DIFF: 'diff', MeasurementType.CHI: 'chi'}
862 # Skip diff for fluxes - covered by mags
863 suffixes_flux = {MeasurementType.CHI: suffixes[MeasurementType.CHI]}
864 # Skip chi for magnitudes, which have strange errors
865 suffixes_mag = {MeasurementType.DIFF: suffixes[MeasurementType.DIFF]}
866 stats = {stat.name_short(): stat() for stat in (Median, SigmaIQR, SigmaMAD)}
867
868 for percentile in self.config.percentiles:
869 stat = Percentile(percentile=float(Decimal(percentile)))
870 stats[stat.name_short()] = stat
871
872 # Get dict of column names
873 columns, n_models = _get_columns(
874 bands_columns=config.columns_flux,
875 suffixes=suffixes,
876 suffixes_flux=suffixes_flux,
877 suffixes_mag=suffixes_mag,
878 stats=stats,
879 target=target,
880 column_dist=column_dist,
881 )
882
883 # Setup numpy table
884 n_bins = config.mag_num_bins
885 data = np.zeros((n_bins,), dtype=[(key, value) for key, value in columns.items()])
886 data['bin'] = np.arange(n_bins)
887
888 # Setup bins
889 bins_mag = np.linspace(start=config.mag_brightest_ref, stop=config.mag_faintest_ref,
890 num=n_bins + 1)
891 data['mag_min'] = bins_mag[:-1]
892 data['mag_max'] = bins_mag[1:]
893 bins_mag = tuple((bins_mag[idx], bins_mag[idx + 1]) for idx in range(n_bins))
894
895 # Define temporary columns for intermediate storage
896 column_mag_temp = 'mag_temp'
897 column_color_temp = 'color_temp'
898 column_color_err_temp = 'colorErr_temp'
899 flux_err_frac_prev = [None]*n_models
900 mag_prev = [None]*n_models
901
902 columns_target = {
903 target.column_coord1: (
904 ref.column_coord1, target.column_coord1, coord1_target_err, False,
905 ),
906 target.column_coord2: (
907 ref.column_coord2, target.column_coord2, coord2_target_err, False,
908 ),
909 column_dist: (column_dummy, column_dist, column_dist_err, False),
910 }
911
912 # Cheat a little and do the first band last so that the color is
913 # based on the last band
914 band_fluxes.append(band_fluxes[0])
915 flux_err_frac_first = None
916 mag_first = None
917 mag_ref_first = None
918
919 band_prev = None
920 for idx_band, (band, config_flux) in enumerate(band_fluxes):
921 if idx_band == n_bands:
922 # These were already computed earlier
923 mag_ref = mag_ref_first
924 flux_err_frac = flux_err_frac_first
925 mag_model = mag_first
926 else:
927 mag_ref = -2.5*np.log10(cat_ref[config_flux.column_ref_flux]) + config.mag_zeropoint_ref
928 flux_err_frac = [None]*n_models
929 mag_model = [None]*n_models
930
931 if idx_band > 0:
932 cat_ref[column_color_temp] = cat_ref[column_mag_temp] - mag_ref
933
934 cat_ref[column_mag_temp] = mag_ref
935
936 select_ref_bins = [select_ref & (mag_ref > mag_lo) & (mag_ref < mag_hi)
937 for idx_bin, (mag_lo, mag_hi) in enumerate(bins_mag)]
938
939 # Iterate over multiple models, compute their mags and colours (if there's a previous band)
940 for idx_model in range(n_models):
941 column_target_flux = config_flux.columns_target_flux[idx_model]
942 column_target_flux_err = config_flux.columns_target_flux_err[idx_model]
943
944 flux_target = cat_target[column_target_flux]
945 mag_target = -2.5*np.log10(flux_target) + config.mag_zeropoint_target
946 if config.mag_ceiling_target is not None:
947 mag_target[mag_target > config.mag_ceiling_target] = config.mag_ceiling_target
948 mag_model[idx_model] = mag_target
949
950 # These are needed for computing magnitude/color "errors" (which are a sketchy concept)
951 flux_err_frac[idx_model] = cat_target[column_target_flux_err]/flux_target
952
953 # Stop if idx == 0: The rest will be picked up at idx == n_bins
954 if idx_band > 0:
955 # Keep these mags tabulated for convenience
956 column_mag_temp_model = f'{column_mag_temp}{idx_model}'
957 cat_target[column_mag_temp_model] = mag_target
958
959 columns_target[f'flux_{column_target_flux}'] = (
960 config_flux.column_ref_flux,
961 column_target_flux,
962 column_target_flux_err,
963 True,
964 )
965 # Note: magnitude errors are generally problematic and not worth aggregating
966 columns_target[f'mag_{column_target_flux}'] = (
967 column_mag_temp, column_mag_temp_model, None, False,
968 )
969
970 # No need for colors if this is the last band and there are only two bands
971 # (because it would just be the negative of the first color)
972 skip_color = (idx_band == n_bands) and (n_bands <= 2)
973 if not skip_color:
974 column_color_temp_model = f'{column_color_temp}{idx_model}'
975 column_color_err_temp_model = f'{column_color_err_temp}{idx_model}'
976
977 # e.g. if order is ugrizy, first color will be u - g
978 cat_target[column_color_temp_model] = mag_prev[idx_model] - mag_model[idx_model]
979
980 # Sum (in quadrature, and admittedly sketchy for faint fluxes) magnitude errors
981 cat_target[column_color_err_temp_model] = 2.5/np.log(10)*np.hypot(
982 flux_err_frac[idx_model], flux_err_frac_prev[idx_model])
983 columns_target[f'color_{band_prev}_m_{band}_{column_target_flux}'] = (
984 column_color_temp,
985 column_color_temp_model,
986 column_color_err_temp_model,
987 False,
988 )
989
990 for idx_bin, (mag_lo, mag_hi) in enumerate(bins_mag):
991 row = data[idx_bin]
992 # Reference sources only need to be counted once
993 if idx_model == 0:
994 select_ref_bin = select_ref_bins[idx_bin]
995 select_target_bin = select_target & (mag_target > mag_lo) & (mag_target < mag_hi)
996
997 for sourcetype in SourceType:
998 sourcetype_info = sourcetype.value
999 is_extended = sourcetype_info.is_extended
1000 # Counts filtered by match selection and magnitude bin
1001 select_ref_sub = select_ref_bin.copy()
1002 select_target_sub = select_target_bin.copy()
1003 if is_extended is not None:
1004 is_extended_ref = (extended_ref == is_extended)
1005 select_ref_sub &= is_extended_ref
1006 if idx_model == 0:
1007 n_ref_sub = np.count_nonzero(select_ref_sub)
1008 row[_get_column_name(band, sourcetype_info.label, 'n_ref',
1009 MatchType.ALL.value)] = n_ref_sub
1010 select_target_sub &= (extended_target == is_extended)
1011 n_target_sub = np.count_nonzero(select_target_sub)
1012 row[_get_column_name(band, sourcetype_info.label, 'n_target',
1013 MatchType.ALL.value)] = n_target_sub
1014
1015 # Filter matches by magnitude bin and true class
1016 match_row_bin = match_row.copy()
1017 match_row_bin[~select_ref_sub] = -1
1018 match_good = match_row_bin >= 0
1019
1020 n_match = np.count_nonzero(match_good)
1021
1022 # Same for counts of matched target sources (for e.g. purity)
1023
1024 if n_match > 0:
1025 rows_matched = match_row_bin[match_good]
1026 subset_target = cat_target[rows_matched]
1027 if (is_extended is not None) and (idx_model == 0):
1028 right_type = extended_target[rows_matched] == is_extended
1029 n_total = len(right_type)
1030 n_right = np.count_nonzero(right_type)
1031 row[_get_column_name(band, sourcetype_info.label, 'n_ref',
1032 MatchType.MATCH_RIGHT.value)] = n_right
1033 row[_get_column_name(
1034 band,
1035 sourcetype_info.label,
1036 'n_ref',
1037 MatchType.MATCH_WRONG.value,
1038 )] = n_total - n_right
1039
1040 # compute stats for this bin, for all columns
1041 for column, (column_ref, column_target, column_err_target, skip_diff) \
1042 in columns_target.items():
1043 values_ref = cat_ref[column_ref][match_good]
1044 errors_target = (
1045 subset_target[column_err_target]
1046 if column_err_target is not None
1047 else None
1048 )
1050 values_ref,
1051 subset_target[column_target],
1052 errors_target,
1053 row,
1054 stats,
1055 suffixes,
1056 prefix=f'{band}_{sourcetype_info.label}_{column}',
1057 skip_diff=skip_diff,
1058 )
1059
1060 # Count matched target sources with *measured* mags within bin
1061 # Used for e.g. purity calculation
1062 # Should be merged with above code if there's ever a need for
1063 # measuring stats on this source selection
1064 select_target_sub &= matched_target
1065
1066 if is_extended is not None and (np.count_nonzero(select_target_sub) > 0):
1067 n_total = np.count_nonzero(select_target_sub)
1068 right_type = np.zeros(n_target, dtype=bool)
1069 right_type[match_row[matched_ref & is_extended_ref]] = True
1070 right_type &= select_target_sub
1071 n_right = np.count_nonzero(right_type)
1072 row[_get_column_name(band, sourcetype_info.label, 'n_target',
1073 MatchType.MATCH_RIGHT.value)] = n_right
1074 row[_get_column_name(band, sourcetype_info.label, 'n_target',
1075 MatchType.MATCH_WRONG.value)] = n_total - n_right
1076
1077 # delete the flux/color columns since they change with each band
1078 for prefix in ('flux', 'mag'):
1079 del columns_target[f'{prefix}_{column_target_flux}']
1080 if not skip_color:
1081 del columns_target[f'color_{band_prev}_m_{band}_{column_target_flux}']
1082
1083 # keep values needed for colors
1084 flux_err_frac_prev = flux_err_frac
1085 mag_prev = mag_model
1086 band_prev = band
1087 if idx_band == 0:
1088 flux_err_frac_first = flux_err_frac
1089 mag_first = mag_model
1090 mag_ref_first = mag_ref
1091
1092 if config.include_unmatched:
1093 # This is probably less efficient than just doing an outer join originally; worth checking
1094 cat_matched = astropy.table.vstack([cat_matched, cat_unmatched])
1095
1096 retStruct = pipeBase.Struct(cat_matched=cat_matched)
1097 if do_stats:
1098 retStruct.diff_matched = astropy.table.Table(data)
1099 return retStruct
A 2-dimensional celestial WCS that transform pixels to ICRS RA/Dec, using the LSST standard for pixel...
Definition SkyWcs.h:117
pipeBase.Struct run(self, pd.DataFrame|astropy.table.Table catalog_ref, pd.DataFrame|astropy.table.Table catalog_target, pd.DataFrame|astropy.table.Table catalog_match_ref, pd.DataFrame|astropy.table.Table catalog_match_target, afwGeom.SkyWcs wcs=None)
compute_stats(values_ref, values_target, errors_target, row, stats, suffixes, prefix, skip_diff=False)
_get_columns(dict bands_columns, dict suffixes, dict suffixes_flux, dict suffixes_mag, dict stats, ComparableCatalog target, str column_dist)