LSST Applications g0265f82a02+0e5473021a,g02d81e74bb+f5613e8b4f,g1470d8bcf6+190ad2ba91,g14a832a312+311607e4ab,g2079a07aa2+86d27d4dc4,g2305ad1205+a8e3196225,g295015adf3+b67ee847e5,g2bbee38e9b+0e5473021a,g337abbeb29+0e5473021a,g3ddfee87b4+a761f810f3,g487adcacf7+17c8fdbcbd,g50ff169b8f+96c6868917,g52b1c1532d+585e252eca,g591dd9f2cf+65b5bd823e,g5a732f18d5+53520f316c,g64a986408d+f5613e8b4f,g6c1bc301e9+51106c2951,g858d7b2824+f5613e8b4f,g8a8a8dda67+585e252eca,g99cad8db69+6729933424,g9ddcbc5298+9a081db1e4,ga1e77700b3+15fc3df1f7,ga8c6da7877+ef4e3a5875,gb0e22166c9+60f28cb32d,gb6a65358fc+0e5473021a,gba4ed39666+c2a2e4ac27,gbb8dafda3b+e9bba80f27,gc120e1dc64+eee469a5e5,gc28159a63d+0e5473021a,gcf0d15dbbd+a761f810f3,gdaeeff99f8+f9a426f77a,ge6526c86ff+d4c1d4bfef,ge79ae78c31+0e5473021a,gee10cc3b42+585e252eca,gf1cff7945b+f5613e8b4f,w.2024.16
LSST Data Management Base Package
Loading...
Searching...
No Matches
Public Member Functions | Static Public Attributes | Static Protected Attributes | List of all members
lsst.pipe.tasks.diff_matched_tract_catalog.DiffMatchedTractCatalogTask Class Reference
Inheritance diagram for lsst.pipe.tasks.diff_matched_tract_catalog.DiffMatchedTractCatalogTask:

Public Member Functions

 runQuantum (self, butlerQC, inputRefs, outputRefs)
 
pipeBase.Struct run (self, pd.DataFrame catalog_ref, pd.DataFrame catalog_target, pd.DataFrame catalog_match_ref, pd.DataFrame catalog_match_target, afwGeom.SkyWcs wcs=None)
 

Static Public Attributes

 ConfigClass = DiffMatchedTractCatalogConfig
 

Static Protected Attributes

str _DefaultName = "DiffMatchedTractCatalog"
 

Detailed Description

Load subsets of matched catalogs and output a merged catalog of matched sources.

Definition at line 557 of file diff_matched_tract_catalog.py.

Member Function Documentation

◆ run()

pipeBase.Struct lsst.pipe.tasks.diff_matched_tract_catalog.DiffMatchedTractCatalogTask.run ( self,
pd.DataFrame catalog_ref,
pd.DataFrame catalog_target,
pd.DataFrame catalog_match_ref,
pd.DataFrame catalog_match_target,
afwGeom.SkyWcs wcs = None )
Load matched reference and target (measured) catalogs, measure summary statistics, and output
a combined matched catalog with columns from both inputs.

Parameters
----------
catalog_ref : `pandas.DataFrame`
    A reference catalog to diff objects/sources from.
catalog_target : `pandas.DataFrame`
    A target catalog to diff reference objects/sources to.
catalog_match_ref : `pandas.DataFrame`
    A catalog with match indices of target sources and selection flags
    for each reference source.
catalog_match_target : `pandas.DataFrame`
    A catalog with selection flags for each target source.
wcs : `lsst.afw.geom.SkyWcs`, optional
    A coordinate system to convert catalog positions to sky coordinates,
    if necessary.

Returns
-------
retStruct : `lsst.pipe.base.Struct`
    A struct with cat_matched and diff_matched attributes, containing the
    merged catalog of matched sources and the table of per-magnitude-bin
    difference statistics, respectively.

Definition at line 584 of file diff_matched_tract_catalog.py.

591 ) -> pipeBase.Struct:
592 """Load matched reference and target (measured) catalogs, measure summary statistics, and output
593 a combined matched catalog with columns from both inputs.
594
595 Parameters
596 ----------
597 catalog_ref : `pandas.DataFrame`
598 A reference catalog to diff objects/sources from.
599 catalog_target : `pandas.DataFrame`
600 A target catalog to diff reference objects/sources to.
601 catalog_match_ref : `pandas.DataFrame`
602 A catalog with match indices of target sources and selection flags
603 for each reference source.
604 catalog_match_target : `pandas.DataFrame`
605 A catalog with selection flags for each target source.
606 wcs : `lsst.afw.geom.SkyWcs`, optional
607 A coordinate system to convert catalog positions to sky coordinates,
608 if necessary.
609
610 Returns
611 -------
612 retStruct : `lsst.pipe.base.Struct`
613 A struct with cat_matched (merged catalog of matched sources) and
614 diff_matched (per-magnitude-bin difference statistics) attributes.
615 """
616 config = self.config
617
618 select_ref = catalog_match_ref['match_candidate'].values
619 # Add additional selection criteria for target sources beyond those for matching
620 # (not recommended, but can be done anyway)
621 select_target = (catalog_match_target['match_candidate'].values
622 if 'match_candidate' in catalog_match_target.columns
623 else np.ones(len(catalog_match_target), dtype=bool))
624 for column in config.columns_target_select_true:
625 select_target &= catalog_target[column].values
626 for column in config.columns_target_select_false:
627 select_target &= ~catalog_target[column].values
628
629 ref, target = config.coord_format.format_catalogs(
630 catalog_ref=catalog_ref, catalog_target=catalog_target,
631 select_ref=None, select_target=select_target, wcs=wcs, radec_to_xy_func=radec_to_xy,
632 return_converted_columns=config.coord_format.coords_ref_to_convert is not None,
633 )
634 cat_ref = ref.catalog
635 cat_target = target.catalog
636 n_target = len(cat_target)
637
638 match_row = catalog_match_ref['match_row'].values
639 matched_ref = match_row >= 0
640 matched_row = match_row[matched_ref]
641 matched_target = np.zeros(n_target, dtype=bool)
642 matched_target[matched_row] = True
643
644 # Create a matched table, preserving the target catalog's named index (if it has one)
645 cat_left = cat_target.iloc[matched_row]
646 has_index_left = cat_left.index.name is not None
647 cat_right = cat_ref[matched_ref].reset_index()
648 cat_matched = pd.concat(objs=(cat_left.reset_index(drop=True), cat_right), axis=1, sort=False)
649 if has_index_left:
650 cat_matched.index = cat_left.index
651 cat_matched.columns.values[len(cat_target.columns):] = [f'refcat_{col}' for col in cat_right.columns]
652
653 # Add/compute distance columns
654 coord1_target_err, coord2_target_err = config.columns_target_coord_err
655 column_dist, column_dist_err = 'distance', 'distanceErr'
656 dist = np.full(n_target, np.Inf)
657
658 dist[matched_row] = np.hypot(
659 target.coord1[matched_row] - ref.coord1[matched_ref],
660 target.coord2[matched_row] - ref.coord2[matched_ref],
661 )
662 dist_err = np.full(n_target, np.Inf)
663 dist_err[matched_row] = np.hypot(cat_target.iloc[matched_row][coord1_target_err].values,
664 cat_target.iloc[matched_row][coord2_target_err].values)
665 cat_target[column_dist], cat_target[column_dist_err] = dist, dist_err
666
667 # Slightly smelly hack for when a column (like distance) is already relative to truth
668 column_dummy = 'dummy'
669 cat_ref[column_dummy] = np.zeros_like(ref.coord1)
670
671 # Add a boolean column for whether a match is classified correctly
672 extended_ref = cat_ref[config.column_ref_extended]
673 if config.column_ref_extended_inverted:
674 extended_ref = 1 - extended_ref
675
676 extended_target = cat_target[config.column_target_extended].values >= config.extendedness_cut
677
678 # Define difference/chi columns and statistics thereof
679 suffixes = {MeasurementType.DIFF: 'diff', MeasurementType.CHI: 'chi'}
680 # Skip diff for fluxes - covered by mags
681 suffixes_flux = {MeasurementType.CHI: suffixes[MeasurementType.CHI]}
682 # Skip chi for magnitudes, which have strange errors
683 suffixes_mag = {MeasurementType.DIFF: suffixes[MeasurementType.DIFF]}
684 stats = {stat.name_short(): stat() for stat in (Median, SigmaIQR, SigmaMAD)}
685
686 for percentile in self.config.percentiles:
687 stat = Percentile(percentile=float(Decimal(percentile)))
688 stats[stat.name_short()] = stat
689
690 # Get dict of column names
691 columns, n_models = _get_columns(
692 bands_columns=config.columns_flux,
693 suffixes=suffixes,
694 suffixes_flux=suffixes_flux,
695 suffixes_mag=suffixes_mag,
696 stats=stats,
697 target=target,
698 column_dist=column_dist,
699 )
700
701 # Setup numpy table
702 n_bins = config.mag_num_bins
703 data = np.zeros((n_bins,), dtype=[(key, value) for key, value in columns.items()])
704 data['bin'] = np.arange(n_bins)
705
706 # Setup bins
707 bins_mag = np.linspace(start=config.mag_brightest_ref, stop=config.mag_faintest_ref,
708 num=n_bins + 1)
709 data['mag_min'] = bins_mag[:-1]
710 data['mag_max'] = bins_mag[1:]
711 bins_mag = tuple((bins_mag[idx], bins_mag[idx + 1]) for idx in range(n_bins))
712
713 # Define temporary columns for intermediate storage
714 column_mag_temp = 'mag_temp'
715 column_color_temp = 'color_temp'
716 column_color_err_temp = 'colorErr_temp'
717 flux_err_frac_prev = [None]*n_models
718 mag_prev = [None]*n_models
719
720 columns_target = {
721 target.column_coord1: (
722 ref.column_coord1, target.column_coord1, coord1_target_err, False,
723 ),
724 target.column_coord2: (
725 ref.column_coord2, target.column_coord2, coord2_target_err, False,
726 ),
727 column_dist: (column_dummy, column_dist, column_dist_err, False),
728 }
729
730 # Cheat a little and do the first band last so that the color is
731 # based on the last band
732 band_fluxes = [(band, config_flux) for (band, config_flux) in config.columns_flux.items()]
733 n_bands = len(band_fluxes)
734 band_fluxes.append(band_fluxes[0])
735 flux_err_frac_first = None
736 mag_first = None
737 mag_ref_first = None
738
739 band_prev = None
740 for idx_band, (band, config_flux) in enumerate(band_fluxes):
741 if idx_band == n_bands:
742 # These were already computed earlier
743 mag_ref = mag_ref_first
744 flux_err_frac = flux_err_frac_first
745 mag_model = mag_first
746 else:
747 mag_ref = -2.5*np.log10(cat_ref[config_flux.column_ref_flux]) + config.mag_zeropoint_ref
748 flux_err_frac = [None]*n_models
749 mag_model = [None]*n_models
750
751 if idx_band > 0:
752 cat_ref[column_color_temp] = cat_ref[column_mag_temp] - mag_ref
753
754 cat_ref[column_mag_temp] = mag_ref
755
756 select_ref_bins = [select_ref & (mag_ref > mag_lo) & (mag_ref < mag_hi)
757 for idx_bin, (mag_lo, mag_hi) in enumerate(bins_mag)]
758
759 # Iterate over multiple models, compute their mags and colours (if there's a previous band)
760 for idx_model in range(n_models):
761 column_target_flux = config_flux.columns_target_flux[idx_model]
762 column_target_flux_err = config_flux.columns_target_flux_err[idx_model]
763
764 flux_target = cat_target[column_target_flux]
765 mag_target = -2.5*np.log10(flux_target) + config.mag_zeropoint_target
766 if config.mag_ceiling_target is not None:
767 mag_target[mag_target > config.mag_ceiling_target] = config.mag_ceiling_target
768 mag_model[idx_model] = mag_target
769
770 # These are needed for computing magnitude/color "errors" (which are a sketchy concept)
771 flux_err_frac[idx_model] = cat_target[column_target_flux_err]/flux_target
772
773 # Stop if idx_band == 0: The rest will be picked up at idx_band == n_bands
774 if idx_band > 0:
775 # Keep these mags tabulated for convenience
776 column_mag_temp_model = f'{column_mag_temp}{idx_model}'
777 cat_target[column_mag_temp_model] = mag_target
778
779 columns_target[f'flux_{column_target_flux}'] = (
780 config_flux.column_ref_flux,
781 column_target_flux,
782 column_target_flux_err,
783 True,
784 )
785 # Note: magnitude errors are generally problematic and not worth aggregating
786 columns_target[f'mag_{column_target_flux}'] = (
787 column_mag_temp, column_mag_temp_model, None, False,
788 )
789
790 # No need for colors if this is the last band and there are only two bands
791 # (because it would just be the negative of the first color)
792 skip_color = (idx_band == n_bands) and (n_bands <= 2)
793 if not skip_color:
794 column_color_temp_model = f'{column_color_temp}{idx_model}'
795 column_color_err_temp_model = f'{column_color_err_temp}{idx_model}'
796
797 # e.g. if order is ugrizy, first color will be u - g
798 cat_target[column_color_temp_model] = mag_prev[idx_model] - mag_model[idx_model]
799
800 # Sum (in quadrature, and admittedly sketchy for faint fluxes) magnitude errors
801 cat_target[column_color_err_temp_model] = 2.5/np.log(10)*np.hypot(
802 flux_err_frac[idx_model], flux_err_frac_prev[idx_model])
803 columns_target[f'color_{band_prev}_m_{band}_{column_target_flux}'] = (
804 column_color_temp,
805 column_color_temp_model,
806 column_color_err_temp_model,
807 False,
808 )
809
810 for idx_bin, (mag_lo, mag_hi) in enumerate(bins_mag):
811 row = data[idx_bin]
812 # Reference sources only need to be counted once
813 if idx_model == 0:
814 select_ref_bin = select_ref_bins[idx_bin]
815 select_target_bin = select_target & (mag_target > mag_lo) & (mag_target < mag_hi)
816
817 for sourcetype in SourceType:
818 sourcetype_info = sourcetype.value
819 is_extended = sourcetype_info.is_extended
820 # Counts filtered by match selection and magnitude bin
821 select_ref_sub = select_ref_bin.copy()
822 select_target_sub = select_target_bin.copy()
823 if is_extended is not None:
824 is_extended_ref = (extended_ref == is_extended)
825 select_ref_sub &= is_extended_ref
826 if idx_model == 0:
827 n_ref_sub = np.count_nonzero(select_ref_sub)
828 row[_get_column_name(band, sourcetype_info.label, 'n_ref',
829 MatchType.ALL.value)] = n_ref_sub
830 select_target_sub &= (extended_target == is_extended)
831 n_target_sub = np.count_nonzero(select_target_sub)
832 row[_get_column_name(band, sourcetype_info.label, 'n_target',
833 MatchType.ALL.value)] = n_target_sub
834
835 # Filter matches by magnitude bin and true class
836 match_row_bin = match_row.copy()
837 match_row_bin[~select_ref_sub] = -1
838 match_good = match_row_bin >= 0
839
840 n_match = np.count_nonzero(match_good)
841
842 # Same for counts of matched target sources (for e.g. purity)
843
844 if n_match > 0:
845 rows_matched = match_row_bin[match_good]
846 subset_target = cat_target.iloc[rows_matched]
847 if (is_extended is not None) and (idx_model == 0):
848 right_type = extended_target[rows_matched] == is_extended
849 n_total = len(right_type)
850 n_right = np.count_nonzero(right_type)
851 row[_get_column_name(band, sourcetype_info.label, 'n_ref',
852 MatchType.MATCH_RIGHT.value)] = n_right
853 row[_get_column_name(
854 band, sourcetype_info.label, 'n_ref', MatchType.MATCH_WRONG.value,
855 )] = n_total - n_right
856
857 # compute stats for this bin, for all columns
858 for column, (column_ref, column_target, column_err_target, skip_diff) \
859 in columns_target.items():
860 values_ref = cat_ref[column_ref][match_good].values
861 errors_target = (
862 subset_target[column_err_target].values
863 if column_err_target is not None
864 else None
865 )
866 compute_stats(
867 values_ref,
868 subset_target[column_target].values,
869 errors_target,
870 row,
871 stats,
872 suffixes,
873 prefix=f'{band}_{sourcetype_info.label}_{column}',
874 skip_diff=skip_diff,
875 )
876
877 # Count matched target sources with *measured* mags within bin
878 # Used for e.g. purity calculation
879 # Should be merged with above code if there's ever a need for
880 # measuring stats on this source selection
881 select_target_sub &= matched_target
882
883 if is_extended is not None and (np.count_nonzero(select_target_sub) > 0):
884 n_total = np.count_nonzero(select_target_sub)
885 right_type = np.zeros(n_target, dtype=bool)
886 right_type[match_row[matched_ref & is_extended_ref]] = True
887 right_type &= select_target_sub
888 n_right = np.count_nonzero(right_type)
889 row[_get_column_name(band, sourcetype_info.label, 'n_target',
890 MatchType.MATCH_RIGHT.value)] = n_right
891 row[_get_column_name(band, sourcetype_info.label, 'n_target',
892 MatchType.MATCH_WRONG.value)] = n_total - n_right
893
894 # delete the flux/color columns since they change with each band
895 for prefix in ('flux', 'mag'):
896 del columns_target[f'{prefix}_{column_target_flux}']
897 if not skip_color:
898 del columns_target[f'color_{band_prev}_m_{band}_{column_target_flux}']
899
900 # keep values needed for colors
901 flux_err_frac_prev = flux_err_frac
902 mag_prev = mag_model
903 band_prev = band
904 if idx_band == 0:
905 flux_err_frac_first = flux_err_frac
906 mag_first = mag_model
907 mag_ref_first = mag_ref
908
909 retStruct = pipeBase.Struct(cat_matched=cat_matched, diff_matched=pd.DataFrame(data))
910 return retStruct

◆ runQuantum()

lsst.pipe.tasks.diff_matched_tract_catalog.DiffMatchedTractCatalogTask.runQuantum ( self,
butlerQC,
inputRefs,
outputRefs )

Definition at line 563 of file diff_matched_tract_catalog.py.

def runQuantum(self, butlerQC, inputRefs, outputRefs):
    """Fetch this quantum's inputs, call `run` on them and store the result.

    Parameters
    ----------
    butlerQC
        Quantum context. Used here for ``get``, ``put`` and to look up
        ``quantum.dataId["tract"]``.
    inputRefs
        Input references, passed unchanged to ``butlerQC.get``.
    outputRefs
        Output references, passed unchanged to ``butlerQC.put``.
    """
    loaded = butlerQC.get(inputRefs)
    sky_map = loaded.pop("skymap")

    # 'match_candidate' is requested from the target match catalog only
    # when it appears in that catalog's list of columns.
    wanted_match_target = ['match_row']
    if 'match_candidate' in loaded['columns_match_target']:
        wanted_match_target += ['match_candidate']

    def read_columns(key, columns):
        # Each catalog input is read through its own ``get`` with a
        # column-subset parameter.
        return loaded[key].get(parameters={'columns': columns})

    # Reads happen in the same order as the keyword arguments to `run`.
    catalog_ref = read_columns('cat_ref', self.config.columns_in_ref)
    catalog_target = read_columns('cat_target', self.config.columns_in_target)
    catalog_match_ref = read_columns('cat_match_ref', ['match_candidate', 'match_row'])
    catalog_match_target = read_columns('cat_match_target', wanted_match_target)
    tract_wcs = sky_map[butlerQC.quantum.dataId["tract"]].wcs

    result = self.run(
        catalog_ref=catalog_ref,
        catalog_target=catalog_target,
        catalog_match_ref=catalog_match_ref,
        catalog_match_target=catalog_match_target,
        wcs=tract_wcs,
    )
    butlerQC.put(result, outputRefs)
583

Member Data Documentation

◆ _DefaultName

str lsst.pipe.tasks.diff_matched_tract_catalog.DiffMatchedTractCatalogTask._DefaultName = "DiffMatchedTractCatalog"
staticprotected

Definition at line 561 of file diff_matched_tract_catalog.py.

◆ ConfigClass

lsst.pipe.tasks.diff_matched_tract_catalog.DiffMatchedTractCatalogTask.ConfigClass = DiffMatchedTractCatalogConfig
static

Definition at line 560 of file diff_matched_tract_catalog.py.


The documentation for this class was generated from the following file: