Loading [MathJax]/extensions/tex2jax.js
LSST Applications g0fba68d861+b562e0a09f,g1ec0fe41b4+3ea9d11450,g1fd858c14a+9be2b0f3b9,g2440f9efcc+8c5ae1fdc5,g33b6eb7922+23bc9e47ac,g35bb328faa+8c5ae1fdc5,g4a4af6cd76+d25431c27e,g4d2262a081+e64e5ff751,g53246c7159+8c5ae1fdc5,g55585698de+be1c65ba71,g56a49b3a55+92a7603e7a,g60b5630c4e+be1c65ba71,g67b6fd64d1+3fc8cb0b9e,g78460c75b0+7e33a9eb6d,g786e29fd12+668abc6043,g8352419a5c+8c5ae1fdc5,g8852436030+60e38ee5ff,g89139ef638+3fc8cb0b9e,g94187f82dc+be1c65ba71,g989de1cb63+3fc8cb0b9e,g9d31334357+be1c65ba71,g9f33ca652e+69d6bbdd4b,gabe3b4be73+8856018cbb,gabf8522325+977d9fabaf,gb1101e3267+b0077987df,gb89ab40317+3fc8cb0b9e,gc91f06edcd+2e2ca305f6,gcf25f946ba+60e38ee5ff,gd6cbbdb0b4+1cc2750d2e,gdb1c4ca869+be65c9c1d7,gde0f65d7ad+b038c5c67d,ge278dab8ac+6b863515ed,ge410e46f29+3fc8cb0b9e,geb5476ad96+a886b35a30,gf35d7ec915+97dd712d81,gf5e32f922b+8c5ae1fdc5,gf618743f1b+3164b09b60,gf67bdafdda+3fc8cb0b9e,w.2025.18
LSST Data Management Base Package
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
matcher_probabilistic.py
Go to the documentation of this file.
1# This file is part of meas_astrom.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21
22__all__ = [
23 "CatalogExtras", "ComparableCatalog", "ConvertCatalogCoordinatesConfig", "MatchProbabilisticConfig",
24 "MatcherProbabilistic",
25]
26
27from dataclasses import dataclass
28import logging
29import time
30from typing import Callable, Set
31import warnings
32
33import astropy.table
34import lsst.pex.config as pexConfig
35import numpy as np
36import pandas as pd
37from scipy.spatial import cKDTree
38from smatch.matcher import Matcher
39
40logger_default = logging.getLogger(__name__)
41
42
43def _mul_column(column: np.array, value: float):
44 if value is not None and value != 1:
45 column *= value
46 return column
47
48
49def _radec_to_xyz(ra, dec):
50 """Convert input ra/dec coordinates to spherical unit vectors.
51
52 Parameters
53 ----------
54 ra, dec: `numpy.ndarray`
55 Arrays of right ascension/declination in degrees.
56
57 Returns
58 -------
59 vectors : `numpy.ndarray`, (N, 3)
60 Output unit vectors.
61 """
62 if ra.size != dec.size:
63 raise ValueError('ra and dec must be same size')
64 ras = np.radians(ra)
65 decs = np.radians(dec)
66 vectors = np.empty((ras.size, 3))
67
68 sin_dec = np.sin(np.pi / 2 - decs)
69 vectors[:, 0] = sin_dec * np.cos(ras)
70 vectors[:, 1] = sin_dec * np.sin(ras)
71 vectors[:, 2] = np.cos(np.pi / 2 - decs)
72
73 return vectors
74
75
76@dataclass
78 """Store frequently-reference (meta)data relevant for matching a catalog.
79
80 Parameters
81 ----------
82 catalog : `pandas.DataFrame`
83 A pandas catalog to store extra information for.
84 select : `numpy.array`
85 A numpy boolean array of the same length as catalog to be used for
86 target selection.
87 """
88
89 n: int
90 indices: np.array
91 select: np.array
92
93 coordinate_factor: float = None
94
96 self,
97 catalog: astropy.table.Table | pd.DataFrame,
98 select: np.array = None,
99 coordinate_factor: float = None,
100 ):
101 self.n = len(catalog)
102 self.select = np.ones(self.n, dtype=bool) if select is None else select
103 self.indices = np.flatnonzero(select) if select is not None else np.arange(self.n)
104 self.coordinate_factor = coordinate_factor
105
106
107@dataclass(frozen=True)
109 """A catalog with sources with coordinate columns in some standard format/units.
110
111 catalog : `pandas.DataFrame`
112 A catalog with comparable coordinate columns.
113 column_coord1 : `str`
114 The first spatial coordinate column name.
115 column_coord2 : `str`
116 The second spatial coordinate column name.
117 coord1 : `numpy.array`
118 The first spatial coordinate values.
119 coord2 : `numpy.array`
120 The second spatial coordinate values.
121 extras : `CatalogExtras`
122 Extra cached (meta)data for the `catalog`.
123 """
124
125 catalog: astropy.table.Table | pd.DataFrame
126 column_coord1: str
127 column_coord2: str
128 coord1: np.array
129 coord2: np.array
130 extras: CatalogExtras
131
132
133class ConvertCatalogCoordinatesConfig(pexConfig.Config):
134 """Configuration for the MatchProbabilistic matcher."""
135
136 column_ref_coord1 = pexConfig.Field[str](
137 default='ra',
138 doc='The reference table column for the first spatial coordinate (usually x or ra).',
139 )
140 column_ref_coord2 = pexConfig.Field[str](
141 default='dec',
142 doc='The reference table column for the second spatial coordinate (usually y or dec).'
143 'Units must match column_ref_coord1.',
144 )
145 column_target_coord1 = pexConfig.Field[str](
146 default='coord_ra',
147 doc='The target table column for the first spatial coordinate (usually x or ra).'
148 'Units must match column_ref_coord1.',
149 )
150 column_target_coord2 = pexConfig.Field[str](
151 default='coord_dec',
152 doc='The target table column for the second spatial coordinate (usually y or dec).'
153 'Units must match column_ref_coord2.',
154 )
155 coords_spherical = pexConfig.Field[bool](
156 default=True,
157 doc='Whether column_*_coord[12] are spherical coordinates (ra/dec) or not (pixel x/y).',
158 )
159 coords_ref_factor = pexConfig.Field[float](
160 default=1.0,
161 doc='Multiplicative factor for reference catalog coordinates.'
162 'If coords_spherical is true, this must be the number of degrees per unit increment of '
163 'column_ref_coord[12]. Otherwise, it must convert the coordinate to the same units'
164 ' as the target coordinates.',
165 )
166 coords_target_factor = pexConfig.Field[float](
167 default=1.0,
168 doc='Multiplicative factor for target catalog coordinates.'
169 'If coords_spherical is true, this must be the number of degrees per unit increment of '
170 'column_target_coord[12]. Otherwise, it must convert the coordinate to the same units'
171 ' as the reference coordinates.',
172 )
173 coords_ref_to_convert = pexConfig.DictField[str, str](
174 default=None,
175 optional=True,
176 dictCheck=lambda x: len(x) == 2,
177 doc='Dict mapping sky coordinate columns to be converted to pixel columns.',
178 )
179 mag_zeropoint_ref = pexConfig.Field[float](
180 default=31.4,
181 doc='Magnitude zeropoint for reference catalog.',
182 )
183 return_converted_coords = pexConfig.Field[float](
184 default=True,
185 doc='Whether to return converted coordinates for matching or only write them.',
186 )
187
189 self,
190 catalog_ref: astropy.table.Table | pd.DataFrame,
191 catalog_target: astropy.table.Table | pd.DataFrame,
192 select_ref: np.array = None,
193 select_target: np.array = None,
194 radec_to_xy_func: Callable = None,
195 **kwargs,
196 ):
197 """Format matched catalogs that may require coordinate conversions.
198
199 Parameters
200 ----------
201 catalog_ref : `pandas.DataFrame`
202 A reference catalog for comparison to `catalog_target`.
203 catalog_target : `pandas.DataFrame`
204 A target catalog with measurements for comparison to `catalog_ref`.
205 select_ref : `numpy.ndarray`, (Nref,)
206 A boolean array of len `catalog_ref`, True for valid match candidates.
207 select_target : `numpy.ndarray`, (Ntarget,)
208 A boolean array of len `catalog_target`, True for valid match candidates.
209 radec_to_xy_func : `typing.Callable`
210 Function taking equal-length ra, dec arrays and returning an ndarray of
211 - ``x``: current parameter (`float`).
212 - ``extra_args``: additional arguments (`dict`).
213 kwargs
214 Additional keyword arguments to pass to radec_to_xy_func.
215
216 Returns
217 -------
218 compcat_ref, compcat_target : `ComparableCatalog`
219 Comparable catalogs corresponding to the input reference and target.
220 """
221 # TODO: Remove pandas support in DM-46523
222 is_ref_pd = isinstance(catalog_ref, pd.DataFrame)
223 is_target_pd = isinstance(catalog_target, pd.DataFrame)
224 if is_ref_pd:
225 catalog_ref = astropy.table.Table.from_pandas(catalog_ref)
226 if is_target_pd:
227 catalog_target = astropy.table.Table.from_pandas(catalog_target)
228 if is_ref_pd or is_target_pd:
229 warnings.warn("pandas usage in MatchProbabilisticTask is deprecated; it will be removed "
230 " in favour of astropy.table after release 28.0.0", category=FutureWarning)
231
232 convert_ref = self.coords_ref_to_convert
233 if convert_ref and not callable(radec_to_xy_func):
234 raise TypeError('radec_to_xy_func must be callable if converting ref coords')
235
236 # Set up objects with frequently-used attributes like selection bool array
237 extras_ref, extras_target = (
238 CatalogExtras(catalog, select=select, coordinate_factor=coord_factor)
239 for catalog, select, coord_factor in zip(
240 (catalog_ref, catalog_target),
241 (select_ref, select_target),
243 )
244 )
245
246 compcats = []
247
248 # Retrieve coordinates and multiply them by scaling factors
249 for catalog, extras, (column1, column2), convert in (
250 (catalog_ref, extras_ref, (self.column_ref_coord1, self.column_ref_coord2), convert_ref),
251 (catalog_target, extras_target, (self.column_target_coord1, self.column_target_coord2), False),
252 ):
253 coord1, coord2 = (
254 _mul_column(catalog[column], extras.coordinate_factor)
255 for column in (column1, column2)
256 )
257 if convert:
258 xy_ref = radec_to_xy_func(coord1, coord2, self.coords_ref_factor, **kwargs)
259 for idx_coord, column_out in enumerate(self.coords_ref_to_convert.values()):
260 coord = np.array([xy[idx_coord] for xy in xy_ref])
261 catalog[column_out] = coord
262 if convert_ref:
263 column1, column2 = self.coords_ref_to_convert.values()
265 coord1, coord2 = catalog[column1], catalog[column2]
266 if isinstance(coord1, pd.Series):
267 coord1 = coord1.values
268 if isinstance(coord2, pd.Series):
269 coord2 = coord2.values
270
271 compcats.append(ComparableCatalog(
272 catalog=catalog, column_coord1=column1, column_coord2=column2,
273 coord1=coord1, coord2=coord2, extras=extras,
274 ))
275
276 return compcats[0], compcats[1]
277
278
279class MatchProbabilisticConfig(pexConfig.Config):
280 """Configuration for the MatchProbabilistic matcher."""
281
282 column_ref_order = pexConfig.Field(
283 dtype=str,
284 default=None,
285 optional=True,
286 doc='Name of column in reference catalog specifying order for matching'
287 ' Derived from columns_ref_flux if not set.',
288 )
289
290 @property
291 def columns_in_ref(self) -> Set[str]:
292 columns_all = [
293 self.coord_format.column_ref_coord1,
294 self.coord_format.column_ref_coord2,
295 ]
296 for columns in (
297 self.columns_ref_flux,
298 self.columns_ref_meas,
301 self.columns_ref_copy,
302 ):
303 columns_all.extend(columns)
304 if self.column_ref_order:
305 columns_all.append(self.column_ref_order)
306
307 return set(columns_all)
308
309 @property
310 def columns_in_target(self) -> Set[str]:
311 columns_all = [
312 self.coord_format.column_target_coord1,
313 self.coord_format.column_target_coord2,
314 ]
315 for columns in (
321 ):
322 columns_all.extend(columns)
323 return set(columns_all)
324
325 columns_ref_copy = pexConfig.ListField(
326 dtype=str,
327 default=[],
328 listCheck=lambda x: len(set(x)) == len(x),
329 optional=True,
330 doc='Reference table columns to copy unchanged into both match tables',
331 )
332 columns_ref_flux = pexConfig.ListField(
333 dtype=str,
334 default=[],
335 optional=True,
336 doc="List of reference flux columns to nansum total magnitudes from if column_order is None",
337 )
338 columns_ref_meas = pexConfig.ListField(
339 dtype=str,
340 doc='The reference table columns to compute match likelihoods from '
341 '(usually centroids and fluxes/magnitudes)',
342 )
343 columns_ref_select_true = pexConfig.ListField(
344 dtype=str,
345 default=tuple(),
346 doc='Reference table columns to require to be True for selecting sources',
347 )
348 columns_ref_select_false = pexConfig.ListField(
349 dtype=str,
350 default=tuple(),
351 doc='Reference table columns to require to be False for selecting sources',
352 )
353 columns_target_copy = pexConfig.ListField(
354 dtype=str,
355 default=[],
356 listCheck=lambda x: len(set(x)) == len(x),
357 optional=True,
358 doc='Target table columns to copy unchanged into both match tables',
359 )
360 columns_target_meas = pexConfig.ListField(
361 dtype=str,
362 doc='Target table columns with measurements corresponding to columns_ref_meas',
363 )
364 columns_target_err = pexConfig.ListField(
365 dtype=str,
366 doc='Target table columns with standard errors (sigma) corresponding to columns_ref_meas',
367 )
368 columns_target_select_true = pexConfig.ListField(
369 dtype=str,
370 default=('detect_isPrimary',),
371 doc='Target table columns to require to be True for selecting sources',
372 )
373 columns_target_select_false = pexConfig.ListField(
374 dtype=str,
375 default=('merge_peak_sky',),
376 doc='Target table columns to require to be False for selecting sources',
377 )
378 coord_format = pexConfig.ConfigField(
379 dtype=ConvertCatalogCoordinatesConfig,
380 doc="Configuration for coordinate conversion",
381 )
382 mag_brightest_ref = pexConfig.Field(
383 dtype=float,
384 default=-np.inf,
385 doc='Bright magnitude cutoff for selecting reference sources to match.'
386 ' Ignored if column_ref_order is None.'
387 )
388 mag_faintest_ref = pexConfig.Field(
389 dtype=float,
390 default=np.inf,
391 doc='Faint magnitude cutoff for selecting reference sources to match.'
392 ' Ignored if column_ref_order is None.'
393 )
394 match_dist_max = pexConfig.Field(
395 dtype=float,
396 default=0.5,
397 doc='Maximum match distance. Units must be arcseconds if coords_spherical, '
398 'or else match those of column_*_coord[12] multiplied by coords_*_factor.',
399 )
400 match_n_max = pexConfig.Field(
401 dtype=int,
402 default=10,
403 optional=True,
404 doc='Maximum number of spatial matches to consider (in ascending distance order).',
405 check=lambda x: x >= 1,
406 )
407 match_n_finite_min = pexConfig.Field(
408 dtype=int,
409 default=2,
410 optional=True,
411 doc='Minimum number of columns with a finite value to measure match likelihood',
412 check=lambda x: x >= 2,
413 )
414 order_ascending = pexConfig.Field(
415 dtype=bool,
416 default=False,
417 optional=True,
418 doc='Whether to order reference match candidates in ascending order of column_ref_order '
419 '(should be False if the column is a flux and True if it is a magnitude.',
420 )
421
422 def validate(self):
423 super().validate()
424 n_ref_meas = len(self.columns_ref_meas)
425 n_target_meas = len(self.columns_target_meas)
426 n_target_err = len(self.columns_target_err)
427 match_n_finite_min = self.match_n_finite_min
428 errors = []
429 if n_target_meas != n_ref_meas:
430 errors.append(f"{len(self.columns_target_meas)=} != {len(self.columns_ref_meas)=}")
431 if n_target_err != n_ref_meas:
432 errors.append(f"{len(self.columns_target_err)=} != {len(self.columns_ref_meas)=}")
433 if not (n_ref_meas >= match_n_finite_min):
434 errors.append(
435 f"{len(self.columns_ref_meas)=} !>= {self.match_n_finite_min=}, no matches possible"
436 )
437 if errors:
438 raise ValueError("\n".join(errors))
439
440
441def default_value(dtype):
442 if dtype is str:
443 return ''
444 elif np.issubdtype(dtype, np.signedinteger):
445 return np.inf
446 elif np.issubdtype(dtype, np.unsignedinteger):
447 return -np.inf
448 return None
449
450
452 """A probabilistic, greedy catalog matcher.
453
454 Parameters
455 ----------
456 config: `MatchProbabilisticConfig`
457 A configuration instance.
458 """
459
460 config: MatchProbabilisticConfig
461
463 self,
464 config: MatchProbabilisticConfig,
465 ):
466 self.config = config
467
468 def match(
469 self,
470 catalog_ref: astropy.table.Table | pd.DataFrame,
471 catalog_target: astropy.table.Table | pd.DataFrame,
472 select_ref: np.array = None,
473 select_target: np.array = None,
474 logger: logging.Logger = None,
475 logging_n_rows: int = None,
476 **kwargs
477 ):
478 """Match catalogs.
479
480 Parameters
481 ----------
482 catalog_ref : `pandas.DataFrame` | `astropy.table.Table`
483 A reference catalog to match in order of a given column (i.e. greedily).
484 catalog_target : `pandas.DataFrame` | `astropy.table.Table`
485 A target catalog for matching sources from `catalog_ref`. Must contain measurements with errors.
486 select_ref : `numpy.array`
487 A boolean array of the same length as `catalog_ref` selecting the sources that can be matched.
488 select_target : `numpy.array`
489 A boolean array of the same length as `catalog_target` selecting the sources that can be matched.
490 logger : `logging.Logger`
491 A Logger for logging.
492 logging_n_rows : `int`
493 The number of sources to match before printing a log message.
494 kwargs
495 Additional keyword arguments to pass to `format_catalogs`.
496
497 Returns
498 -------
499 catalog_out_ref : `pandas.DataFrame`
500 A catalog of identical length to `catalog_ref`, containing match information for rows selected by
501 `select_ref` (including the matching row index in `catalog_target`).
502 catalog_out_target : `pandas.DataFrame`
503 A catalog of identical length to `catalog_target`, containing the indices of matching rows in
504 `catalog_ref`.
505 exceptions : `dict` [`int`, `Exception`]
506 A dictionary keyed by `catalog_target` row number of the first exception caught when matching.
507 """
508 if logger is None:
509 logger = logger_default
510
511 # TODO: Remove pandas support in DM-46523
512 is_ref_pd = isinstance(catalog_ref, pd.DataFrame)
513 is_target_pd = isinstance(catalog_target, pd.DataFrame)
514 if is_ref_pd:
515 catalog_ref = astropy.table.Table.from_pandas(catalog_ref)
516 if is_target_pd:
517 catalog_target = astropy.table.Table.from_pandas(catalog_target)
518 if is_ref_pd or is_target_pd:
519 warnings.warn("pandas usage in MatchProbabilisticTask is deprecated; it will be removed "
520 " in favour of astropy.table after release 28.0.0", category=FutureWarning)
521
522 t_init = time.process_time()
523 config = self.config
524
525 # Transform any coordinates, if required
526 # Note: The returned objects contain the original catalogs, as well as
527 # transformed coordinates, and the selection of sources for matching.
528 # These might be identical to the arrays passed as kwargs, but that
529 # depends on config settings.
530 # For the rest of this function, the selection arrays will be used,
531 # but the indices of the original, unfiltered catalog will also be
532 # output, so some further indexing steps are needed.
533 with warnings.catch_warnings():
534 # We already issued a deprecation warning; no need to repeat it.
535 warnings.filterwarnings(action="ignore", category=FutureWarning)
536 ref, target = config.coord_format.format_catalogs(
537 catalog_ref=catalog_ref, catalog_target=catalog_target,
538 select_ref=select_ref, select_target=select_target,
539 **kwargs
540 )
541
542 # If no order is specified, take nansum of all flux columns for a 'total flux'
543 # Note: it won't actually be a total flux if bands overlap significantly
544 # (or it might define a filter with >100% efficiency
545 column_order = (
546 catalog_ref[config.column_ref_order][ref.extras.select]
547 if config.column_ref_order is not None else
548 np.nansum([catalog_ref[col][ref.extras.select] for col in config.columns_ref_flux], axis=0)
549 )
550 order = np.argsort(column_order if config.order_ascending else -column_order)
551
552 n_ref_select = len(ref.extras.indices)
553
554 coords_spherical = config.coord_format.coords_spherical
555 coords_ref, coords_target = (
556 (cat.coord1[cat.extras.select], cat.coord2[cat.extras.select])
557 for cat in (ref, target)
558 )
559
560 # Generate K-d tree to compute distances
561 logger.info('Generating cKDTree with match_n_max=%d', config.match_n_max)
562
563 if coords_spherical:
564 match_dist_max = config.match_dist_max/3600.
565 with Matcher(coords_target[0], coords_target[1]) as matcher:
566 idxs_target_select = matcher.query_knn(
567 coords_ref[0], coords_ref[1],
568 distance_upper_bound=match_dist_max,
569 k=config.match_n_max,
570 )
571 # Call scipy for non-spherical case
572 # The spherical case won't trigger, but the implementation is left for comparison, if needed
573 else:
574 match_dist_max = np.radians(config.match_dist_max/3600.)
575 # Convert ra/dec sky coordinates to spherical vectors for accurate distances
576 func_convert = _radec_to_xyz if coords_spherical else np.vstack
577 vec_ref, vec_target = (
578 func_convert(coords[0], coords[1])
579 for coords in (coords_ref, coords_target)
580 )
581 tree_obj = cKDTree(vec_target)
582 _, idxs_target_select = tree_obj.query(
583 vec_ref,
584 distance_upper_bound=match_dist_max,
585 k=config.match_n_max,
586 )
587
588 n_target_select = len(target.extras.indices)
589 n_matches = np.sum(idxs_target_select != n_target_select, axis=1)
590 n_matched_max = np.sum(n_matches == config.match_n_max)
591 if n_matched_max > 0:
592 logger.warning(
593 '%d/%d (%.2f%%) selected true objects have n_matches=n_match_max(%d)',
594 n_matched_max, n_ref_select, 100.*n_matched_max/n_ref_select, config.match_n_max
595 )
596
597 # Pre-allocate outputs
598 target_row_match = np.full(target.extras.n, np.iinfo(np.int64).min, dtype=np.int64)
599 ref_candidate_match = np.zeros(ref.extras.n, dtype=bool)
600 ref_row_match = np.full(ref.extras.n, np.iinfo(np.int64).min, dtype=np.int64)
601 ref_match_count = np.zeros(ref.extras.n, dtype=np.int32)
602 ref_match_meas_finite = np.zeros(ref.extras.n, dtype=np.int32)
603 ref_chisq = np.full(ref.extras.n, np.nan, dtype=float)
604
605 # Need the original reference row indices for output
606 idx_orig_ref, idx_orig_target = (np.argwhere(cat.extras.select)[:, 0] for cat in (ref, target))
607
608 # Retrieve required columns, including any converted ones (default to original column name)
609 columns_convert = config.coord_format.coords_ref_to_convert
610 if columns_convert is None:
611 columns_convert = {}
612 data_ref = np.array([
613 ref.catalog[columns_convert.get(column, column)][ref.extras.indices[order]]
614 for column in config.columns_ref_meas
615 ])
616 data_target = np.array([
617 target.catalog[col][target.extras.select] for col in config.columns_target_meas
618 ])
619 errors_target = np.array([
620 target.catalog[col][target.extras.select] for col in config.columns_target_err
621 ])
622
623 exceptions = {}
624 # The kdTree uses len(inputs) as a sentinel value for no match
625 matched_target = {n_target_select, }
626 index_ref = idx_orig_ref[order]
627 # Fill in the candidate column
628 ref_candidate_match[index_ref] = True
629
630 # Count this as the time when disambiguation begins
631 t_begin = time.process_time()
632
633 # Exclude unmatched sources
634 matched_ref = idxs_target_select[order, 0] != n_target_select
635 order = order[matched_ref]
636 idx_first = idxs_target_select[order, 0]
637 chi_0 = (data_target[:, idx_first] - data_ref[:, matched_ref])/errors_target[:, idx_first]
638 chi_finite_0 = np.isfinite(chi_0)
639 n_finite_0 = np.sum(chi_finite_0, axis=0)
640 chi_0[~chi_finite_0] = 0
641 chisq_sum_0 = np.sum(chi_0*chi_0, axis=0)
642 n_meas = len(config.columns_ref_meas)
643
644 logger.info('Disambiguating %d/%d matches/targets', len(order), len(ref.catalog))
645 for index_n, index_row_select in enumerate(order):
646 index_row = idx_orig_ref[index_row_select]
647 found = idxs_target_select[index_row_select, :]
648 # Unambiguous match, short-circuit some evaluations
649 if (found[1] == n_target_select) and (found[0] not in matched_target):
650 n_finite = n_finite_0[index_n]
651 if not (n_finite >= config.match_n_finite_min):
652 continue
653 idx_chisq_min = 0
654 n_matched = 1
655 chisq_sum = chisq_sum_0[index_n]
656 else:
657 # Select match candidates from nearby sources not already matched
658 # Note: set lookup is apparently fast enough that this is a few percent faster than:
659 # found = [x for x in found[found != n_target_select] if x not in matched_target]
660 # ... at least for ~1M sources
661 found = [x for x in found if x not in matched_target]
662 n_found = len(found)
663 if n_found == 0:
664 continue
665 # This is an ndarray of n_found rows x len(data_ref/target) columns
666 chi = (
667 data_target[:, found] - data_ref[:, index_n].reshape((n_meas, 1))
668 )/errors_target[:, found]
669 finite = np.isfinite(chi)
670 n_finite = np.sum(finite, axis=0)
671 # Require some number of finite chi_sq to match
672 chisq_good = n_finite >= config.match_n_finite_min
673 if not any(chisq_good):
674 continue
675 try:
676 chisq_sum = np.zeros(n_found, dtype=float)
677 chisq_sum[chisq_good] = np.nansum(chi[:, chisq_good] ** 2, axis=0)
678 idx_chisq_min = np.nanargmin(chisq_sum / n_finite)
679 n_finite = n_finite[idx_chisq_min]
680 n_matched = len(chisq_good)
681 chisq_sum = chisq_sum[idx_chisq_min]
682 except Exception as error:
683 # Can't foresee any exceptions, but they shouldn't prevent
684 # matching subsequent sources
685 exceptions[index_row] = error
686 ref_match_meas_finite[index_row] = n_finite
687 ref_match_count[index_row] = n_matched
688 ref_chisq[index_row] = chisq_sum
689 idx_match_select = found[idx_chisq_min]
690 row_target = target.extras.indices[idx_match_select]
691 ref_row_match[index_row] = row_target
692
693 target_row_match[row_target] = index_row
694 matched_target.add(idx_match_select)
695
696 if logging_n_rows and ((index_n + 1) % logging_n_rows == 0):
697 t_elapsed = time.process_time() - t_begin
698 logger.info(
699 'Processed %d/%d in %.2fs at sort value=%.3f',
700 index_n + 1, n_ref_select, t_elapsed, column_order[order[index_n]],
701 )
702
703 data_ref = {
704 'match_candidate': ref_candidate_match,
705 'match_row': ref_row_match,
706 'match_count': ref_match_count,
707 'match_chisq': ref_chisq,
708 'match_n_chisq_finite': ref_match_meas_finite,
709 }
710 data_target = {
711 'match_candidate': target.extras.select if target.extras.select is not None else (
712 np.ones(target.extras.n, dtype=bool)),
713 'match_row': target_row_match,
714 }
715
716 for (columns, out_original, out_matched, in_original, in_matched, matches, name_cat) in (
717 (
718 self.config.columns_ref_copy,
719 data_ref,
720 data_target,
721 ref,
722 target,
723 target_row_match,
724 'target',
725 ),
726 (
727 self.config.columns_target_copy,
728 data_target,
729 data_ref,
730 target,
731 ref,
732 ref_row_match,
733 'reference',
734 ),
735 ):
736 matched = matches >= 0
737 idx_matched = matches[matched]
738 logger.info('Matched %d/%d %s sources', np.sum(matched), len(matched), name_cat)
739
740 for column in columns:
741 values = in_original.catalog[column]
742 out_original[column] = values
743 dtype = in_original.catalog[column].dtype
744
745 # Pandas object columns can have mixed types - check for that
746 if dtype is object:
747 types = list(set((type(x) for x in values)))
748 if len(types) != 1:
749 raise RuntimeError(f'Column {column} dtype={dtype} has multiple types={types}')
750 dtype = types[0]
751
752 value_fill = default_value(dtype)
753
754 # Without this, the dtype would be '<U1' for an empty Unicode string
755 if dtype is str:
756 dtype = f'<U{max(len(x) for x in values)}'
757
758 column_match = np.full(in_matched.extras.n, value_fill, dtype=dtype)
759 column_match[matched] = in_original.catalog[column][idx_matched]
760 out_matched[f'match_{column}'] = column_match
761
762 logger.info(
763 'Completed match disambiguating in %.2fs (total %.2fs)',
764 time.process_time() - t_begin,
765 time.process_time() - t_init,
766 )
767
768 catalog_out_ref = (pd.DataFrame if is_ref_pd else astropy.table.Table)(data_ref)
769 if not is_ref_pd:
770 for column, description in {
771 'match_candidate': 'Whether the object was selected as a candidate for matching',
772 'match_row': 'The index of the best matched row in the target table, if any',
773 'match_count': 'The number of candidate matching target objects, i.e. those within the match'
774 ' distance but excluding objects already matched to a ref object',
775 'match_chisq': 'The sum of all finite reduced chi-squared values over all match columns',
776 'match_n_chisq_finite': 'The number of match columns with finite chisq',
777 }.items():
778 catalog_out_ref[column].description = description
779 catalog_out_target = (pd.DataFrame if is_target_pd else astropy.table.Table)(data_target)
780
781 return catalog_out_ref, catalog_out_target, exceptions
__init__(self, astropy.table.Table|pd.DataFrame catalog, np.array select=None, float coordinate_factor=None)
format_catalogs(self, astropy.table.Table|pd.DataFrame catalog_ref, astropy.table.Table|pd.DataFrame catalog_target, np.array select_ref=None, np.array select_target=None, Callable radec_to_xy_func=None, **kwargs)
match(self, astropy.table.Table|pd.DataFrame catalog_ref, astropy.table.Table|pd.DataFrame catalog_target, np.array select_ref=None, np.array select_target=None, logging.Logger logger=None, int logging_n_rows=None, **kwargs)