matcher_probabilistic.py
# This file is part of meas_astrom.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

__all__ = [
    "CatalogExtras", "ComparableCatalog", "ConvertCatalogCoordinatesConfig", "MatchProbabilisticConfig",
    "MatcherProbabilistic",
]

from dataclasses import dataclass
import logging
import time
from typing import Callable, Set
import warnings

import astropy.table
import lsst.pex.config as pexConfig
import numpy as np
from scipy.spatial import cKDTree
from smatch.matcher import Matcher

logger_default = logging.getLogger(__name__)


def _mul_column(column: np.array, value: float):
    if value is not None and value != 1:
        column *= value
    return column


def _radec_to_xyz(ra, dec):
    """Convert input ra/dec coordinates to spherical unit vectors.

    Parameters
    ----------
    ra, dec: `numpy.ndarray`
        Arrays of right ascension/declination in degrees.

    Returns
    -------
    vectors : `numpy.ndarray`, (N, 3)
        Output unit vectors.
    """
    if ra.size != dec.size:
        raise ValueError('ra and dec must be same size')
    ras = np.radians(ra)
    decs = np.radians(dec)
    vectors = np.empty((ras.size, 3))

    sin_dec = np.sin(np.pi / 2 - decs)
    vectors[:, 0] = sin_dec * np.cos(ras)
    vectors[:, 1] = sin_dec * np.sin(ras)
    vectors[:, 2] = np.cos(np.pi / 2 - decs)

    return vectors

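
# Illustrative sketch (hypothetical helper, not part of the original module): for
# small separations, the Euclidean (chord) distance between the unit vectors returned
# by _radec_to_xyz approximates the angular separation in radians, which is why the
# legacy scipy-based branch in MatcherProbabilistic.match converts match_dist_max from
# arcseconds via np.radians(match_dist_max/3600.) when unit vectors are used.
def _example_radec_to_xyz_chord_distance():
    # Two points separated by 1 arcsecond in declination.
    ra = np.array([10.0, 10.0])
    dec = np.array([-5.0, -5.0 + 1.0/3600.0])
    vectors = _radec_to_xyz(ra, dec)
    chord = np.linalg.norm(vectors[1] - vectors[0])
    # chord is approximately np.radians(1.0/3600.0), i.e. ~4.85e-6
    return chord
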

@dataclass
class CatalogExtras:
    """Store frequently-referenced (meta)data relevant for matching a catalog.

    Parameters
    ----------
    catalog : `astropy.table.Table`
        A Table to store extra information for.
    select : `numpy.array`
        A numpy boolean array of the same length as catalog to be used for
        target selection.
    """

    n: int
    indices: np.array
    select: np.array

    coordinate_factor: float = None

    def __init__(
        self,
        catalog: astropy.table.Table,
        select: np.array = None,
        coordinate_factor: float = None,
    ):
        self.n = len(catalog)
        self.select = np.ones(self.n, dtype=bool) if select is None else select
        self.indices = np.flatnonzero(select) if select is not None else np.arange(self.n)
        self.coordinate_factor = coordinate_factor


@dataclass(frozen=True)
class ComparableCatalog:
    """A catalog of sources with coordinate columns in some standard format/units.

    Parameters
    ----------
    catalog : `astropy.table.Table`
        A catalog with comparable coordinate columns.
    column_coord1 : `str`
        The first spatial coordinate column name.
    column_coord2 : `str`
        The second spatial coordinate column name.
    coord1 : `numpy.array`
        The first spatial coordinate values.
    coord2 : `numpy.array`
        The second spatial coordinate values.
    extras : `CatalogExtras`
        Extra cached (meta)data for the `catalog`.
    """

    catalog: astropy.table.Table
    column_coord1: str
    column_coord2: str
    coord1: np.array
    coord2: np.array
    extras: CatalogExtras

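# Illustrative usage sketch (hypothetical example, not part of the original module):
# CatalogExtras caches the selection mask and the corresponding row indices that the
# matcher later uses to map results back to the original, unfiltered catalog.
def _example_catalog_extras():
    catalog = astropy.table.Table({'ra': [10.0, 10.1, 10.2], 'dec': [0.0, 0.1, 0.2]})
    select = np.array([True, False, True])
    extras = CatalogExtras(catalog, select=select, coordinate_factor=1.0)
    # extras.n == 3; extras.indices == array([0, 2]); extras.select is the input mask
    return extras

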
class ConvertCatalogCoordinatesConfig(pexConfig.Config):
    """Configuration for converting catalog coordinates to common units for matching."""

    column_ref_coord1 = pexConfig.Field[str](
        default='ra',
        doc='The reference table column for the first spatial coordinate (usually x or ra).',
    )
    column_ref_coord2 = pexConfig.Field[str](
        default='dec',
        doc='The reference table column for the second spatial coordinate (usually y or dec).'
            ' Units must match column_ref_coord1.',
    )
    column_target_coord1 = pexConfig.Field[str](
        default='coord_ra',
        doc='The target table column for the first spatial coordinate (usually x or ra).'
            ' Units must match column_ref_coord1.',
    )
    column_target_coord2 = pexConfig.Field[str](
        default='coord_dec',
        doc='The target table column for the second spatial coordinate (usually y or dec).'
            ' Units must match column_ref_coord2.',
    )
    coords_spherical = pexConfig.Field[bool](
        default=True,
        doc='Whether column_*_coord[12] are spherical coordinates (ra/dec) or not (pixel x/y).',
    )
    coords_ref_factor = pexConfig.Field[float](
        default=1.0,
        doc='Multiplicative factor for reference catalog coordinates.'
            ' If coords_spherical is true, this must be the number of degrees per unit increment of '
            'column_ref_coord[12]. Otherwise, it must convert the coordinate to the same units'
            ' as the target coordinates.',
    )
    coords_target_factor = pexConfig.Field[float](
        default=1.0,
        doc='Multiplicative factor for target catalog coordinates.'
            ' If coords_spherical is true, this must be the number of degrees per unit increment of '
            'column_target_coord[12]. Otherwise, it must convert the coordinate to the same units'
            ' as the reference coordinates.',
    )
    coords_ref_to_convert = pexConfig.DictField[str, str](
        default=None,
        optional=True,
        dictCheck=lambda x: len(x) == 2,
        doc='Dict mapping sky coordinate columns to be converted to pixel columns.',
    )
    mag_zeropoint_ref = pexConfig.Field[float](
        default=31.4,
        doc='Magnitude zeropoint for reference catalog.',
    )
    return_converted_coords = pexConfig.Field[bool](
        default=True,
        doc='Whether to return converted coordinates for matching or only write them.',
    )

    def format_catalogs(
        self,
        catalog_ref: astropy.table.Table,
        catalog_target: astropy.table.Table,
        select_ref: np.array = None,
        select_target: np.array = None,
        radec_to_xy_func: Callable = None,
        **kwargs,
    ):
        """Format catalogs for matching, converting coordinates if necessary.

        Parameters
        ----------
        catalog_ref : `astropy.table.Table`
            A reference catalog for comparison to `catalog_target`.
        catalog_target : `astropy.table.Table`
            A target catalog with measurements for comparison to `catalog_ref`.
        select_ref : `numpy.ndarray`, (Nref,)
            A boolean array of len `catalog_ref`, True for valid match candidates.
        select_target : `numpy.ndarray`, (Ntarget,)
            A boolean array of len `catalog_target`, True for valid match candidates.
        radec_to_xy_func : `typing.Callable`
            Function taking equal-length ra, dec arrays, a coordinate scale factor
            and any additional kwargs, and returning an iterable of (x, y)
            coordinate pairs (one per input row).
        kwargs
            Additional keyword arguments to pass to radec_to_xy_func.

        Returns
        -------
        compcat_ref, compcat_target : `ComparableCatalog`
            Comparable catalogs corresponding to the input reference and target.
        """
        convert_ref = self.coords_ref_to_convert
        if convert_ref and not callable(radec_to_xy_func):
            raise TypeError('radec_to_xy_func must be callable if converting ref coords')

        # Set up objects with frequently-used attributes like selection bool array
        extras_ref, extras_target = (
            CatalogExtras(catalog, select=select, coordinate_factor=coord_factor)
            for catalog, select, coord_factor in zip(
                (catalog_ref, catalog_target),
                (select_ref, select_target),
                (self.coords_ref_factor, self.coords_target_factor),
            )
        )

        compcats = []

        # Retrieve coordinates and multiply them by scaling factors
        for catalog, extras, (column1, column2), convert in (
            (catalog_ref, extras_ref, (self.column_ref_coord1, self.column_ref_coord2), convert_ref),
            (catalog_target, extras_target, (self.column_target_coord1, self.column_target_coord2), False),
        ):
            coord1, coord2 = (
                _mul_column(catalog[column], extras.coordinate_factor)
                for column in (column1, column2)
            )
            if convert:
                xy_ref = radec_to_xy_func(coord1, coord2, self.coords_ref_factor, **kwargs)
                for idx_coord, column_out in enumerate(self.coords_ref_to_convert.values()):
                    coord = np.array([xy[idx_coord] for xy in xy_ref])
                    catalog[column_out] = coord
                if convert_ref:
                    column1, column2 = self.coords_ref_to_convert.values()
                    if self.return_converted_coords:
                        coord1, coord2 = catalog[column1], catalog[column2]

            compcats.append(ComparableCatalog(
                catalog=catalog, column_coord1=column1, column_coord2=column2,
                coord1=coord1, coord2=coord2, extras=extras,
            ))

        return compcats[0], compcats[1]


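# Illustrative usage sketch (hypothetical example, not part of the original module):
# with the default spherical configuration, format_catalogs only rescales (by the
# configured factors) and caches the coordinate columns; no radec_to_xy_func is needed
# because coords_ref_to_convert is left unset.
def _example_format_catalogs():
    catalog_ref = astropy.table.Table({'ra': [10.0, 10.2], 'dec': [-5.0, -5.1]})
    catalog_target = astropy.table.Table(
        {'coord_ra': [10.0001, 10.2001], 'coord_dec': [-5.0001, -5.1001]})
    config = ConvertCatalogCoordinatesConfig()
    compcat_ref, compcat_target = config.format_catalogs(catalog_ref, catalog_target)
    # Each ComparableCatalog bundles the catalog, its coordinate column names and
    # values, and a CatalogExtras describing the (here, all-inclusive) selection.
    return compcat_ref, compcat_target

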
class MatchProbabilisticConfig(pexConfig.Config):
    """Configuration for the MatchProbabilistic matcher."""

    column_ref_order = pexConfig.Field(
        dtype=str,
        default=None,
        optional=True,
        doc='Name of column in reference catalog specifying order for matching.'
            ' Derived from columns_ref_flux if not set.',
    )

    @property
    def columns_in_ref(self) -> Set[str]:
        columns_all = [
            self.coord_format.column_ref_coord1,
            self.coord_format.column_ref_coord2,
        ]
        for columns in (
            self.columns_ref_flux,
            self.columns_ref_meas,
            self.columns_ref_select_false,
            self.columns_ref_select_true,
            self.columns_ref_copy,
        ):
            columns_all.extend(columns)
        if self.column_ref_order:
            columns_all.append(self.column_ref_order)

        return set(columns_all)

    @property
    def columns_in_target(self) -> Set[str]:
        columns_all = [
            self.coord_format.column_target_coord1,
            self.coord_format.column_target_coord2,
        ]
        for columns in (
            self.columns_target_meas,
            self.columns_target_err,
            self.columns_target_select_false,
            self.columns_target_select_true,
            self.columns_target_copy,
        ):
            columns_all.extend(columns)
        return set(columns_all)

    columns_ref_copy = pexConfig.ListField(
        dtype=str,
        default=[],
        listCheck=lambda x: len(set(x)) == len(x),
        optional=True,
        doc='Reference table columns to copy unchanged into both match tables',
    )
    columns_ref_flux = pexConfig.ListField(
        dtype=str,
        default=[],
        optional=True,
        doc="List of reference flux columns to nansum into a total flux for ordering matches"
            " if column_ref_order is None",
    )
    columns_ref_meas = pexConfig.ListField(
        dtype=str,
        doc='The reference table columns to compute match likelihoods from '
            '(usually centroids and fluxes/magnitudes)',
    )
    columns_ref_select_true = pexConfig.ListField(
        dtype=str,
        default=tuple(),
        doc='Reference table columns to require to be True for selecting sources',
    )
    columns_ref_select_false = pexConfig.ListField(
        dtype=str,
        default=tuple(),
        doc='Reference table columns to require to be False for selecting sources',
    )
    columns_target_copy = pexConfig.ListField(
        dtype=str,
        default=[],
        listCheck=lambda x: len(set(x)) == len(x),
        optional=True,
        doc='Target table columns to copy unchanged into both match tables',
    )
    columns_target_meas = pexConfig.ListField(
        dtype=str,
        doc='Target table columns with measurements corresponding to columns_ref_meas',
    )
    columns_target_err = pexConfig.ListField(
        dtype=str,
        doc='Target table columns with standard errors (sigma) corresponding to columns_ref_meas',
    )
    columns_target_select_true = pexConfig.ListField(
        dtype=str,
        default=('detect_isPrimary',),
        doc='Target table columns to require to be True for selecting sources',
    )
    columns_target_select_false = pexConfig.ListField(
        dtype=str,
        default=('merge_peak_sky',),
        doc='Target table columns to require to be False for selecting sources',
    )
    coord_format = pexConfig.ConfigField(
        dtype=ConvertCatalogCoordinatesConfig,
        doc="Configuration for coordinate conversion",
    )
    mag_brightest_ref = pexConfig.Field(
        dtype=float,
        default=-np.inf,
        doc='Bright magnitude cutoff for selecting reference sources to match.'
            ' Ignored if column_ref_order is None.'
    )
    mag_faintest_ref = pexConfig.Field(
        dtype=float,
        default=np.inf,
        doc='Faint magnitude cutoff for selecting reference sources to match.'
            ' Ignored if column_ref_order is None.'
    )
    match_dist_max = pexConfig.Field(
        dtype=float,
        default=0.5,
        doc='Maximum match distance. Units must be arcseconds if coords_spherical, '
            'or else match those of column_*_coord[12] multiplied by coords_*_factor.',
    )
    match_n_max = pexConfig.Field(
        dtype=int,
        default=10,
        optional=True,
        doc='Maximum number of spatial matches to consider (in ascending distance order).',
        check=lambda x: x >= 1,
    )
    match_n_finite_min = pexConfig.Field(
        dtype=int,
        default=2,
        optional=True,
        doc='Minimum number of columns with a finite value to measure match likelihood',
        check=lambda x: x >= 2,
    )
    order_ascending = pexConfig.Field(
        dtype=bool,
        default=False,
        optional=True,
        doc='Whether to order reference match candidates in ascending order of column_ref_order '
            '(should be False if the column is a flux and True if it is a magnitude).',
    )

    def validate(self):
        super().validate()
        n_ref_meas = len(self.columns_ref_meas)
        n_target_meas = len(self.columns_target_meas)
        n_target_err = len(self.columns_target_err)
        match_n_finite_min = self.match_n_finite_min
        errors = []
        if n_target_meas != n_ref_meas:
            errors.append(f"{len(self.columns_target_meas)=} != {len(self.columns_ref_meas)=}")
        if n_target_err != n_ref_meas:
            errors.append(f"{len(self.columns_target_err)=} != {len(self.columns_ref_meas)=}")
        if not (n_ref_meas >= match_n_finite_min):
            errors.append(
                f"{len(self.columns_ref_meas)=} !>= {self.match_n_finite_min=}, no matches possible"
            )
        if errors:
            raise ValueError("\n".join(errors))


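# Illustrative configuration sketch (hypothetical column names, not part of the
# original module): validate() checks that columns_target_meas and columns_target_err
# have the same length as columns_ref_meas, and that there are at least
# match_n_finite_min (default 2) measurement columns.
def _example_match_config():
    config = MatchProbabilisticConfig(
        columns_ref_flux=['flux_g', 'flux_r'],
        columns_ref_meas=['flux_g', 'flux_r'],
        columns_target_meas=['g_flux', 'r_flux'],
        columns_target_err=['g_fluxErr', 'r_fluxErr'],
    )
    config.validate()
    return config

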
def default_value(dtype):
    if dtype is str:
        return ''
    elif np.issubdtype(dtype, np.signedinteger):
        return np.inf
    elif np.issubdtype(dtype, np.unsignedinteger):
        return -np.inf
    return None


class MatcherProbabilistic:
    """A probabilistic, greedy catalog matcher.

    Parameters
    ----------
    config: `MatchProbabilisticConfig`
        A configuration instance.
    """

    config: MatchProbabilisticConfig

    def __init__(
        self,
        config: MatchProbabilisticConfig,
    ):
        self.config = config

    def match(
        self,
        catalog_ref: astropy.table.Table,
        catalog_target: astropy.table.Table,
        select_ref: np.array = None,
        select_target: np.array = None,
        logger: logging.Logger = None,
        logging_n_rows: int = None,
        **kwargs
    ):
        """Match catalogs.

        Parameters
        ----------
        catalog_ref : `astropy.table.Table`
            A reference catalog to match in order of a given column (i.e. greedily).
        catalog_target : `astropy.table.Table`
            A target catalog for matching sources from `catalog_ref`. Must contain measurements with errors.
        select_ref : `numpy.array`
            A boolean array of the same length as `catalog_ref` selecting the sources that can be matched.
        select_target : `numpy.array`
            A boolean array of the same length as `catalog_target` selecting the sources that can be matched.
        logger : `logging.Logger`
            A Logger for logging.
        logging_n_rows : `int`
            The number of sources to match before printing a log message.
        kwargs
            Additional keyword arguments to pass to `format_catalogs`.

        Returns
        -------
        catalog_out_ref : `astropy.table.Table`
            A catalog of identical length to `catalog_ref`, containing match information for rows selected by
            `select_ref` (including the matching row index in `catalog_target`).
        catalog_out_target : `astropy.table.Table`
            A catalog of identical length to `catalog_target`, containing the indices of matching rows in
            `catalog_ref`.
        exceptions : `dict` [`int`, `Exception`]
            A dictionary keyed by `catalog_ref` row number of the first exception caught when matching.
        """
        if logger is None:
            logger = logger_default

        t_init = time.process_time()
        config = self.config

        # Transform any coordinates, if required
        # Note: The returned objects contain the original catalogs, as well as
        # transformed coordinates, and the selection of sources for matching.
        # These might be identical to the arrays passed as kwargs, but that
        # depends on config settings.
        # For the rest of this function, the selection arrays will be used,
        # but the indices of the original, unfiltered catalog will also be
        # output, so some further indexing steps are needed.
        with warnings.catch_warnings():
            # We already issued a deprecation warning; no need to repeat it.
            warnings.filterwarnings(action="ignore", category=FutureWarning)
            ref, target = config.coord_format.format_catalogs(
                catalog_ref=catalog_ref, catalog_target=catalog_target,
                select_ref=select_ref, select_target=select_target,
                **kwargs
            )

        # If no order is specified, take nansum of all flux columns for a 'total flux'
        # Note: it won't actually be a total flux if bands overlap significantly
        # (or it might define a filter with >100% efficiency)
        column_order = (
            catalog_ref[config.column_ref_order][ref.extras.select]
            if config.column_ref_order is not None else
            np.nansum([catalog_ref[col][ref.extras.select] for col in config.columns_ref_flux], axis=0)
        )
        order = np.argsort(column_order if config.order_ascending else -column_order)

        n_ref_select = len(ref.extras.indices)

        coords_spherical = config.coord_format.coords_spherical
        coords_ref, coords_target = (
            (cat.coord1[cat.extras.select], cat.coord2[cat.extras.select])
            for cat in (ref, target)
        )

        # Generate K-d tree to compute distances
        logger.info('Generating cKDTree with match_n_max=%d', config.match_n_max)

        if coords_spherical:
            match_dist_max = config.match_dist_max/3600.
            with Matcher(coords_target[0], coords_target[1]) as matcher:
                idxs_target_select = matcher.query_knn(
                    coords_ref[0], coords_ref[1],
                    distance_upper_bound=match_dist_max,
                    k=config.match_n_max,
                )
        # Call scipy for non-spherical case
        # The spherical case won't trigger, but the implementation is left for comparison, if needed
        else:
            match_dist_max = np.radians(config.match_dist_max/3600.)
            # Convert ra/dec sky coordinates to spherical vectors for accurate distances
            func_convert = _radec_to_xyz if coords_spherical else np.vstack
            vec_ref, vec_target = (
                func_convert(coords[0], coords[1])
                for coords in (coords_ref, coords_target)
            )
            tree_obj = cKDTree(vec_target)
            _, idxs_target_select = tree_obj.query(
                vec_ref,
                distance_upper_bound=match_dist_max,
                k=config.match_n_max,
            )

        n_target_select = len(target.extras.indices)
        n_matches = np.sum(idxs_target_select != n_target_select, axis=1)
        n_matched_max = np.sum(n_matches == config.match_n_max)
        if n_matched_max > 0:
            logger.warning(
                '%d/%d (%.2f%%) selected true objects have n_matches=n_match_max(%d)',
                n_matched_max, n_ref_select, 100.*n_matched_max/n_ref_select, config.match_n_max
            )

        # Pre-allocate outputs
        target_row_match = np.full(target.extras.n, np.iinfo(np.int64).min, dtype=np.int64)
        ref_candidate_match = np.zeros(ref.extras.n, dtype=bool)
        ref_row_match = np.full(ref.extras.n, np.iinfo(np.int64).min, dtype=np.int64)
        ref_match_count = np.zeros(ref.extras.n, dtype=np.int32)
        ref_match_meas_finite = np.zeros(ref.extras.n, dtype=np.int32)
        ref_chisq = np.full(ref.extras.n, np.nan, dtype=float)

        # Need the original reference row indices for output
        idx_orig_ref, idx_orig_target = (np.argwhere(cat.extras.select)[:, 0] for cat in (ref, target))

        # Retrieve required columns, including any converted ones (default to original column name)
        columns_convert = config.coord_format.coords_ref_to_convert
        if columns_convert is None:
            columns_convert = {}
        data_ref = np.array([
            ref.catalog[columns_convert.get(column, column)][ref.extras.indices[order]]
            for column in config.columns_ref_meas
        ])
        data_target = np.array([
            target.catalog[col][target.extras.select] for col in config.columns_target_meas
        ])
        errors_target = np.array([
            target.catalog[col][target.extras.select] for col in config.columns_target_err
        ])

        exceptions = {}
        # The kdTree uses len(inputs) as a sentinel value for no match
        matched_target = {n_target_select, }
        index_ref = idx_orig_ref[order]
        # Fill in the candidate column
        ref_candidate_match[index_ref] = True

        # Count this as the time when disambiguation begins
        t_begin = time.process_time()

        # Exclude unmatched sources
        matched_ref = idxs_target_select[order, 0] != n_target_select
        order = order[matched_ref]
        idx_first = idxs_target_select[order, 0]
        chi_0 = (data_target[:, idx_first] - data_ref[:, matched_ref])/errors_target[:, idx_first]
        chi_finite_0 = np.isfinite(chi_0)
        n_finite_0 = np.sum(chi_finite_0, axis=0)
        chi_0[~chi_finite_0] = 0
        chisq_sum_0 = np.sum(chi_0*chi_0, axis=0)
        n_meas = len(config.columns_ref_meas)

        logger.info('Disambiguating %d/%d matches/targets', len(order), len(ref.catalog))
        for index_n, index_row_select in enumerate(order):
            index_row = idx_orig_ref[index_row_select]
            found = idxs_target_select[index_row_select, :]
            # Unambiguous match, short-circuit some evaluations
            if (found[1] == n_target_select) and (found[0] not in matched_target):
                n_finite = n_finite_0[index_n]
                if not (n_finite >= config.match_n_finite_min):
                    continue
                idx_chisq_min = 0
                n_matched = 1
                chisq_sum = chisq_sum_0[index_n]
            else:
                # Select match candidates from nearby sources not already matched
                # Note: set lookup is apparently fast enough that this is a few percent faster than:
                # found = [x for x in found[found != n_target_select] if x not in matched_target]
                # ... at least for ~1M sources
                found = [x for x in found if x not in matched_target]
                n_found = len(found)
                if n_found == 0:
                    continue
                # This is an ndarray with len(data_ref/target) rows by n_found columns
                chi = (
                    data_target[:, found] - data_ref[:, index_n].reshape((n_meas, 1))
                )/errors_target[:, found]
                finite = np.isfinite(chi)
                n_finite = np.sum(finite, axis=0)
                # Require some number of finite chi_sq to match
                chisq_good = n_finite >= config.match_n_finite_min
                if not any(chisq_good):
                    continue
                try:
                    chisq_sum = np.zeros(n_found, dtype=float)
                    chisq_sum[chisq_good] = np.nansum(chi[:, chisq_good] ** 2, axis=0)
                    idx_chisq_min = np.nanargmin(chisq_sum / n_finite)
                    n_finite = n_finite[idx_chisq_min]
                    n_matched = len(chisq_good)
                    chisq_sum = chisq_sum[idx_chisq_min]
                except Exception as error:
                    # Can't foresee any exceptions, but they shouldn't prevent
                    # matching subsequent sources
                    exceptions[index_row] = error
            ref_match_meas_finite[index_row] = n_finite
            ref_match_count[index_row] = n_matched
            ref_chisq[index_row] = chisq_sum
            idx_match_select = found[idx_chisq_min]
            row_target = target.extras.indices[idx_match_select]
            ref_row_match[index_row] = row_target

            target_row_match[row_target] = index_row
            matched_target.add(idx_match_select)

            if logging_n_rows and ((index_n + 1) % logging_n_rows == 0):
                t_elapsed = time.process_time() - t_begin
                logger.info(
                    'Processed %d/%d in %.2fs at sort value=%.3f',
                    index_n + 1, n_ref_select, t_elapsed, column_order[order[index_n]],
                )

        data_ref = {
            'match_candidate': ref_candidate_match,
            'match_row': ref_row_match,
            'match_count': ref_match_count,
            'match_chisq': ref_chisq,
            'match_n_chisq_finite': ref_match_meas_finite,
        }
        data_target = {
            'match_candidate': target.extras.select if target.extras.select is not None else (
                np.ones(target.extras.n, dtype=bool)),
            'match_row': target_row_match,
        }

        for (columns, out_original, out_matched, in_original, in_matched, matches, name_cat) in (
            (
                self.config.columns_ref_copy,
                data_ref,
                data_target,
                ref,
                target,
                target_row_match,
                'target',
            ),
            (
                self.config.columns_target_copy,
                data_target,
                data_ref,
                target,
                ref,
                ref_row_match,
                'reference',
            ),
        ):
            matched = matches >= 0
            idx_matched = matches[matched]
            logger.info('Matched %d/%d %s sources', np.sum(matched), len(matched), name_cat)

            for column in columns:
                values = in_original.catalog[column]
                out_original[column] = values
                dtype = in_original.catalog[column].dtype

                # Pandas object columns can have mixed types - check for that
                if dtype is object:
                    types = list(set((type(x) for x in values)))
                    if len(types) != 1:
                        raise RuntimeError(f'Column {column} dtype={dtype} has multiple types={types}')
                    dtype = types[0]

                value_fill = default_value(dtype)

                # Without this, the dtype would be '<U1' for an empty Unicode string
                if dtype is str:
                    dtype = f'<U{max(len(x) for x in values)}'

                column_match = np.full(in_matched.extras.n, value_fill, dtype=dtype)
                column_match[matched] = in_original.catalog[column][idx_matched]
                out_matched[f'match_{column}'] = column_match

        logger.info(
            'Completed match disambiguating in %.2fs (total %.2fs)',
            time.process_time() - t_begin,
            time.process_time() - t_init,
        )

        catalog_out_ref = astropy.table.Table(data_ref)
        for column, description in {
            'match_candidate': 'Whether the object was selected as a candidate for matching',
            'match_row': 'The index of the best matched row in the target table, if any',
            'match_count': 'The number of candidate matching target objects, i.e. those within the match'
                           ' distance but excluding objects already matched to a ref object',
            'match_chisq': 'The sum of all finite reduced chi-squared values over all match columns',
            'match_n_chisq_finite': 'The number of match columns with finite chisq',
        }.items():
            catalog_out_ref[column].description = description
        catalog_out_target = astropy.table.Table(data_target)

        return catalog_out_ref, catalog_out_target, exceptions
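

# Illustrative end-to-end sketch (hypothetical catalogs and column names, not part of
# the original module). The default configuration matches on spherical coordinates
# within 0.5 arcseconds and disambiguates candidates by the chi-squared of the
# measurement columns.
def _example_match():
    catalog_ref = astropy.table.Table({
        'ra': [10.0, 10.002], 'dec': [-5.0, -5.002],
        'flux_g': [100.0, 50.0], 'flux_r': [120.0, 60.0],
    })
    catalog_target = astropy.table.Table({
        'coord_ra': [10.00001, 10.00201], 'coord_dec': [-5.00001, -5.00201],
        'flux_g': [98.0, 52.0], 'flux_r': [118.0, 61.0],
        'flux_gErr': [5.0, 5.0], 'flux_rErr': [5.0, 5.0],
    })
    config = MatchProbabilisticConfig(
        columns_ref_flux=['flux_g', 'flux_r'],
        columns_ref_meas=['flux_g', 'flux_r'],
        columns_target_meas=['flux_g', 'flux_r'],
        columns_target_err=['flux_gErr', 'flux_rErr'],
    )
    matcher = MatcherProbabilistic(config)
    catalog_out_ref, catalog_out_target, exceptions = matcher.match(catalog_ref, catalog_target)
    # catalog_out_ref['match_row'] holds the index of the best-matching target row
    # (or a large negative sentinel when unmatched); catalog_out_target['match_row']
    # is the inverse mapping back into the reference catalog.
    return catalog_out_ref, catalog_out_target, exceptions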