LSST Applications g180d380827+78227d2bc4,g2079a07aa2+86d27d4dc4,g2305ad1205+bdd7851fe3,g2bbee38e9b+c6a8a0fb72,g337abbeb29+c6a8a0fb72,g33d1c0ed96+c6a8a0fb72,g3a166c0a6a+c6a8a0fb72,g3d1719c13e+260d7c3927,g3ddfee87b4+723a6db5f3,g487adcacf7+29e55ea757,g50ff169b8f+96c6868917,g52b1c1532d+585e252eca,g591dd9f2cf+9443c4b912,g62aa8f1a4b+7e2ea9cd42,g858d7b2824+260d7c3927,g864b0138d7+8498d97249,g95921f966b+dffe86973d,g991b906543+260d7c3927,g99cad8db69+4809d78dd9,g9c22b2923f+e2510deafe,g9ddcbc5298+9a081db1e4,ga1e77700b3+03d07e1c1f,gb0e22166c9+60f28cb32d,gb23b769143+260d7c3927,gba4ed39666+c2a2e4ac27,gbb8dafda3b+e22341fd87,gbd998247f1+585e252eca,gc120e1dc64+713f94b854,gc28159a63d+c6a8a0fb72,gc3e9b769f7+385ea95214,gcf0d15dbbd+723a6db5f3,gdaeeff99f8+f9a426f77a,ge6526c86ff+fde82a80b9,ge79ae78c31+c6a8a0fb72,gee10cc3b42+585e252eca,w.2024.18
LSST Data Management Base Package
Loading...
Searching...
No Matches
matcher_probabilistic.py
Go to the documentation of this file.
1# This file is part of meas_astrom.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21
22__all__ = ['ConvertCatalogCoordinatesConfig', 'MatchProbabilisticConfig', 'MatcherProbabilistic']
23
24import lsst.pex.config as pexConfig
25
26from dataclasses import dataclass
27import logging
28import numpy as np
29import pandas as pd
30from scipy.spatial import cKDTree
31import time
32from typing import Callable, Set
33
34logger_default = logging.getLogger(__name__)
35
36
37def _mul_column(column: np.array, value: float):
38 if value is not None and value != 1:
39 column *= value
40 return column
41
42
43def _radec_to_xyz(ra, dec):
44 """Convert input ra/dec coordinates to spherical unit vectors.
45
46 Parameters
47 ----------
48 ra, dec: `numpy.ndarray`
49 Arrays of right ascension/declination in degrees.
50
51 Returns
52 -------
53 vectors : `numpy.ndarray`, (N, 3)
54 Output unit vectors.
55 """
56 if ra.size != dec.size:
57 raise ValueError('ra and dec must be same size')
58 ras = np.radians(ra)
59 decs = np.radians(dec)
60 vectors = np.empty((ras.size, 3))
61
62 sin_dec = np.sin(np.pi / 2 - decs)
63 vectors[:, 0] = sin_dec * np.cos(ras)
64 vectors[:, 1] = sin_dec * np.sin(ras)
65 vectors[:, 2] = np.cos(np.pi / 2 - decs)
66
67 return vectors
68
69
@dataclass
class CatalogExtras:
    """Store frequently-referenced (meta)data relevant for matching a catalog.

    Parameters
    ----------
    catalog : `pandas.DataFrame`
        A pandas catalog to store extra information for. Only its length
        is read.
    select : `numpy.array`
        A numpy boolean array of the same length as catalog to be used for
        target selection. If None, every row is selected.
    coordinate_factor : `float`
        A multiplicative factor for the catalog coordinates; stored as-is.
    """

    # Number of rows in the catalog
    n: int
    # Row indices of the selected rows (all rows if no selection was given)
    indices: np.array
    # Boolean selection array (all True if no selection was given)
    select: np.array

    coordinate_factor: float = None

    # The explicit __init__ below takes precedence over the one that the
    # dataclass decorator would otherwise generate.
    def __init__(self, catalog: pd.DataFrame, select: np.array = None, coordinate_factor: float = None):
        self.n = len(catalog)
        self.select = np.ones(self.n, dtype=bool) if select is None else select
        self.indices = np.flatnonzero(select) if select is not None else np.arange(self.n)
        self.coordinate_factor = coordinate_factor
94
95
@dataclass(frozen=True)
class ComparableCatalog:
    """A catalog with sources with coordinate columns in some standard format/units.

    Parameters
    ----------
    catalog : `pandas.DataFrame`
        A catalog with comparable coordinate columns.
    column_coord1 : `str`
        The first spatial coordinate column name.
    column_coord2 : `str`
        The second spatial coordinate column name.
    coord1 : `numpy.array`
        The first spatial coordinate values.
    coord2 : `numpy.array`
        The second spatial coordinate values.
    extras : `CatalogExtras`
        Extra cached (meta)data for the `catalog`.
    """

    catalog: pd.DataFrame
    column_coord1: str
    column_coord2: str
    coord1: np.array
    coord2: np.array
    extras: CatalogExtras
120
121
class ConvertCatalogCoordinatesConfig(pexConfig.Config):
    """Configuration for formatting and converting catalog coordinates
    so that a reference and a target catalog can be compared.
    """

    column_ref_coord1 = pexConfig.Field(
        dtype=str,
        default='ra',
        doc='The reference table column for the first spatial coordinate (usually x or ra).',
    )
    column_ref_coord2 = pexConfig.Field(
        dtype=str,
        default='dec',
        doc='The reference table column for the second spatial coordinate (usually y or dec).'
            ' Units must match column_ref_coord1.',
    )
    column_target_coord1 = pexConfig.Field(
        dtype=str,
        default='coord_ra',
        doc='The target table column for the first spatial coordinate (usually x or ra).'
            ' Units must match column_ref_coord1.',
    )
    column_target_coord2 = pexConfig.Field(
        dtype=str,
        default='coord_dec',
        doc='The target table column for the second spatial coordinate (usually y or dec).'
            ' Units must match column_ref_coord2.',
    )
    coords_spherical = pexConfig.Field(
        dtype=bool,
        default=True,
        doc='Whether column_*_coord[12] are spherical coordinates (ra/dec) or not (pixel x/y)',
    )
    coords_ref_factor = pexConfig.Field(
        dtype=float,
        default=1.0,
        doc='Multiplicative factor for reference catalog coordinates.'
            ' If coords_spherical is true, this must be the number of degrees per unit increment of '
            'column_ref_coord[12]. Otherwise, it must convert the coordinate to the same units'
            ' as the target coordinates.',
    )
    coords_target_factor = pexConfig.Field(
        dtype=float,
        default=1.0,
        doc='Multiplicative factor for target catalog coordinates.'
            ' If coords_spherical is true, this must be the number of degrees per unit increment of '
            'column_target_coord[12]. Otherwise, it must convert the coordinate to the same units'
            ' as the reference coordinates.',
    )
    coords_ref_to_convert = pexConfig.DictField(
        default=None,
        optional=True,
        keytype=str,
        itemtype=str,
        dictCheck=lambda x: len(x) == 2,
        doc='Dict mapping sky coordinate columns to be converted to pixel columns',
    )
    mag_zeropoint_ref = pexConfig.Field(
        dtype=float,
        default=31.4,
        doc='Magnitude zeropoint for reference catalog.',
    )

    def format_catalogs(
        self,
        catalog_ref: pd.DataFrame,
        catalog_target: pd.DataFrame,
        select_ref: np.array = None,
        select_target: np.array = None,
        radec_to_xy_func: Callable = None,
        return_converted_columns: bool = False,
        **kwargs,
    ):
        """Format matched catalogs that may require coordinate conversions.

        Parameters
        ----------
        catalog_ref : `pandas.DataFrame`
            A reference catalog for comparison to `catalog_target`.
            If ``coords_ref_to_convert`` is set, the converted columns are
            written into this catalog.
        catalog_target : `pandas.DataFrame`
            A target catalog with measurements for comparison to `catalog_ref`.
        select_ref : `numpy.ndarray`, (Nref,)
            A boolean array of len `catalog_ref`, True for valid match candidates.
        select_target : `numpy.ndarray`, (Ntarget,)
            A boolean array of len `catalog_target`, True for valid match candidates.
        radec_to_xy_func : `typing.Callable`
            Function called as
            ``radec_to_xy_func(coord1, coord2, coords_ref_factor, **kwargs)``
            with the reference coordinate columns. Its return value is
            iterated once per reference row and each item is indexed with
            0 and 1 to obtain the two converted coordinates. Only required
            if ``coords_ref_to_convert`` is set.
        return_converted_columns : `bool`
            Whether to return converted columns in the `coord1` and `coord2`
            attributes, rather than keep the original values.
        kwargs
            Additional keyword arguments to pass to `radec_to_xy_func`.

        Returns
        -------
        compcat_ref, compcat_target : `ComparableCatalog`
            Comparable catalogs corresponding to the input reference and target.

        Raises
        ------
        TypeError
            Raised if reference coordinates must be converted but
            `radec_to_xy_func` is not callable.
        """
        convert_ref = self.coords_ref_to_convert
        if convert_ref and not callable(radec_to_xy_func):
            raise TypeError('radec_to_xy_func must be callable if converting ref coords')

        # Set up objects with frequently-used attributes like selection bool array
        extras_ref, extras_target = (
            CatalogExtras(catalog, select=select, coordinate_factor=coord_factor)
            for catalog, select, coord_factor in zip(
                (catalog_ref, catalog_target),
                (select_ref, select_target),
                (self.coords_ref_factor, self.coords_target_factor),
            )
        )

        compcats = []

        # Retrieve coordinates and multiply them by scaling factors
        for catalog, extras, (column1, column2), convert in (
            (catalog_ref, extras_ref, (self.column_ref_coord1, self.column_ref_coord2), convert_ref),
            (catalog_target, extras_target, (self.column_target_coord1, self.column_target_coord2), False),
        ):
            coord1, coord2 = (
                _mul_column(catalog[column], extras.coordinate_factor)
                for column in (column1, column2)
            )
            if convert:
                # NOTE(review): coord1/coord2 were already scaled by the
                # reference factor above and the factor is passed again here;
                # confirm that radec_to_xy_func expects this.
                xy_ref = radec_to_xy_func(coord1, coord2, self.coords_ref_factor, **kwargs)
                # The dict values name the output columns, in coordinate order
                for idx_coord, column_out in enumerate(self.coords_ref_to_convert.values()):
                    coord = np.array([xy[idx_coord] for xy in xy_ref])
                    catalog[column_out] = coord
            # This applies to both catalogs: when requested, the coordinates
            # are read from the converted column names in either catalog.
            if convert_ref and return_converted_columns:
                column1, column2 = self.coords_ref_to_convert.values()
                coord1, coord2 = catalog[column1], catalog[column2]
            if isinstance(coord1, pd.Series):
                coord1 = coord1.values
            if isinstance(coord2, pd.Series):
                coord2 = coord2.values

            compcats.append(ComparableCatalog(
                catalog=catalog, column_coord1=column1, column_coord2=column2,
                coord1=coord1, coord2=coord2, extras=extras,
            ))

        return tuple(compcats)
263
264
class MatchProbabilisticConfig(pexConfig.Config):
    """Configuration for the MatchProbabilistic matcher."""

    column_ref_order = pexConfig.Field(
        dtype=str,
        default=None,
        optional=True,
        doc='Name of column in reference catalog specifying order for matching.'
            ' Derived from columns_ref_flux if not set.',
    )

    @property
    def columns_in_ref(self) -> Set[str]:
        """The set of reference catalog column names used by this config."""
        columns_all = [
            self.coord_format.column_ref_coord1,
            self.coord_format.column_ref_coord2,
        ]
        # NOTE(review): the two select entries were missing from the listing
        # this file was recovered from (two lines dropped); they are restored
        # from the declared columns_ref_select_* fields - confirm.
        for columns in (
            self.columns_ref_flux,
            self.columns_ref_meas,
            self.columns_ref_select_false,
            self.columns_ref_select_true,
            self.columns_ref_copy,
        ):
            columns_all.extend(columns)
        if self.column_ref_order:
            columns_all.append(self.column_ref_order)

        return set(columns_all)

    @property
    def columns_in_target(self) -> Set[str]:
        """The set of target catalog column names used by this config."""
        columns_all = [
            self.coord_format.column_target_coord1,
            self.coord_format.column_target_coord2,
        ]
        # NOTE(review): all five entries were missing from the listing this
        # file was recovered from; they are restored from the five declared
        # columns_target_* list fields - confirm.
        for columns in (
            self.columns_target_meas,
            self.columns_target_err,
            self.columns_target_select_false,
            self.columns_target_select_true,
            self.columns_target_copy,
        ):
            columns_all.extend(columns)
        return set(columns_all)

    columns_ref_copy = pexConfig.ListField(
        dtype=str,
        default=[],
        listCheck=lambda x: len(set(x)) == len(x),
        optional=True,
        doc='Reference table columns to copy unchanged into both match tables',
    )
    columns_ref_flux = pexConfig.ListField(
        dtype=str,
        default=[],
        optional=True,
        doc="List of reference flux columns to nansum total magnitudes from if column_ref_order is None",
    )
    columns_ref_meas = pexConfig.ListField(
        dtype=str,
        doc='The reference table columns to compute match likelihoods from '
            '(usually centroids and fluxes/magnitudes)',
    )
    columns_ref_select_true = pexConfig.ListField(
        dtype=str,
        default=tuple(),
        doc='Reference table columns to require to be True for selecting sources',
    )
    columns_ref_select_false = pexConfig.ListField(
        dtype=str,
        default=tuple(),
        doc='Reference table columns to require to be False for selecting sources',
    )
    columns_target_copy = pexConfig.ListField(
        dtype=str,
        default=[],
        listCheck=lambda x: len(set(x)) == len(x),
        optional=True,
        doc='Target table columns to copy unchanged into both match tables',
    )
    columns_target_meas = pexConfig.ListField(
        dtype=str,
        doc='Target table columns with measurements corresponding to columns_ref_meas',
    )
    columns_target_err = pexConfig.ListField(
        dtype=str,
        doc='Target table columns with standard errors (sigma) corresponding to columns_ref_meas',
    )
    columns_target_select_true = pexConfig.ListField(
        dtype=str,
        default=('detect_isPrimary',),
        doc='Target table columns to require to be True for selecting sources',
    )
    columns_target_select_false = pexConfig.ListField(
        dtype=str,
        default=('merge_peak_sky',),
        doc='Target table columns to require to be False for selecting sources',
    )
    coord_format = pexConfig.ConfigField(
        dtype=ConvertCatalogCoordinatesConfig,
        doc="Configuration for coordinate conversion",
    )
    mag_brightest_ref = pexConfig.Field(
        dtype=float,
        default=-np.inf,
        doc='Bright magnitude cutoff for selecting reference sources to match.'
            ' Ignored if column_ref_order is None.'
    )
    # np.inf rather than the np.Inf alias, which NumPy 2.0 removed
    mag_faintest_ref = pexConfig.Field(
        dtype=float,
        default=np.inf,
        doc='Faint magnitude cutoff for selecting reference sources to match.'
            ' Ignored if column_ref_order is None.'
    )
    match_dist_max = pexConfig.Field(
        dtype=float,
        default=0.5,
        doc='Maximum match distance. Units must be arcseconds if coords_spherical, '
            'or else match those of column_*_coord[12] multiplied by coords_*_factor.',
    )
    match_n_max = pexConfig.Field(
        dtype=int,
        default=10,
        optional=True,
        doc='Maximum number of spatial matches to consider (in ascending distance order).',
    )
    match_n_finite_min = pexConfig.Field(
        dtype=int,
        default=3,
        optional=True,
        doc='Minimum number of columns with a finite value to measure match likelihood',
    )
    order_ascending = pexConfig.Field(
        dtype=bool,
        default=False,
        optional=True,
        doc='Whether to order reference match candidates in ascending order of column_ref_order '
            '(should be False if the column is a flux and True if it is a magnitude).',
    )

    def validate(self):
        """Check that the measurement/error column lists are consistent.

        Raises
        ------
        ValueError
            Raised if the target measurement or error column lists differ in
            length from the reference measurement columns, or if there are
            fewer measurement columns than ``match_n_finite_min``. All
            failed checks are reported in a single message.
        """
        super().validate()
        n_ref_meas = len(self.columns_ref_meas)
        n_target_meas = len(self.columns_target_meas)
        n_target_err = len(self.columns_target_err)
        match_n_finite_min = self.match_n_finite_min
        errors = []
        if n_target_meas != n_ref_meas:
            errors.append(f"{len(self.columns_target_meas)=} != {len(self.columns_ref_meas)=}")
        if n_target_err != n_ref_meas:
            errors.append(f"{len(self.columns_target_err)=} != {len(self.columns_ref_meas)=}")
        if not (n_ref_meas >= match_n_finite_min):
            errors.append(
                f"{len(self.columns_ref_meas)=} !>= {self.match_n_finite_min=}, no matches possible"
            )
        if errors:
            raise ValueError("\n".join(errors))
423
424
def default_value(dtype):
    """Return the default fill value for a given type.

    Parameters
    ----------
    dtype
        The type to return a default value for.

    Returns
    -------
    value
        An empty string for `str`, positive infinity for
        `numpy.signedinteger`, negative infinity for
        `numpy.unsignedinteger` and None for anything else.

    Notes
    -----
    NOTE(review): these are equality comparisons against the abstract numpy
    classes, so a concrete scalar type such as `numpy.int64` is not matched
    by them - confirm whether a subtype check was intended.
    """
    if dtype == str:
        return ''
    elif dtype == np.signedinteger:
        # np.inf rather than the np.Inf alias, which NumPy 2.0 removed
        return np.inf
    elif dtype == np.unsignedinteger:
        return -np.inf
    return None
433
434
class MatcherProbabilistic:
    """A probabilistic, greedy catalog matcher.

    Parameters
    ----------
    config: `MatchProbabilisticConfig`
        A configuration instance.
    """

    config: MatchProbabilisticConfig

    def __init__(
        self,
        config: MatchProbabilisticConfig,
    ):
        self.config = config

    def match(
        self,
        catalog_ref: pd.DataFrame,
        catalog_target: pd.DataFrame,
        select_ref: np.array = None,
        select_target: np.array = None,
        logger: logging.Logger = None,
        logging_n_rows: int = None,
        **kwargs
    ):
        """Match catalogs.

        Parameters
        ----------
        catalog_ref : `pandas.DataFrame`
            A reference catalog to match in order of a given column (i.e. greedily).
        catalog_target : `pandas.DataFrame`
            A target catalog for matching sources from `catalog_ref`. Must contain measurements with errors.
        select_ref : `numpy.array`
            A boolean array of the same length as `catalog_ref` selecting the sources that can be matched.
        select_target : `numpy.array`
            A boolean array of the same length as `catalog_target` selecting the sources that can be matched.
        logger : `logging.Logger`
            A Logger for logging. Defaults to this module's logger.
        logging_n_rows : `int`
            The number of sources to match before printing a log message.
        kwargs
            Additional keyword arguments to pass to `format_catalogs`.

        Returns
        -------
        catalog_out_ref : `pandas.DataFrame`
            A catalog of identical length to `catalog_ref`, containing match information for rows selected by
            `select_ref` (including the matching row index in `catalog_target`). Unmatched rows have the
            minimum int64 value (which is negative) as their ``match_row``.
        catalog_out_target : `pandas.DataFrame`
            A catalog of identical length to `catalog_target`, containing the indices of matching rows in
            `catalog_ref`, with the same convention for unmatched rows.
        exceptions : `dict` [`int`, `Exception`]
            A dictionary keyed by `catalog_ref` row number of the exception caught when matching that row.
        """
        if logger is None:
            logger = logger_default

        config = self.config

        # Transform any coordinates, if required
        # Note: The returned objects contain the original catalogs, as well as
        # transformed coordinates, and the selection of sources for matching.
        # These might be identical to the arrays passed as kwargs, but that
        # depends on config settings.
        # For the rest of this function, the selection arrays will be used,
        # but the indices of the original, unfiltered catalog will also be
        # output, so some further indexing steps are needed.
        ref, target = config.coord_format.format_catalogs(
            catalog_ref=catalog_ref, catalog_target=catalog_target,
            select_ref=select_ref, select_target=select_target,
            **kwargs
        )

        # If no order is specified, take nansum of all flux columns for a 'total flux'
        # Note: it won't actually be a total flux if bands overlap significantly
        # (or it might define a filter with >100% efficiency
        # Also, this is done on the original dataframe as it's harder to accomplish
        # just with a recarray
        # The result is converted to a plain array because everything below
        # indexes it by position within the selection, not by pandas label.
        column_order = np.asarray(
            catalog_ref.loc[ref.extras.select, config.column_ref_order]
            if config.column_ref_order is not None else
            np.nansum(catalog_ref.loc[ref.extras.select, config.columns_ref_flux], axis=1)
        )
        order = np.argsort(column_order if config.order_ascending else -column_order)

        n_ref_select = len(ref.extras.indices)

        match_dist_max = config.match_dist_max
        coords_spherical = config.coord_format.coords_spherical
        if coords_spherical:
            # Arcseconds to radians; the tree below holds unit vectors
            match_dist_max = np.radians(match_dist_max / 3600.)

        # Convert ra/dec sky coordinates to spherical vectors for accurate distances
        # Otherwise stack the two coordinates into (N, 2) points: the tree
        # needs one row per source, which np.vstack would not provide (and it
        # takes a single sequence argument rather than two arrays).
        vec_ref, vec_target = (
            _radec_to_xyz(coord1, coord2) if coords_spherical else np.column_stack((coord1, coord2))
            for coord1, coord2 in (
                (cat.coord1[cat.extras.select], cat.coord2[cat.extras.select])
                for cat in (ref, target)
            )
        )

        # Generate K-d tree to compute distances
        logger.info('Generating cKDTree with match_n_max=%d', config.match_n_max)
        tree_obj = cKDTree(vec_target)

        # The distances are not needed; only the neighbour indices are used
        _, idxs_target_select = tree_obj.query(
            vec_ref,
            distance_upper_bound=match_dist_max,
            k=config.match_n_max,
        )

        n_target_select = len(target.extras.indices)
        n_matches = np.sum(idxs_target_select != n_target_select, axis=1)
        n_matched_max = np.sum(n_matches == config.match_n_max)
        if n_matched_max > 0:
            logger.warning(
                '%d/%d (%.2f%%) selected true objects have n_matches=n_match_max(%d)',
                n_matched_max, n_ref_select, 100.*n_matched_max/n_ref_select, config.match_n_max
            )

        # Pre-allocate outputs
        # Integer arrays cannot hold NaN (casting it is platform-dependent),
        # so an explicit negative sentinel marks rows without a match; the
        # ">= 0" test further below relies on it being negative.
        row_no_match = np.iinfo(np.int64).min
        target_row_match = np.full(target.extras.n, row_no_match, dtype=np.int64)
        ref_candidate_match = np.zeros(ref.extras.n, dtype=bool)
        ref_row_match = np.full(ref.extras.n, row_no_match, dtype=np.int64)
        ref_match_count = np.zeros(ref.extras.n, dtype=np.int32)
        ref_match_meas_finite = np.zeros(ref.extras.n, dtype=np.int32)
        ref_chisq = np.full(ref.extras.n, np.nan, dtype=float)

        # Need the original reference row indices for output
        # These are 1D, so indexing them yields scalars that can be used as
        # array indices and as (hashable) dictionary keys.
        idx_orig_ref = ref.extras.indices

        # Retrieve required columns, including any converted ones (default to original column name)
        columns_convert = config.coord_format.coords_ref_to_convert
        if columns_convert is None:
            columns_convert = {}
        data_ref = ref.catalog[
            [columns_convert.get(column, column) for column in config.columns_ref_meas]
        ].iloc[ref.extras.indices[order]]
        data_target = target.catalog[config.columns_target_meas][target.extras.select]
        errors_target = target.catalog[config.columns_target_err][target.extras.select]

        exceptions = {}
        # The kdTree uses len(inputs) as a sentinel value for no match
        matched_target = {n_target_select, }

        t_begin = time.process_time()

        logger.info('Matching n_indices=%d/%d', len(order), len(ref.catalog))
        for index_n, index_row_select in enumerate(order):
            index_row = idx_orig_ref[index_row_select]
            ref_candidate_match[index_row] = True
            found = idxs_target_select[index_row_select, :]
            # Select match candidates from nearby sources not already matched
            # Note: set lookup is apparently fast enough that this is a few percent faster than:
            # found = [x for x in found[found != n_target_select] if x not in matched_target]
            # ... at least for ~1M sources
            found = [x for x in found if x not in matched_target]
            n_found = len(found)
            if n_found > 0:
                # This is an ndarray of n_found rows x len(data_ref/target) columns
                chi = (
                    (data_target.iloc[found].values - data_ref.iloc[index_n].values)
                    / errors_target.iloc[found].values
                )
                finite = np.isfinite(chi)
                n_finite = np.sum(finite, axis=1)
                # Require some number of finite chi_sq to match
                chisq_good = n_finite >= config.match_n_finite_min
                if np.any(chisq_good):
                    try:
                        # Candidates failing the cut stay NaN so that nanargmin
                        # skips them; a zero here would make them the minimum.
                        chisq_sum = np.full(n_found, np.nan, dtype=float)
                        chisq_sum[chisq_good] = np.nansum(chi[chisq_good, :] ** 2, axis=1)
                        idx_chisq_min = np.nanargmin(chisq_sum / n_finite)
                        ref_match_meas_finite[index_row] = n_finite[idx_chisq_min]
                        # This counts every candidate found, not only those passing the cut
                        ref_match_count[index_row] = len(chisq_good)
                        ref_chisq[index_row] = chisq_sum[idx_chisq_min]
                        idx_match_select = found[idx_chisq_min]
                        row_target = target.extras.indices[idx_match_select]
                        ref_row_match[index_row] = row_target

                        target_row_match[row_target] = index_row
                        matched_target.add(idx_match_select)
                    except Exception as error:
                        # Can't foresee any exceptions, but they shouldn't prevent
                        # matching subsequent sources
                        exceptions[int(index_row)] = error

            if logging_n_rows and ((index_n + 1) % logging_n_rows == 0):
                t_elapsed = time.process_time() - t_begin
                logger.info(
                    'Processed %d/%d in %.2fs at sort value=%.3f',
                    index_n + 1, n_ref_select, t_elapsed, column_order[order[index_n]],
                )

        data_ref = {
            'match_candidate': ref_candidate_match,
            'match_row': ref_row_match,
            'match_count': ref_match_count,
            'match_chisq': ref_chisq,
            'match_n_chisq_finite': ref_match_meas_finite,
        }
        data_target = {
            'match_candidate': target.extras.select if target.extras.select is not None else (
                np.ones(target.extras.n, dtype=bool)),
            'match_row': target_row_match,
        }

        # Copy the requested columns into the catalog's own output table and,
        # for matched rows, into the other catalog's table as match_{column}.
        for (columns, out_original, out_matched, in_original, in_matched, matches, name_cat) in (
            (
                self.config.columns_ref_copy,
                data_ref,
                data_target,
                ref,
                target,
                target_row_match,
                'reference',
            ),
            (
                self.config.columns_target_copy,
                data_target,
                data_ref,
                target,
                ref,
                ref_row_match,
                'target',
            ),
        ):
            matched = matches >= 0
            idx_matched = matches[matched]
            logger.info('Matched %d/%d %s sources', np.sum(matched), len(matched), name_cat)

            for column in columns:
                values = in_original.catalog[column]
                out_original[column] = values
                dtype = in_original.catalog[column].dtype

                # Pandas object columns can have mixed types - check for that
                if dtype == object:
                    types = list({type(x) for x in values})
                    if len(types) != 1:
                        raise RuntimeError(f'Column {column} dtype={dtype} has multiple types={types}')
                    dtype = types[0]

                value_fill = default_value(dtype)

                # Without this, the dtype would be '<U1' for an empty Unicode string
                if dtype == str:
                    dtype = f'<U{max(len(x) for x in values)}'

                column_match = np.full(in_matched.extras.n, value_fill, dtype=dtype)
                # The match arrays hold row positions, so index by position
                column_match[matched] = values.iloc[idx_matched].values
                out_matched[f'match_{column}'] = column_match

        catalog_out_ref = pd.DataFrame(data_ref)
        catalog_out_target = pd.DataFrame(data_target)

        return catalog_out_ref, catalog_out_target, exceptions
__init__(self, pd.DataFrame catalog, np.array select=None, float coordinate_factor=None)
format_catalogs(self, pd.DataFrame catalog_ref, pd.DataFrame catalog_target, np.array select_ref=None, np.array select_target=None, Callable radec_to_xy_func=None, bool return_converted_columns=False, **kwargs)
match(self, pd.DataFrame catalog_ref, pd.DataFrame catalog_target, np.array select_ref=None, np.array select_target=None, logging.Logger logger=None, int logging_n_rows=None, **kwargs)
daf::base::PropertySet * set
Definition fits.cc:931