454 catalog_ref: astropy.table.Table,
455 catalog_target: astropy.table.Table,
456 select_ref: np.array =
None,
457 select_target: np.array =
None,
458 logger: logging.Logger =
None,
459 logging_n_rows: int =
None,
466 catalog_ref : `astropy.table.Table`
467 A reference catalog to match in order of a given column (i.e. greedily).
468 catalog_target : `astropy.table.Table`
469 A target catalog for matching sources from `catalog_ref`. Must contain measurements with errors.
470 select_ref : `numpy.array`
471 A boolean array of the same length as `catalog_ref` selecting the sources that can be matched.
472 select_target : `numpy.array`
473 A boolean array of the same length as `catalog_target` selecting the sources that can be matched.
474 logger : `logging.Logger`
475 A Logger for logging.
476 logging_n_rows : `int`
477 The number of sources to match before printing a log message.
479 Additional keyword arguments to pass to `format_catalogs`.
483 catalog_out_ref : `astropy.table.Table`
484 A catalog of identical length to `catalog_ref`, containing match information for rows selected by
485 `select_ref` (including the matching row index in `catalog_target`).
486 catalog_out_target : `astropy.table.Table`
487 A catalog of identical length to `catalog_target`, containing the indices of matching rows in
489 exceptions : `dict` [`int`, `Exception`]
490 A dictionary keyed by `catalog_target` row number of the first exception caught when matching.
493 logger = logger_default
495 t_init = time.process_time()
506 with warnings.catch_warnings():
508 warnings.filterwarnings(action=
"ignore", category=FutureWarning)
509 ref, target = config.coord_format.format_catalogs(
510 catalog_ref=catalog_ref, catalog_target=catalog_target,
511 select_ref=select_ref, select_target=select_target,
519 catalog_ref[config.column_ref_order][ref.extras.select]
520 if config.column_ref_order
is not None else
521 np.nansum([catalog_ref[col][ref.extras.select]
for col
in config.columns_ref_flux], axis=0)
523 order = np.argsort(column_order
if config.order_ascending
else -column_order)
525 n_ref_select = len(ref.extras.indices)
527 coords_spherical = config.coord_format.coords_spherical
528 coords_ref, coords_target = (
529 (cat.coord1[cat.extras.select], cat.coord2[cat.extras.select])
530 for cat
in (ref, target)
534 logger.info(
'Generating cKDTree with match_n_max=%d', config.match_n_max)
537 match_dist_max = config.match_dist_max/3600.
538 with Matcher(coords_target[0], coords_target[1])
as matcher:
539 idxs_target_select = matcher.query_knn(
540 coords_ref[0], coords_ref[1],
541 distance_upper_bound=match_dist_max,
542 k=config.match_n_max,
547 match_dist_max = np.radians(config.match_dist_max/3600.)
549 func_convert = _radec_to_xyz
if coords_spherical
else np.vstack
550 vec_ref, vec_target = (
551 func_convert(coords[0], coords[1])
552 for coords
in (coords_ref, coords_target)
554 tree_obj = cKDTree(vec_target)
555 _, idxs_target_select = tree_obj.query(
557 distance_upper_bound=match_dist_max,
558 k=config.match_n_max,
561 n_target_select = len(target.extras.indices)
562 n_matches = np.sum(idxs_target_select != n_target_select, axis=1)
563 n_matched_max = np.sum(n_matches == config.match_n_max)
564 if n_matched_max > 0:
566 '%d/%d (%.2f%%) selected true objects have n_matches=n_match_max(%d)',
567 n_matched_max, n_ref_select, 100.*n_matched_max/n_ref_select, config.match_n_max
571 target_row_match = np.full(target.extras.n, np.iinfo(np.int64).min, dtype=np.int64)
572 ref_candidate_match = np.zeros(ref.extras.n, dtype=bool)
573 ref_row_match = np.full(ref.extras.n, np.iinfo(np.int64).min, dtype=np.int64)
574 ref_match_count = np.zeros(ref.extras.n, dtype=np.int32)
575 ref_match_meas_finite = np.zeros(ref.extras.n, dtype=np.int32)
576 ref_chisq = np.full(ref.extras.n, np.nan, dtype=float)
579 idx_orig_ref, idx_orig_target = (np.argwhere(cat.extras.select)[:, 0]
for cat
in (ref, target))
582 columns_convert = config.coord_format.coords_ref_to_convert
583 if columns_convert
is None:
585 data_ref = np.array([
586 ref.catalog[columns_convert.get(column, column)][ref.extras.indices[order]]
587 for column
in config.columns_ref_meas
589 data_target = np.array([
590 target.catalog[col][target.extras.select]
for col
in config.columns_target_meas
592 errors_target = np.array([
593 target.catalog[col][target.extras.select]
for col
in config.columns_target_err
598 matched_target = {n_target_select, }
599 index_ref = idx_orig_ref[order]
601 ref_candidate_match[index_ref] =
True
604 t_begin = time.process_time()
607 matched_ref = idxs_target_select[order, 0] != n_target_select
608 order = order[matched_ref]
609 idx_first = idxs_target_select[order, 0]
610 chi_0 = (data_target[:, idx_first] - data_ref[:, matched_ref])/errors_target[:, idx_first]
611 chi_finite_0 = np.isfinite(chi_0)
612 n_finite_0 = np.sum(chi_finite_0, axis=0)
613 chi_0[~chi_finite_0] = 0
614 chisq_sum_0 = np.sum(chi_0*chi_0, axis=0)
615 n_meas = len(config.columns_ref_meas)
617 logger.info(
'Disambiguating %d/%d matches/targets', len(order), len(ref.catalog))
618 for index_n, index_row_select
in enumerate(order):
619 index_row = idx_orig_ref[index_row_select]
620 found = idxs_target_select[index_row_select, :]
622 if (found[1] == n_target_select)
and (found[0]
not in matched_target):
623 n_finite = n_finite_0[index_n]
624 if not (n_finite >= config.match_n_finite_min):
628 chisq_sum = chisq_sum_0[index_n]
634 found = [x
for x
in found
if x
not in matched_target]
640 data_target[:, found] - data_ref[:, index_n].reshape((n_meas, 1))
641 )/errors_target[:, found]
642 finite = np.isfinite(chi)
643 n_finite = np.sum(finite, axis=0)
645 chisq_good = n_finite >= config.match_n_finite_min
646 if not any(chisq_good):
649 chisq_sum = np.zeros(n_found, dtype=float)
650 chisq_sum[chisq_good] = np.nansum(chi[:, chisq_good] ** 2, axis=0)
651 idx_chisq_min = np.nanargmin(chisq_sum / n_finite)
652 n_finite = n_finite[idx_chisq_min]
653 n_matched = len(chisq_good)
654 chisq_sum = chisq_sum[idx_chisq_min]
655 except Exception
as error:
658 exceptions[index_row] = error
659 ref_match_meas_finite[index_row] = n_finite
660 ref_match_count[index_row] = n_matched
661 ref_chisq[index_row] = chisq_sum
662 idx_match_select = found[idx_chisq_min]
663 row_target = target.extras.indices[idx_match_select]
664 ref_row_match[index_row] = row_target
666 target_row_match[row_target] = index_row
667 matched_target.add(idx_match_select)
669 if logging_n_rows
and ((index_n + 1) % logging_n_rows == 0):
670 t_elapsed = time.process_time() - t_begin
672 'Processed %d/%d in %.2fs at sort value=%.3f',
673 index_n + 1, n_ref_select, t_elapsed, column_order[order[index_n]],
677 'match_candidate': ref_candidate_match,
678 'match_row': ref_row_match,
679 'match_count': ref_match_count,
680 'match_chisq': ref_chisq,
681 'match_n_chisq_finite': ref_match_meas_finite,
684 'match_candidate': target.extras.select
if target.extras.select
is not None else (
685 np.ones(target.extras.n, dtype=bool)),
686 'match_row': target_row_match,
689 for (columns, out_original, out_matched, in_original, in_matched, matches, name_cat)
in (
691 self.
config.columns_ref_copy,
700 self.
config.columns_target_copy,
709 matched = matches >= 0
710 idx_matched = matches[matched]
711 logger.info(
'Matched %d/%d %s sources', np.sum(matched), len(matched), name_cat)
713 for column
in columns:
714 values = in_original.catalog[column]
715 out_original[column] = values
716 dtype = in_original.catalog[column].dtype
720 types = list(set((type(x)
for x
in values)))
722 raise RuntimeError(f
'Column {column} dtype={dtype} has multiple types={types}')
729 dtype = f
'<U{max(len(x) for x in values)}'
731 column_match = np.full(in_matched.extras.n, value_fill, dtype=dtype)
732 column_match[matched] = in_original.catalog[column][idx_matched]
733 out_matched[f
'match_{column}'] = column_match
736 'Completed match disambiguating in %.2fs (total %.2fs)',
737 time.process_time() - t_begin,
738 time.process_time() - t_init,
741 catalog_out_ref = astropy.table.Table(data_ref)
742 for column, description
in {
743 'match_candidate':
'Whether the object was selected as a candidate for matching',
744 'match_row':
'The index of the best matched row in the target table, if any',
745 'match_count':
'The number of candidate matching target objects, i.e. those within the match'
746 ' distance but excluding objects already matched to a ref object',
747 'match_chisq':
'The sum of all finite reduced chi-squared values over all match columns',
748 'match_n_chisq_finite':
'The number of match columns with finite chisq',
750 catalog_out_ref[column].description = description
751 catalog_out_target = astropy.table.Table(data_target)
753 return catalog_out_ref, catalog_out_target, exceptions