26 from ._schemaMapper
import SchemaMapper
27 from ._table
import CoordKey, SourceRecord
31 """Initialize a multi-catalog match.
35 schema : `lsst.afw.table.Schema`
36 Schema shared by all catalogs to be included in the match.
38 Set of name: type for all data ID keys (e.g. {"visit":int,
40 coordField : `str`, optional
41 Prefix for _ra and _dec fields that contain the
42 coordinates to use for the match.
43 idField : `str`, optional
44 Name of the field in schema that contains unique object
46 radius : `lsst.geom.Angle`, optional
47 Maximum separation for a match. Defaults to 0.5 arcseconds.
48 RecordClass : `lsst.afw.table.BaseRecord`
49 Type of record to expect in catalogs to be matched.
52 def __init__(self, schema, dataIdFormat, coordField="coord", idField="id", radius=None,
53 RecordClass=SourceRecord):
55 radius = 0.5*lsst.geom.arcseconds
57 raise ValueError(
"'radius' argument must be an Angle")
60 self.
mappermapper.addMinimalSchema(schema,
True)
61 self.
coordKeycoordKey = CoordKey(schema[coordField])
62 self.
idKeyidKey = schema.find(idField).key
64 outSchema = self.
mappermapper.editOutputSchema()
67 "object", type=numpy.int64, doc=
"Unique ID for joined sources")
68 for name, dataType
in dataIdFormat.items():
69 self.
dataIdKeysdataIdKeys[name] = outSchema.addField(
70 name, type=dataType, doc=
"'%s' data ID component")
83 self.
tabletable = RecordClass.Table.make(self.
mappermapper.getOutputSchema())
88 """Create a new result record from the given input record, using the
89 given data ID and object ID to fill in additional columns.
93 inputRecord : `lsst.afw.table.source.sourceRecord`
94 Record to use as the reference for the new result.
95 dataId : `DataId` or `dict`
96 Data id describing the data.
98 Object id of the object to be added.
102 outputRecord : `lsst.afw.table.source.sourceRecord`
103 Newly generated record.
105 outputRecord = self.
tabletable.copyRecord(inputRecord, self.
mappermapper)
107 outputRecord.set(key, dataId[name])
108 outputRecord.set(self.
objectKeyobjectKey, objId)
111 def add(self, catalog, dataId):
112 """Add a new catalog to the match, corresponding to the given data ID.
113 The new catalog is appended to the `self.result` and
114 `self.reference` catalogs.
118 catalog : `lsst.afw.table.base.Catalog`
119 Catalog to be added to the match result.
120 dataId : `DataId` or `dict`
121 Data id for the catalog to be added.
123 if self.
resultresult
is None:
125 for record
in catalog:
127 record, dataId, objId=self.
nextObjIdnextObjId))
131 catalog.sort(self.
idKeyidKey)
133 unmatchedIds = {record.get(self.
idKeyidKey)
for record
in catalog}
137 matchedRefIds =
set()
138 matchedCatIds =
set()
139 for refRecord, newRecord, distance
in matches:
140 objId = refRecord.get(self.
objectKeyobjectKey)
141 if objId
in matchedRefIds:
145 matchedRefIds.add(objId)
146 if newRecord.get(self.
idKeyidKey)
in matchedCatIds:
151 matchedCatIds.add(newRecord.get(self.
idKeyidKey))
152 unmatchedIds.discard(newRecord.get(self.
idKeyidKey))
155 newToObj.setdefault(newRecord.get(self.
idKeyidKey),
set()).
add(objId)
160 for objId
in unmatchedIds:
161 newRecord = catalog.find(objId, self.
idKeyidKey)
168 """Return the final match catalog, after sorting it by object, copying
169 it to ensure contiguousness, and optionally removing ambiguous
172 After calling finish(), the in-progress state of the matcher
173 is returned to the state it was just after construction, with
174 the exception of the object ID counter (which is not reset).
178 removeAmbiguous : `bool`, optional
179 Should ambiguous matches be removed from the match
180 catalog? Defaults to True.
184 result : `lsst.afw.table.base.Catalog`
185 Final match catalog, sorted by object.
189 for record
in self.
resultresult:
191 result.append(record)
193 result = self.
resultresult
195 result = result.copy(deep=
True)
203 """A mapping (i.e. dict-like object) that provides convenient
204 operations on the concatenated catalogs returned by a MultiMatch
207 A GroupView provides access to a catalog of grouped objects, in
208 which the grouping is indicated by a field for which all records
209 in a group have the same value. Once constructed, it allows
210 operations similar to those supported by SQL "GROUP BY", such as
211 filtering and aggregate calculation.
215 schema : `lsst.afw.table.Schema`
216 Catalog schema to use for the grouped object catalog.
218 List of identifying keys for the groups in the catalog.
220 List of catalog subsets associated with each key in ids.
def build(cls, catalog, groupField="object"):
    """Construct a GroupView from a concatenated catalog.

    Parameters
    ----------
    catalog : `lsst.afw.table.base.Catalog`
        Input catalog, containing records grouped by a field in
        which all records in the same group have the same value.
        Must be sorted by the group field.
    groupField : `str`, optional
        Name or Key for the field that indicates groups.  Defaults
        to "object".

    Returns
    -------
    groupCatalog : `lsst.afw.table.multiMatch.GroupView`
        Constructed GroupView from the input concatenated catalog.
    """
    key = catalog.schema.find(groupField).key
    # Unique group IDs and the index of each group's first record;
    # because the catalog is sorted by the group field, each group
    # occupies one contiguous slice of the catalog.
    groupIds, firstIndices = numpy.unique(catalog.get(key), return_index=True)
    subsets = numpy.zeros(len(groupIds), dtype=object)
    lastIndices = list(firstIndices[1:]) + [len(catalog)]
    for position, (begin, end) in enumerate(zip(firstIndices, lastIndices)):
        subsets[position] = catalog[begin:end]
        # Sanity check: every record in this slice must carry the
        # slice's group ID (fails if the catalog was not sorted).
        assert (subsets[position].get(key) == groupIds[position]).all()
    return cls(catalog.schema, groupIds, subsets)
258 return len(self.
idsids)
264 index = numpy.searchsorted(self.
idsids, key)
265 if self.
idsids[index] != key:
266 raise KeyError(
"Group with ID {0} not found".
format(key))
267 return self.
groupsgroups[index]
270 """Return a new GroupView that contains only groups for which the
271 given predicate function returns True.
273 The predicate function is called once for each group, and
274 passed a single argument: the subset catalog for that group.
279 Function to identify which groups should be included in
284 outGroupView : `lsst.afw.table.multiMatch.GroupView`
285 Subset GroupView containing only groups that match the
288 mask = numpy.zeros(len(self), dtype=bool)
289 for i
in range(len(self)):
290 mask[i] = predicate(self.
groupsgroups[i])
294 """Run an aggregate function on each group, returning an array with
295 one element for each group.
300 Callable object that computes the aggregate value. If
301 `field` is None, called with the entire subset catalog as an
302 argument. If `field` is not None, called with an array view
304 field : `str`, optional
305 A string name or Key object that indicates a single field the aggregate
308 Data type of the output array.
312 result : Array of `dtype`
313 Aggregated values for each group.
315 result = numpy.zeros(len(self), dtype=dtype)
316 if field
is not None:
317 key = self.
schemaschema.find(field).key
320 return function(cat.get(key))
323 for i
in range(len(self)):
324 result[i] = f(self.
groupsgroups[i])
327 def apply(self, function, field=None, dtype=float):
328 """Run a non-aggregate function on each group, returning an array with
329 one element for each record.
334 Callable object that computes the aggregate value. If field is None,
335 called with the entire subset catalog as an argument. If field is not
336 None, called with an array view into that field.
338 A string name or Key object that indicates a single field the aggregate
341 Data type for the output array.
345 result : `numpy.array` of `dtype`
346 Result of the function calculated on an element-by-element basis.
348 result = numpy.zeros(self.
countcount, dtype=dtype)
349 if field
is not None:
350 key = self.
schemaschema.find(field).key
353 return function(cat.get(key))
357 for i
in range(len(self)):
358 next = last + len(self.
groupsgroups[i])
359 result[last:next] = f(self.
groupsgroups[i])
std::vector< SchemaItem< Flag > > * items
def where(self, predicate)
def aggregate(self, function, field=None, dtype=float)
def build(cls, catalog, groupField="object")
def __init__(self, schema, ids, groups)
def apply(self, function, field=None, dtype=float)
def __getitem__(self, key)
def __init__(self, schema, dataIdFormat, coordField="coord", idField="id", radius=None, RecordClass=SourceRecord)
def add(self, catalog, dataId)
def finish(self, removeAmbiguous=True)
def makeRecord(self, inputRecord, dataId, objId)
A class representing an angle.
daf::base::PropertyList * list
daf::base::PropertySet * set
std::shared_ptr< FrameSet > append(FrameSet const &first, FrameSet const &second)
Construct a FrameSet that performs two transformations in series.
std::vector< Match< typename Cat1::Record, typename Cat2::Record > > matchRaDec(Cat1 const &cat1, Cat2 const &cat2, lsst::geom::Angle radius, MatchControl const &mc=MatchControl())
Compute all tuples (s1,s2,d) where s1 belings to cat1, s2 belongs to cat2 and d, the distance between...
bool all(CoordinateExpr< N > const &expr) noexcept
Return true if all elements are true.
def format(config, name=None, writeSourceLine=True, prefix="", verbose=False)
def getInputSchema(task, butler=None, schema=None)
Obtain the input schema either directly or froma butler reference.