import collections.abc

import numpy

import lsst.geom
from ._schemaMapper import SchemaMapper
from ._table import CoordKey, SourceRecord


class MultiMatch:
    """Initialize a multi-catalog match.

    Parameters
    ----------
    schema : `lsst.afw.table.Schema`
        Schema shared by all catalogs to be included in the match.
    dataIdFormat : `dict`
        Set of name: type for all data ID keys (e.g. {"visit": int,
        "ccd": int}).
    coordField : `str`, optional
        Prefix for _ra and _dec fields that contain the
        coordinates to use for the match.
    idField : `str`, optional
        Name of the field in schema that contains unique object
        IDs.
    radius : `lsst.geom.Angle`, optional
        Maximum separation for a match.  Defaults to 0.5 arcseconds.
    RecordClass : `lsst.afw.table.BaseRecord`
        Type of record to expect in catalogs to be matched.
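
    Examples
    --------
    A minimal usage sketch (not a tested doctest): ``schema`` is assumed to
    be the `lsst.afw.table.Schema` shared by the input catalogs, and
    ``catalogsByDataId`` an assumed dict mapping data IDs to
    `~lsst.afw.table.SourceCatalog` objects::

        multi = MultiMatch(schema, dataIdFormat={"visit": int, "ccd": int},
                           radius=0.5*lsst.geom.arcseconds)
        for dataId, catalog in catalogsByDataId.items():
            multi.add(catalog, dataId)
        fullCatalog = multi.finish()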
    """

    def __init__(self, schema, dataIdFormat, coordField="coord", idField="id", radius=None,
                 RecordClass=SourceRecord):
        if radius is None:
            radius = 0.5*lsst.geom.arcseconds
        elif not isinstance(radius, lsst.geom.Angle):
            raise ValueError("'radius' argument must be an Angle")
        self.radius = radius
        self.mapper = SchemaMapper(schema)
        self.mapper.addMinimalSchema(schema, True)
        self.coordKey = CoordKey(schema[coordField])
        self.idKey = schema.find(idField).key
        self.dataIdKeys = {}
        outSchema = self.mapper.editOutputSchema()
        self.objectKey = outSchema.addField(
            "object", type=numpy.int64, doc="Unique ID for joined sources")
        for name, dataType in dataIdFormat.items():
            self.dataIdKeys[name] = outSchema.addField(
                name, type=dataType, doc="'%s' data ID component" % name)
        # In-progress state: the union of all records added so far, one
        # representative record per object, and the set of object IDs with
        # ambiguous matches.
        self.result = None
        self.reference = None
        self.ambiguous = set()
        # Table used to allocate new records for the output catalog, and the
        # counter used to assign the next object ID.
        self.table = RecordClass.Table.make(self.mapper.getOutputSchema())
        self.nextObjId = 1

    def makeRecord(self, inputRecord, dataId, objId):
        """Create a new result record from the given input record, using the
        given data ID and object ID to fill in additional columns.

        Parameters
        ----------
        inputRecord : `lsst.afw.table.SourceRecord`
            Record to use as the reference for the new result.
        dataId : `DataId` or `dict`
            Data id describing the data.
        objId : `int`
            Object id of the object to be added.

        Returns
        -------
        outputRecord : `lsst.afw.table.SourceRecord`
            Newly generated record.
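
        Examples
        --------
        A sketch of a single call (not a tested doctest), assuming ``multi``
        is a `MultiMatch` and ``record`` comes from a catalog that uses the
        matcher's input schema; object IDs are normally managed by `add`::

            outputRecord = multi.makeRecord(record, {"visit": 1, "ccd": 2},
                                            objId=42)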
        """
        outputRecord = self.table.copyRecord(inputRecord, self.mapper)
        for name, key in self.dataIdKeys.items():
            outputRecord.set(key, dataId[name])
        outputRecord.set(self.objectKey, objId)
        return outputRecord

    def add(self, catalog, dataId):
        """Add a new catalog to the match, corresponding to the given data ID.
        The new catalog is appended to the `self.result` and
        `self.reference` catalogs.

        Parameters
        ----------
        catalog : `lsst.afw.table.base.Catalog`
            Catalog to be added to the match result.
        dataId : `DataId` or `dict`
            Data id for the catalog to be added.
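
        Examples
        --------
        A sketch of adding a single catalog (not a tested doctest), assuming
        ``multi`` was constructed with ``dataIdFormat={"visit": int, "ccd": int}``
        and ``catalog`` uses the matcher's input schema::

            multi.add(catalog, {"visit": 1, "ccd": 2})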
        """
        if self.result is None:
            # First catalog: every record becomes a new object.
            self.result = self.table.Catalog(self.table)
            for record in catalog:
                self.result.append(self.makeRecord(
                    record, dataId, objId=self.nextObjId))
                self.nextObjId += 1
            self.reference = self.result.copy(deep=False)
            return
        catalog.sort(self.idKey)  # pre-sort for fast find() by ID below
        # IDs of new-catalog records that have not (yet) been matched.
        unmatchedIds = {record.get(self.idKey) for record in catalog}
        # Temporary dict mapping new source ID to the set of associated objects.
        newToObj = {}
        # Match the new catalog against the current reference catalog on sky
        # coordinates.
        matches = lsst.afw.table.matchRaDec(self.reference, catalog, self.radius)
        matchedRefIds = set()
        matchedCatIds = set()
        for refRecord, newRecord, distance in matches:
            objId = refRecord.get(self.objectKey)
            if objId in matchedRefIds:
                # This object has already been matched to another new source;
                # mark it as ambiguous.
                self.ambiguous.add(objId)
            matchedRefIds.add(objId)
            if newRecord.get(self.idKey) in matchedCatIds:
                # This new source has already been matched to another object;
                # mark this object as ambiguous as well.
                self.ambiguous.add(objId)
            matchedCatIds.add(newRecord.get(self.idKey))
            unmatchedIds.discard(newRecord.get(self.idKey))
            newToObj.setdefault(newRecord.get(self.idKey), set()).add(objId)
            # Add a new result record for this match.
            self.result.append(self.makeRecord(newRecord, dataId, objId))
        # Add any unmatched sources from the new catalog as new objects to
        # both the joined result catalog and the reference catalog.
        for objId in unmatchedIds:
            newRecord = catalog.find(objId, self.idKey)
            resultRecord = self.makeRecord(newRecord, dataId, self.nextObjId)
            self.nextObjId += 1
            self.result.append(resultRecord)
            self.reference.append(resultRecord)

    def finish(self, removeAmbiguous=True):
        """Return the final match catalog, after sorting it by object, copying
        it to ensure contiguousness, and optionally removing ambiguous
        matches.

        After calling finish(), the in-progress state of the matcher
        is returned to the state it was just after construction, with
        the exception of the object ID counter (which is not reset).

        Parameters
        ----------
        removeAmbiguous : `bool`, optional
            Should ambiguous matches be removed from the match
            catalog?  Defaults to True.

        Returns
        -------
        result : `lsst.afw.table.base.Catalog`
            Final match catalog, sorted by object.
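
        Examples
        --------
        A sketch (not a tested doctest), assuming ``multi`` is a `MultiMatch`
        to which all input catalogs have already been added; the result is
        typically passed to `GroupView.build`::

            fullCatalog = multi.finish()
            groups = GroupView.build(fullCatalog)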
        """
        if removeAmbiguous:
            result = self.table.Catalog(self.table)
            for record in self.result:
                if record.get(self.objectKey) not in self.ambiguous:
                    result.append(record)
        else:
            result = self.result
        result.sort(self.objectKey)
        result = result.copy(deep=True)
        # Reset the in-progress state (but not the object ID counter).
        self.result = None
        self.reference = None
        self.ambiguous = set()
        return result


class GroupView(collections.abc.Mapping):
    """A mapping (i.e. dict-like object) that provides convenient
    operations on the concatenated catalogs returned by a MultiMatch
    object.

    A GroupView provides access to a catalog of grouped objects, in
    which the grouping is indicated by a field for which all records
    in a group have the same value.  Once constructed, it allows
    operations similar to those supported by SQL "GROUP BY", such as
    filtering and aggregate calculation.

    Parameters
    ----------
    schema : `lsst.afw.table.Schema`
        Catalog schema to use for the grouped object catalog.
    ids : `list`
        List of identifying keys for the groups in the catalog.
    groups : `list`
        List of catalog subsets associated with each key in ids.
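
    Examples
    --------
    A sketch of the mapping interface (not a tested doctest), assuming
    ``allMatches`` is the sorted, contiguous catalog returned by
    `MultiMatch.finish`::

        groups = GroupView.build(allMatches)
        nObjects = len(groups)          # number of distinct objects
        for objId in groups:            # iterate over object IDs
            subCatalog = groups[objId]  # records belonging to one object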
    """

    @classmethod
    def build(cls, catalog, groupField="object"):
        """Construct a GroupView from a concatenated catalog.

        Parameters
        ----------
        catalog : `lsst.afw.table.base.Catalog`
            Input catalog, containing records grouped by a field in
            which all records in the same group have the same value.
            Must be sorted by the group field.
        groupField : `str`, optional
            Name or Key for the field that indicates groups.  Defaults
            to "object".

        Returns
        -------
        groupCatalog : `lsst.afw.table.multiMatch.GroupView`
            Constructed GroupView from the input concatenated catalog.
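
        Examples
        --------
        A sketch (not a tested doctest), assuming ``allMatches`` is the
        catalog returned by `MultiMatch.finish`, which is already sorted by
        the "object" field::

            groups = GroupView.build(allMatches, groupField="object")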
        """
        groupKey = catalog.schema.find(groupField).key
        ids, indices = numpy.unique(catalog.get(groupKey), return_index=True)
        groups = numpy.zeros(len(ids), dtype=object)
        ends = list(indices[1:]) + [len(catalog)]
        for n, (i1, i2) in enumerate(zip(indices, ends)):
            # Cast the numpy indices to Python ints for catalog slicing.
            groups[n] = catalog[int(i1):int(i2)]
            assert (groups[n].get(groupKey) == ids[n]).all()
        return cls(catalog.schema, ids, groups)

    def __init__(self, schema, ids, groups):
        self.schema = schema
        self.ids = ids
        self.groups = groups
        self.count = sum(len(cat) for cat in self.groups)

    def __len__(self):
        # Number of groups.
        return len(self.ids)

    def __iter__(self):
        # Iterate over group IDs.
        return iter(self.ids)

    def __getitem__(self, key):
        # Return the catalog subset corresponding to the given group ID.
        index = numpy.searchsorted(self.ids, key)
        if self.ids[index] != key:
            raise KeyError("Group with ID {0} not found".format(key))
        return self.groups[index]

    def where(self, predicate):
        """Return a new GroupView that contains only groups for which the
        given predicate function returns True.

        The predicate function is called once for each group, and
        passed a single argument: the subset catalog for that group.

        Parameters
        ----------
        predicate : callable
            Function to identify which groups should be included in
            the output.

        Returns
        -------
        outGroupView : `lsst.afw.table.multiMatch.GroupView`
            Subset GroupView containing only groups that match the
            predicate.
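
        Examples
        --------
        A sketch (not a tested doctest) that keeps only objects observed at
        least three times, assuming ``groups`` is an existing `GroupView`::

            frequent = groups.where(lambda cat: len(cat) >= 3)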
        """
        mask = numpy.zeros(len(self), dtype=bool)
        for i in range(len(self)):
            mask[i] = predicate(self.groups[i])
        return type(self)(self.schema, self.ids[mask], self.groups[mask])

    def aggregate(self, function, field=None, dtype=float):
        """Run an aggregate function on each group, returning an array with
        one element for each group.

        Parameters
        ----------
        function : callable
            Callable object that computes the aggregate value.  If
            `field` is None, called with the entire subset catalog as an
            argument.  If `field` is not None, called with an array view
            into that field.
        field : `str`, optional
            A string name or Key object that indicates a single field the
            aggregate is computed over.
        dtype : type, optional
            Data type of the output array.

        Returns
        -------
        result : array of `dtype`
            Aggregated values for each group.
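
        Examples
        --------
        A sketch (not a tested doctest), assuming ``groups`` is a `GroupView`
        whose schema contains a hypothetical "flux" field::

            meanFlux = groups.aggregate(numpy.mean, field="flux")
            nRecords = groups.aggregate(len)  # function receives each subset catalog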
        """
        result = numpy.zeros(len(self), dtype=dtype)
        if field is not None:
            key = self.schema.find(field).key

            def f(cat):
                return function(cat.get(key))
        else:
            f = function
        for i in range(len(self)):
            result[i] = f(self.groups[i])
        return result

    def apply(self, function, field=None, dtype=float):
        """Run a non-aggregate function on each group, returning an array with
        one element for each record.

        Parameters
        ----------
        function : callable
            Callable object that computes the output values.  If `field` is
            None, called with the entire subset catalog as an argument.  If
            `field` is not None, called with an array view into that field.
        field : `str`, optional
            A string name or Key object that indicates a single field the
            function is applied to.
        dtype : type, optional
            Data type for the output array.

        Returns
        -------
        result : `numpy.ndarray` of `dtype`
            Result of the function calculated on an element-by-element basis.
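
        Examples
        --------
        A sketch (not a tested doctest) that scales each record's value by its
        group mean, assuming ``groups`` is a `GroupView` with a hypothetical
        "flux" field; the result has one element per record, not per group::

            relFlux = groups.apply(lambda flux: flux / numpy.mean(flux),
                                   field="flux")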
        """
        result = numpy.zeros(self.count, dtype=dtype)
        if field is not None:
            key = self.schema.find(field).key

            def f(cat):
                return function(cat.get(key))
        else:
            f = function
        last = 0
        for i in range(len(self)):
            next = last + len(self.groups[i])
            result[last:next] = f(self.groups[i])
            last = next
        return result