multiMatch.py
import collections.abc

import numpy

import lsst.geom
from .schemaMapper import SchemaMapper
from .aggregates import CoordKey
from .source import SourceRecord


class MultiMatch:

    def __init__(self, schema, dataIdFormat, coordField="coord", idField="id", radius=None,
                 RecordClass=SourceRecord):
        """Initialize a multi-catalog match.

        Arguments:
          schema -------- schema shared by all catalogs to be included in the match.
          dataIdFormat -- dict of name: type for all data ID keys (e.g. {"visit": int, "ccd": int}).
          coordField ---- prefix for the _ra and _dec fields that contain the coordinates to use for the match.
          idField ------- name of the field in schema that contains unique object IDs.
          radius -------- lsst.geom.Angle; maximum separation for a match.  Defaults to 0.5 arcseconds.
          RecordClass --- type of record (a subclass of lsst.afw.table.BaseRecord) to expect in catalogs
                          to be matched.
        """
        if radius is None:
            radius = 0.5*lsst.geom.arcseconds
        elif not isinstance(radius, lsst.geom.Angle):
            raise ValueError("'radius' argument must be an Angle")
        self.radius = radius
        self.mapper = SchemaMapper(schema)
        self.mapper.addMinimalSchema(schema, True)
        self.coordKey = CoordKey(schema[coordField])
        self.idKey = schema.find(idField).key
        self.dataIdKeys = {}
        outSchema = self.mapper.editOutputSchema()
        self.objectKey = outSchema.addField(
            "object", type=numpy.int64, doc="Unique ID for joined sources")
        for name, dataType in dataIdFormat.items():
            self.dataIdKeys[name] = outSchema.addField(
                name, type=dataType, doc="'%s' data ID component" % name)
        # self.result will be a catalog containing the union of all matched records, with an 'object' ID
        # column that can be used to group matches.  Sources that have ambiguous matches may appear
        # multiple times.
        self.result = None
        # self.reference will be a subset of self.result, with exactly one record for each group of matches
        # (we'll use the one from the first catalog matched into this group).
        # We'll use this to match against each subsequent catalog.
        self.reference = None
        # A set of ambiguous objects that we may want to ultimately remove from
        # the final merged catalog.
        self.ambiguous = set()
        # Table used to allocate new records for the output catalog.
        self.table = RecordClass.Table.make(self.mapper.getOutputSchema())
        # Counter used to assign the next object ID.
        self.nextObjId = 1

    def makeRecord(self, inputRecord, dataId, objId):
        """Create a new result record from the given input record, using the given data ID and object ID
        to fill in additional columns."""
        outputRecord = self.table.copyRecord(inputRecord, self.mapper)
        for name, key in self.dataIdKeys.items():
            outputRecord.set(key, dataId[name])
        outputRecord.set(self.objectKey, objId)
        return outputRecord

    def add(self, catalog, dataId):
        """Add a new catalog to the match, corresponding to the given data ID.
        """
        if self.result is None:
            self.result = self.table.Catalog(self.table)
            for record in catalog:
                self.result.append(self.makeRecord(
                    record, dataId, objId=self.nextObjId))
                self.nextObjId += 1
            self.reference = self.result.copy(deep=False)
            return
        catalog.sort(self.idKey)  # pre-sort for speedy by-id access later
        # Will remove from this set as objects are matched.
        unmatchedIds = {record.get(self.idKey) for record in catalog}
        # Temporary dict mapping new source ID to a set of associated objects.
        newToObj = {}
        matches = lsst.afw.table.matchRaDec(self.reference, catalog, self.radius)
        matchedRefIds = set()
        matchedCatIds = set()
        for refRecord, newRecord, distance in matches:
            objId = refRecord.get(self.objectKey)
            if objId in matchedRefIds:
                # We've already matched this object against another new source;
                # mark it as ambiguous.
                self.ambiguous.add(objId)
            matchedRefIds.add(objId)
            if newRecord.get(self.idKey) in matchedCatIds:
                # We've already matched this new source to one or more other objects.
                # Mark all involved objects as ambiguous.
                self.ambiguous.add(objId)
                self.ambiguous |= newToObj.get(newRecord.get(self.idKey), set())
            matchedCatIds.add(newRecord.get(self.idKey))
            unmatchedIds.discard(newRecord.get(self.idKey))
            # Populate the newToObj dict (the setdefault trick is an idiom for
            # appending to a dict-of-sets).
            newToObj.setdefault(newRecord.get(self.idKey), set()).add(objId)
            # Add a new result record for this match.
            self.result.append(self.makeRecord(newRecord, dataId, objId))
        # Add any unmatched sources from the new catalog as new objects to both
        # the joined result catalog and the reference catalog.
        for objId in unmatchedIds:
            newRecord = catalog.find(objId, self.idKey)
            resultRecord = self.makeRecord(newRecord, dataId, self.nextObjId)
            self.nextObjId += 1
            self.result.append(resultRecord)
            self.reference.append(resultRecord)

    def finish(self, removeAmbiguous=True):
        """Return the final match catalog, after sorting it by object, copying it to ensure contiguousness,
        and optionally removing ambiguous matches.

        After calling finish(), the matcher is reset to the state it had just after construction,
        with the exception of the object ID counter (which is not reset).
        """
        if removeAmbiguous:
            result = self.table.Catalog(self.table)
            for record in self.result:
                if record.get(self.objectKey) not in self.ambiguous:
                    result.append(record)
        else:
            result = self.result
        result.sort(self.objectKey)
        result = result.copy(deep=True)
        self.result = None
        self.reference = None
        self.ambiguous = set()
        return result


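# A minimal usage sketch (not part of the original module), showing how a MultiMatch
# might be driven over several per-exposure catalogs.  The shared 'schema' and the
# (dataId, catalog) pairs in 'inputCatalogs' below are assumed inputs, not defined here.
#
#   multi = MultiMatch(schema, dataIdFormat={"visit": int, "ccd": int},
#                      radius=0.5*lsst.geom.arcseconds)
#   for dataId, catalog in inputCatalogs:
#       multi.add(catalog, dataId)
#   matched = multi.finish(removeAmbiguous=True)
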
class GroupView(collections.abc.Mapping):
    """A mapping (i.e. dict-like object) that provides convenient operations on the concatenated
    catalogs returned by a MultiMatch object.

    A GroupView provides access to a catalog of grouped objects, in which the grouping is indicated by
    a field for which all records in a group have the same value.  Once constructed, it allows operations
    similar to those supported by SQL "GROUP BY", such as filtering and aggregate calculation.
    """

    @classmethod
    def build(cls, catalog, groupField="object"):
        """!Construct a GroupView from a concatenated catalog.

        @param[in]  cls         (Class; omit this argument, but Doxygen wants it mentioned)
        @param[in]  catalog     Input catalog, containing records grouped by a field in which all records
                                in the same group have the same value.  Must be sorted by the group field.
        @param[in]  groupField  Name or Key for the field that indicates groups.
        """
        groupKey = catalog.schema.find(groupField).key
        ids, indices = numpy.unique(catalog.get(groupKey), return_index=True)
        groups = numpy.zeros(len(ids), dtype=object)
        ends = list(indices[1:]) + [len(catalog)]
        for n, (i1, i2) in enumerate(zip(indices, ends)):
            # casts are a work-around for DM-8557
            groups[n] = catalog[int(i1):int(i2)]
            assert (groups[n].get(groupKey) == ids[n]).all()
        return cls(catalog.schema, ids, groups)
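
    # Hedged usage sketch (not in the original source): building a GroupView from the
    # catalog returned by MultiMatch.finish(), which is already sorted by "object".
    # 'matched' is an assumed variable holding that catalog.
    #
    #   groups = GroupView.build(matched, groupField="object")
    #   print(len(groups))   # number of distinct matched objects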

    def __init__(self, schema, ids, groups):
        """Direct constructor; most users should call build() instead.

        This constructor takes the constituent arrays of the object directly, to allow multiple
        methods for construction.
        """
        self.schema = schema
        self.ids = ids
        self.groups = groups
        self.count = sum(len(cat) for cat in self.groups)

    def __len__(self):
        """Return the number of groups"""
        return len(self.ids)

    def __iter__(self):
        """Iterate over group field values"""
        return iter(self.ids)

    def __getitem__(self, key):
        """Return the catalog subset that corresponds to a group field value"""
        index = numpy.searchsorted(self.ids, key)
        if index >= len(self.ids) or self.ids[index] != key:
            raise KeyError("Group with ID {0} not found".format(key))
        return self.groups[index]

    def where(self, predicate):
        """Return a new GroupView that contains only groups for which the given predicate function
        returns True.

        The predicate function is called once for each group, and passed a single argument: the subset
        catalog for that group.
        """
        mask = numpy.zeros(len(self), dtype=bool)
        for i in range(len(self)):
            mask[i] = predicate(self.groups[i])
        return type(self)(self.schema, self.ids[mask], self.groups[mask])

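    # Illustrative filter (an assumption, not part of the original file): keep only
    # objects with a match in at least three of the input catalogs, assuming a
    # GroupView named 'groups' built as above.
    #
    #   wellSampled = groups.where(lambda cat: len(cat) >= 3)
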
    def aggregate(self, function, field=None, dtype=float):
        """!Run an aggregate function on each group, returning an array with one element for each group.

        @param[in]  function  Callable object that computes the aggregate value.  If field is None,
                              called with the entire subset catalog as an argument.  If field is not
                              None, called with an array view into that field.
        @param[in]  field     A string name or Key object that indicates a single field the aggregate
                              is computed over.
        @param[in]  dtype     Data type for the output array.
        """
        result = numpy.zeros(len(self), dtype=dtype)
        if field is not None:
            key = self.schema.find(field).key

            def f(cat):
                return function(cat.get(key))
        else:
            f = function
        for i in range(len(self)):
            result[i] = f(self.groups[i])
        return result

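    # Illustrative aggregate call, assuming a GroupView named 'groups' and a flux
    # field named "base_PsfFlux_flux" (the field name is an assumption, not taken
    # from this file): one mean flux per matched object.
    #
    #   meanFlux = groups.aggregate(numpy.mean, field="base_PsfFlux_flux")
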
    def apply(self, function, field=None, dtype=float):
        """!Run a non-aggregate function on each group, returning an array with one element for each record.

        @param[in]  function  Callable object that computes the output values.  If field is None,
                              called with the entire subset catalog as an argument.  If field is not
                              None, called with an array view into that field.
        @param[in]  field     A string name or Key object that indicates a single field the function
                              is computed over.
        @param[in]  dtype     Data type for the output array.
        """
        result = numpy.zeros(self.count, dtype=dtype)
        if field is not None:
            key = self.schema.find(field).key

            def f(cat):
                return function(cat.get(key))
        else:
            f = function
        last = 0
        for i in range(len(self)):
            end = last + len(self.groups[i])
            result[last:end] = f(self.groups[i])
            last = end
        return result
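
    # Illustrative apply() call (the field name "base_PsfFlux_flux" and the GroupView
    # 'groups' are assumptions, not defined in this file): one value per record, here
    # each record's flux divided by the mean flux of its group, in group order.
    #
    #   fluxRatio = groups.apply(lambda flux: flux/numpy.mean(flux), field="base_PsfFlux_flux")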