LSST Applications  21.0.0-147-g0e635eb1+1acddb5be5,22.0.0+052faf71bd,22.0.0+1ea9a8b2b2,22.0.0+6312710a6c,22.0.0+729191ecac,22.0.0+7589c3a021,22.0.0+9f079a9461,22.0.1-1-g7d6de66+b8044ec9de,22.0.1-1-g87000a6+536b1ee016,22.0.1-1-g8e32f31+6312710a6c,22.0.1-10-gd060f87+016f7cdc03,22.0.1-12-g9c3108e+df145f6f68,22.0.1-16-g314fa6d+c825727ab8,22.0.1-19-g93a5c75+d23f2fb6d8,22.0.1-19-gb93eaa13+aab3ef7709,22.0.1-2-g8ef0a89+b8044ec9de,22.0.1-2-g92698f7+9f079a9461,22.0.1-2-ga9b0f51+052faf71bd,22.0.1-2-gac51dbf+052faf71bd,22.0.1-2-gb66926d+6312710a6c,22.0.1-2-gcb770ba+09e3807989,22.0.1-20-g32debb5+b8044ec9de,22.0.1-23-gc2439a9a+fb0756638e,22.0.1-3-g496fd5d+09117f784f,22.0.1-3-g59f966b+1e6ba2c031,22.0.1-3-g849a1b8+f8b568069f,22.0.1-3-gaaec9c0+c5c846a8b1,22.0.1-32-g5ddfab5d3+60ce4897b0,22.0.1-4-g037fbe1+64e601228d,22.0.1-4-g8623105+b8044ec9de,22.0.1-5-g096abc9+d18c45d440,22.0.1-5-g15c806e+57f5c03693,22.0.1-7-gba73697+57f5c03693,master-g6e05de7fdc+c1283a92b8,master-g72cdda8301+729191ecac,w.2021.39
LSST Data Management Base Package
simpleAssociation.py
Go to the documentation of this file.
1 # This file is part of pipe_tasks.
2 
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 #
22 
23 """Simple association algorithm for DRP.
24 Adapted from http://github.com/LSSTDESC/dia_pipe
25 """
26 
27 import numpy as np
28 import pandas as pd
29 
30 import lsst.afw.table as afwTable
31 import lsst.geom as geom
32 import lsst.pex.config as pexConfig
33 import lsst.pipe.base as pipeBase
34 from lsst.obs.base import ExposureIdInfo
35 
36 from .associationUtils import query_disc, eq2xyz, toIndex
37 
38 
39 class SimpleAssociationConfig(pexConfig.Config):
40  """Configuration parameters for the SimpleAssociationTask
41  """
42  tolerance = pexConfig.Field(
43  dtype=float,
44  doc='maximum distance to match sources together in arcsec',
45  default=0.5
46  )
47  nside = pexConfig.Field(
48  dtype=int,
49  doc='Healpix nside value used for indexing',
50  default=2**18,
51  )
52 
53 
54 class SimpleAssociationTask(pipeBase.Task):
55  """Construct DiaObjects from a DataFrame of DIASources by spatially
56  associating the sources.
57 
58  Represents a simple, brute force algorithm, 2-way matching of DiaSources
59  into. DiaObjects. Algorithm picks the nearest, first match within the
60  matching radius of a DiaObject to associate a source to for simplicity.
61  """
62  ConfigClass = SimpleAssociationConfig
63  _DefaultName = "simpleAssociation"
64 
65  def run(self, diaSources, tractPatchId, skymapBits):
66  """Associate DiaSources into a collection of DiaObjects using a
67  brute force matching algorithm.
68 
69  Reproducible is for the same input data is assured by ordering the
70  DiaSource data by ccdVisit ordering.
71 
72  Parameters
73  ----------
74  diaSources : `pandas.DataFrame`
75  DiaSources grouped by CcdVisitId to spatially associate into
76  DiaObjects.
77  tractPatchId : `int`
78  Unique identifier for the tract patch.
79  skymapBits : `int`
80  Maximum number of bits used the ``tractPatchId`` integer
81  identifier.
82 
83  Returns
84  -------
85  results : `lsst.pipe.base.Struct`
86  Results struct with attributes:
87 
88  ``assocDiaSources``
89  Table of DiaSources with updated values for the DiaObjects
90  they are spatially associated to (`pandas.DataFrame`).
91  ``diaObjects``
92  Table of DiaObjects from matching DiaSources
93  (`pandas.DataFrame`).
94 
95  """
96  # Sort by ccdVisit and diaSourceId to get a reproducible ordering for
97  # the association.
98  diaSources.reset_index(inplace=True)
99  diaSources.set_index(["ccdVisitId", "diaSourceId"], inplace=True)
100 
101  # Empty lists to store matching and location data.
102  diaObjectCat = []
103  diaObjectCoords = []
104  healPixIndices = []
105 
106  # Create Id factory and catalog for creating DiaObjectIds.
107  exposureIdInfo = ExposureIdInfo(tractPatchId, skymapBits)
108  idFactory = exposureIdInfo.makeSourceIdFactory()
109  idCat = afwTable.SourceCatalog(
110  afwTable.SourceTable.make(afwTable.SourceTable.makeMinimalSchema(),
111  idFactory))
112 
113  for ccdVisit in diaSources.index.levels[0]:
114  # For the first ccdVisit, just copy the DiaSource info into the
115  # diaObject data to create the first set of Objects.
116  ccdVisitSources = diaSources.loc[ccdVisit]
117  if len(diaObjectCat) == 0:
118  for diaSourceId, diaSrc in ccdVisitSources.iterrows():
119  self.addNewDiaObjectaddNewDiaObject(diaSrc,
120  diaSources,
121  ccdVisit,
122  diaSourceId,
123  diaObjectCat,
124  idCat,
125  diaObjectCoords,
126  healPixIndices)
127  continue
128  # Temp list to store DiaObjects already used for this ccdVisit.
129  usedMatchIndicies = []
130  # Run over subsequent data.
131  for diaSourceId, diaSrc in ccdVisitSources.iterrows():
132  # Find matches.
133  matchResult = self.findMatchesfindMatches(diaSrc["ra"],
134  diaSrc["decl"],
135  2*self.config.tolerance,
136  healPixIndices,
137  diaObjectCat)
138  dists = matchResult.dists
139  matches = matchResult.matches
140  # Create a new DiaObject if no match found.
141  if dists is None:
142  self.addNewDiaObjectaddNewDiaObject(diaSrc,
143  diaSources,
144  ccdVisit,
145  diaSourceId,
146  diaObjectCat,
147  idCat,
148  diaObjectCoords,
149  healPixIndices)
150  continue
151  # If matched, update catalogs and arrays.
152  if np.min(dists) < np.deg2rad(self.config.tolerance/3600):
153  matchDistArg = np.argmin(dists)
154  matchIndex = matches[matchDistArg]
155  # Test to see if the DiaObject has been used.
156  if np.isin([matchIndex], usedMatchIndicies).sum() < 1:
157  self.updateCatalogsupdateCatalogs(matchIndex,
158  diaSrc,
159  diaSources,
160  ccdVisit,
161  diaSourceId,
162  diaObjectCat,
163  diaObjectCoords,
164  healPixIndices)
165  usedMatchIndicies.append(matchIndex)
166  # If the matched DiaObject has already been used, create a
167  # new DiaObject for this DiaSource.
168  else:
169  self.addNewDiaObjectaddNewDiaObject(diaSrc,
170  diaSources,
171  ccdVisit,
172  diaSourceId,
173  diaObjectCat,
174  idCat,
175  diaObjectCoords,
176  healPixIndices)
177  # Create new DiaObject if no match found within the matching
178  # tolerance.
179  else:
180  self.addNewDiaObjectaddNewDiaObject(diaSrc,
181  diaSources,
182  ccdVisit,
183  diaSourceId,
184  diaObjectCat,
185  idCat,
186  diaObjectCoords,
187  healPixIndices)
188 
189  # Drop indices before returning associated diaSource catalog.
190  diaSources.reset_index(inplace=True)
191  diaSources.set_index("diaSourceId", inplace=True, verify_integrity=True)
192 
193  diaObjects = pd.DataFrame(data=diaObjectCat)
194  diaSources.reset_index(inplace=True)
195  if "diaObjectId" in diaObjects.columns:
196  diaObjects.set_index("diaObjectId", inplace=True, verify_integrity=True)
197 
198  return pipeBase.Struct(
199  assocDiaSources=diaSources,
200  diaObjects=diaObjects)
201 
202  def addNewDiaObject(self,
203  diaSrc,
204  diaSources,
205  ccdVisit,
206  diaSourceId,
207  diaObjCat,
208  idCat,
209  diaObjCoords,
210  healPixIndices):
211  """Create a new DiaObject and append its data.
212 
213  Parameters
214  ----------
215  diaSrc : `pandas.Series`
216  Full unassociated DiaSource to create a DiaObject from.
217  diaSources : `pandas.DataFrame`
218  DiaSource catalog to update information in. The catalog is
219  modified in place.
220  ccdVisit : `int`
221  Unique identifier of the ccdVisit where ``diaSrc`` was observed.
222  diaSourceId : `int`
223  Unique identifier of the DiaSource.
224  diaObjectCat : `list` of `dict`s
225  Catalog of diaObjects to append the new object o.
226  idCat : `lsst.afw.table.SourceCatalog`
227  Catalog with the IdFactory used to generate unique DiaObject
228  identifiers.
229  diaObjectCoords : `list` of `list`s of `lsst.geom.SpherePoint`s
230  Set of coordinates of DiaSource locations that make up the
231  DiaObject average coordinate.
232  healPixIndices : `list` of `int`s
233  HealPix indices representing the locations of each currently
234  existing DiaObject.
235  """
236  hpIndex = toIndex(self.config.nside,
237  diaSrc["ra"],
238  diaSrc["decl"])
239  healPixIndices.append(hpIndex)
240 
241  sphPoint = geom.SpherePoint(diaSrc["ra"],
242  diaSrc["decl"],
243  geom.degrees)
244  diaObjCoords.append([sphPoint])
245 
246  diaObjId = idCat.addNew().get("id")
247  diaObjCat.append(self.createDiaObjectcreateDiaObject(diaObjId,
248  diaSrc["ra"],
249  diaSrc["decl"]))
250  diaSources.loc[(ccdVisit, diaSourceId), "diaObjectId"] = diaObjId
251 
252  def updateCatalogs(self,
253  matchIndex,
254  diaSrc,
255  diaSources,
256  ccdVisit,
257  diaSourceId,
258  diaObjCat,
259  diaObjCoords,
260  healPixIndices):
261  """Update DiaObject and DiaSource values after an association.
262 
263  Parameters
264  ----------
265  matchIndex : `int`
266  Array index location of the DiaObject that ``diaSrc`` was
267  associated to.
268  diaSrc : `pandas.Series`
269  Full unassociated DiaSource to create a DiaObject from.
270  diaSources : `pandas.DataFrame`
271  DiaSource catalog to update information in. The catalog is
272  modified in place.
273  ccdVisit : `int`
274  Unique identifier of the ccdVisit where ``diaSrc`` was observed.
275  diaSourceId : `int`
276  Unique identifier of the DiaSource.
277  diaObjectCat : `list` of `dict`s
278  Catalog of diaObjects to append the new object o.
279  diaObjectCoords : `list` of `list`s of `lsst.geom.SpherePoint`s
280  Set of coordinates of DiaSource locations that make up the
281  DiaObject average coordinate.
282  healPixIndices : `list` of `int`s
283  HealPix indices representing the locations of each currently
284  existing DiaObject.
285  """
286  # Update location and healPix index.
287  sphPoint = geom.SpherePoint(diaSrc["ra"],
288  diaSrc["decl"],
289  geom.degrees)
290  diaObjCoords[matchIndex].append(sphPoint)
291  aveCoord = geom.averageSpherePoint(diaObjCoords[matchIndex])
292  diaObjCat[matchIndex]["ra"] = aveCoord.getRa().asDegrees()
293  diaObjCat[matchIndex]["decl"] = aveCoord.getDec().asDegrees()
294  nSources = diaObjCat[matchIndex]["nDiaSources"]
295  diaObjCat[matchIndex]["nDiaSources"] = nSources + 1
296  healPixIndices[matchIndex] = toIndex(self.config.nside,
297  diaObjCat[matchIndex]["ra"],
298  diaObjCat[matchIndex]["decl"])
299  # Update DiaObject Id that this source is now associated to.
300  diaSources.loc[(ccdVisit, diaSourceId), "diaObjectId"] = \
301  diaObjCat[matchIndex]["diaObjectId"]
302 
303  def findMatches(self, src_ra, src_dec, tol, hpIndices, diaObjs):
304  """Search healPixels around DiaSource locations for DiaObjects.
305 
306  Parameters
307  ----------
308  src_ra : `float`
309  DiaSource RA location.
310  src_dec : `float`
311  DiaSource Dec location.
312  tol : `float`
313  Size of annulus to convert to covering healPixels and search for
314  DiaObjects.
315  hpIndices : `list` of `int`s
316  List of heal pix indices containing the DiaObjects in ``diaObjs``.
317  diaObjs : `list` of `dict`s
318  Catalog diaObjects to with full location information for comparing
319  to DiaSources.
320 
321  Returns
322  -------
323  results : `lsst.pipe.base.Struct`
324  Results struct containing
325 
326  ``dists``
327  Array of distances between the current DiaSource diaObjects.
328  (`numpy.ndarray` or `None`)
329  ``matches``
330  Array of array indices of diaObjects this DiaSource matches to.
331  (`numpy.ndarray` or `None`)
332  """
333  match_indices = query_disc(self.config.nside,
334  src_ra,
335  src_dec,
336  np.deg2rad(tol/3600.))
337  matchIndices = np.argwhere(np.isin(hpIndices, match_indices)).flatten()
338 
339  if len(matchIndices) < 1:
340  return pipeBase.Struct(dists=None, matches=None)
341 
342  dists = np.array(
343  [np.sqrt(np.sum((eq2xyz(src_ra, src_dec)
344  - eq2xyz(diaObjs[match]["ra"],
345  diaObjs[match]["decl"]))**2))
346  for match in matchIndices])
347  return pipeBase.Struct(
348  dists=dists,
349  matches=matchIndices)
350 
351  def createDiaObject(self, objId, ra, decl):
352  """Create a simple empty DiaObject with location and id information.
353 
354  Parameters
355  ----------
356  objId : `int`
357  Unique ID for this new DiaObject.
358  ra : `float`
359  RA location of this DiaObject.
360  decl : `float`
361  Dec location of this DiaObject
362 
363  Returns
364  -------
365  DiaObject : `dict`
366  Dictionary of values representing a DiaObject.
367  """
368  new_dia_object = {"diaObjectId": objId,
369  "ra": ra,
370  "decl": decl,
371  "nDiaSources": 1}
372  return new_dia_object
Point in an unspecified spherical coordinate system.
Definition: SpherePoint.h:57
def updateCatalogs(self, matchIndex, diaSrc, diaSources, ccdVisit, diaSourceId, diaObjCat, diaObjCoords, healPixIndices)
def addNewDiaObject(self, diaSrc, diaSources, ccdVisit, diaSourceId, diaObjCat, idCat, diaObjCoords, healPixIndices)
def findMatches(self, src_ra, src_dec, tol, hpIndices, diaObjs)
def run(self, diaSources, tractPatchId, skymapBits)
std::shared_ptr< FrameSet > append(FrameSet const &first, FrameSet const &second)
Construct a FrameSet that performs two transformations in series.
Definition: functional.cc:33
SpherePoint averageSpherePoint(std::vector< SpherePoint > const &coords)
Return the average of a list of coordinates.
Definition: SpherePoint.cc:235
def query_disc(nside, ra, dec, max_rad, min_rad=0)