LSST Applications g180d380827+0f66a164bb,g2079a07aa2+86d27d4dc4,g2305ad1205+7d304bc7a0,g29320951ab+500695df56,g2bbee38e9b+0e5473021a,g337abbeb29+0e5473021a,g33d1c0ed96+0e5473021a,g3a166c0a6a+0e5473021a,g3ddfee87b4+e42ea45bea,g48712c4677+36a86eeaa5,g487adcacf7+2dd8f347ac,g50ff169b8f+96c6868917,g52b1c1532d+585e252eca,g591dd9f2cf+c70619cc9d,g5a732f18d5+53520f316c,g5ea96fc03c+341ea1ce94,g64a986408d+f7cd9c7162,g858d7b2824+f7cd9c7162,g8a8a8dda67+585e252eca,g99cad8db69+469ab8c039,g9ddcbc5298+9a081db1e4,ga1e77700b3+15fc3df1f7,gb0e22166c9+60f28cb32d,gba4ed39666+c2a2e4ac27,gbb8dafda3b+c92fc63c7e,gbd866b1f37+f7cd9c7162,gc120e1dc64+02c66aa596,gc28159a63d+0e5473021a,gc3e9b769f7+b0068a2d9f,gcf0d15dbbd+e42ea45bea,gdaeeff99f8+f9a426f77a,ge6526c86ff+84383d05b3,ge79ae78c31+0e5473021a,gee10cc3b42+585e252eca,gff1a9f87cc+f7cd9c7162,w.2024.17
LSST Data Management Base Package
Loading...
Searching...
No Matches
diaCalculation.py
Go to the documentation of this file.
1# This file is part of ap_association.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21
22from collections import namedtuple
23import numpy as np
24import pandas as pd
25
26from .catalogCalculation import (CatalogCalculationPluginConfig,
27 CatalogCalculationPlugin,
28 CatalogCalculationConfig,
29 CatalogCalculationTask,
30 CCContext)
31from .pluginsBase import BasePlugin
32from .pluginRegistry import (PluginRegistry, PluginMap)
33import lsst.pipe.base
34from lsst.utils.timer import timeMethod
35
36# Enforce an error for unsafe column/array value setting in pandas.
37pd.options.mode.chained_assignment = 'raise'
38
39__all__ = ("DiaObjectCalculationPlugin", "DiaObjectCalculationPluginConfig",
40 "DiaObjectCalculationTask", "DiaObjectCalculationConfig")
41
42
44 """Default configuration class for DIA catalog calculation plugins.
45 """
46 pass
47
48
50 """Base class for DIA catalog calculation plugins.
51
52 Task follows CatalogCalculationPlugin with modifications for use in AP.
53
54 Parameters
55 ----------
56 config : `DiaObjectCalculationPlugin.ConfigClass`
57 Plugin configuration.
58 name : `str`
59 The string the plugin was registered with.
60 metadata : `lsst.daf.base.PropertySet`
61 Plugin metadata that will be attached to the output catalog
62 """
63
64 ConfigClass = DiaObjectCalculationPluginConfig
65
66 registry = PluginRegistry(DiaObjectCalculationPluginConfig)
67 """List of available plugins (`lsst.meas.base.PluginRegistry`).
68 """
69
70 FLUX_MOMENTS_CALCULATED = 5.0
71 """Add order after flux means and stds are calculated.
72 """
73
74 plugType = 'single'
75 """Does the plugin operate on a single source or the whole catalog (`str`)?
76 If the plugin operates on a single source at a time, this should be set to
77 ``"single"``; if it expects the whole catalog, to ``"multi"``. If the
78 plugin is of type ``"multi"``, the `fail` method must be implemented to
79 accept the whole catalog. If the plugin is of type ``"single"``, `fail`
80 should accept a single source record.
81 """
82
83 inputCols = []
84 """DiaObject column names required by the plugin in order to run and
85 complete its calculation. DiaCalculationTask should raise an error if a
86 plugin is instantiated without the needed column available. Input columns
87 should be defined in the DPDD/cat/Apdb schema. Filter dependent columns
88 should be specified without the filter name prepended to them, e.g.
89 ``psfFluxMean`` instead of ``u_psfFluxMean``.
90 """
91 outputCols = []
92 """DiaObject column names output by the plugin. DiaCalculationTask should
93 raise an error if another plugin outputs to the same column.
94 Output columns should be defined in the DPDD/cat/Apdb schema. Filter
95 dependent columns should be specified without the filter name prepended to
96 them. eg ``psfFluxMean`` instead of ``u_psfFluxMean``.
97 """
98
99 needsFilter = True
100 """This plugin requires a filter to be specified. Plugins using filter
101 names usually deal with fluxes and only a sub-set of the DiaSource
102 catalog. Plugins that do not use the filter name usually run over a value
103 common across all observations/detections such as position.
104 """
105
def __init__(self, config, name, metadata):
    """Construct the plugin from its configuration and registered name.

    Parameters
    ----------
    config : `DiaObjectCalculationPlugin.ConfigClass`
        Plugin configuration.
    name : `str`
        The string the plugin was registered with.
    metadata : `lsst.daf.base.PropertySet`
        Plugin metadata. Accepted so that the task can construct every
        plugin with the same arguments; it is not forwarded to
        ``BasePlugin.__init__``.
    """
    BasePlugin.__init__(self, config, name)
108
def calculate(self,
              diaObject,
              diaSources,
              filterDiaFluxes=None,
              band=None,
              **kwargs):
    """Perform the calculation specified by this plugin.

    This method can either be used to operate on a single catalog record
    or a whole catalog, populating it with the output defined by this
    plugin.

    Note that results may be added to catalog records as new columns, or
    may result in changes to existing values.

    Parameters
    ----------
    diaObject : `dict`
        Summary object to store values in.
    diaSources : `pandas.DataFrame`
        DataFrame representing all diaSources associated with this
        diaObject.
    filterDiaFluxes : `pandas.DataFrame`
        DataFrame representing diaSources associated with this
        diaObject that are observed in the band pass ``band``.
    band : `str`
        Simple name of the filter for the flux being calculated.
    **kwargs
        Any additional keyword arguments that may be passed to the plugin.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation; subclasses must
        override this method.

    Notes
    -----
    NOTE(review): ``DiaObjectCalculationTask.callCompute`` invokes plugins
    purely by keyword (``diaObjects``, ``diaObjectId``, ``diaSources``,
    ``filterDiaSources``, ``band``), which does not match the parameter
    names documented here. Overriding implementations should presumably
    follow the keywords used by the task -- confirm against existing
    plugins.
    """
    raise NotImplementedError()
140
def fail(self, diaObject, columns, error=None):
    """Mark the listed diaObject columns as failed by setting them to NaN.

    Parameters
    ----------
    diaObject : `dict`
        Summary object to store values in.
    columns : `list` of `str`
        Names of the columns to write the failed value (NaN) to.
    error : `BaseException` or `None`
        Error to pass. Kept for consistency with
        CatalogCalculationPlugin. Unused.
    """
    for colName in columns:
        diaObject[colName] = np.nan
156
157
159 """Config class for the catalog calculation driver task.
160
161 Specifies which plugins will execute when the `CatalogCalculationTask`
162 associated with this configuration is run.
163 """
164
165 plugins = DiaObjectCalculationPlugin.registry.makeField(
166 multi=True,
167 default=["ap_meanPosition",
168 "ap_meanFlux"],
169 doc="Plugins to be run and their configuration")
170
171
173 """Run plugins which operate on a catalog of DIA sources.
174
175 This task facilitates running plugins which will operate on a source
176 catalog. These plugins may do things such as classifying an object based
177 on source record entries inserted during a measurement task.
178
179 This task differs from CatalogCalculationTask in the following ways:
180
181 -No multi mode is available for plugins. All plugins are assumed to run
182 in single mode.
183
184 -Input and output catalog types are assumed to be `pandas.DataFrames` with
185 columns following those used in the Apdb.
186
187 -No schema argument is passed to the plugins. Each plugin specifies
188 output columns and required inputs.
189
190 Parameters
191 ----------
192 plugMetadata : `lsst.daf.base.PropertyList` or `None`
193 Will be modified in-place to contain metadata about the plugins being
194 run. If `None`, an empty `~lsst.daf.base.PropertyList` will be
195 created.
196 **kwargs
197 Additional arguments passed to the superclass constructor.
198
199 Notes
200 -----
201 Plugins may either take an entire catalog to work on at a time, or work on
202 individual records.
203 """
204 ConfigClass = DiaObjectCalculationConfig
205 _DefaultName = "diaObjectCalculation"
206
def __init__(self, plugMetadata=None, **kwargs):
    """Construct the task and instantiate the configured plugins.

    Parameters
    ----------
    plugMetadata : `lsst.daf.base.PropertyList` or `None`
        Metadata object handed to every plugin on construction. If `None`,
        an empty `~lsst.daf.base.PropertyList` is created.
    **kwargs
        Additional arguments passed to the ``lsst.pipe.base.Task``
        constructor.
    """
    lsst.pipe.base.Task.__init__(self, **kwargs)
    if plugMetadata is None:
        plugMetadata = lsst.daf.base.PropertyList()
    self.plugMetadata = plugMetadata
    # Instantiated plugins, keyed by their registered name.
    self.plugins = PluginMap()
    # Output columns claimed so far; filled in by _validatePluginCols as
    # each plugin is instantiated.
    self.outputCols = []

    self.initializePlugins()

def initializePlugins(self):
    """Initialize the plugins according to the configuration.

    Each configured plugin is instantiated, validated against the columns
    produced by the plugins before it, stored in ``self.plugins`` and
    filed under its execution order in ``self.executionDict`` as either a
    ``single`` or a ``multi`` plugin.

    Raises
    ------
    ValueError
        Raised if a plugin's execution order is below
        ``BasePlugin.DEFAULT_CATALOGCALCULATION``, or if the plugin fails
        column validation.
    """
    pluginType = namedtuple('pluginType', 'single multi')
    self.executionDict = {}
    # Read the properties for each plugin. Allocate a dictionary entry for
    # each run level. Verify that the plugins are above the minimum run
    # level for a catalogCalculation plugin. For each run level, the
    # plugins are sorted into either single record, or multi record groups
    # to later be run appropriately.
    for executionOrder, name, config, PluginClass in sorted(self.config.plugins.apply()):
        if executionOrder not in self.executionDict:
            self.executionDict[executionOrder] = pluginType(single=[], multi=[])

        if PluginClass.getExecutionOrder() < BasePlugin.DEFAULT_CATALOGCALCULATION:
            errorTuple = (PluginClass, PluginClass.getExecutionOrder(),
                          BasePlugin.DEFAULT_CATALOGCALCULATION)
            raise ValueError("{} has an execution order less than the minimum for an catalogCalculation "
                             "plugin. Value {} : Minimum {}".format(*errorTuple))

        plug = PluginClass(config, name, metadata=self.plugMetadata)

        self._validatePluginCols(plug)

        self.plugins[name] = plug
        # A plugin whose plugType is neither 'single' nor 'multi' is
        # registered above but is not scheduled for execution.
        if plug.plugType == 'single':
            self.executionDict[executionOrder].single.append(plug)
        elif plug.plugType == 'multi':
            self.executionDict[executionOrder].multi.append(plug)
246
def _validatePluginCols(self, plug):
    """Assert that output columns are not duplicated and input columns
    exist for dependent plugins.

    Every name in ``plug.inputCols`` must already be present in
    ``self.outputCols``. Every name in ``plug.outputCols`` must not yet be
    present and is appended to ``self.outputCols`` once accepted.

    Parameters
    ----------
    plug : `DiaObjectCalculationPlugin`
        Plugin to test for output collisions and input needs.

    Raises
    ------
    ValueError
        Raised if a required input column has not been produced by an
        earlier plugin, or if an output column is already produced by
        another plugin. Output columns accepted before the colliding one
        remain registered in ``self.outputCols``.
    """
    for inputName in plug.inputCols:
        if inputName not in self.outputCols:
            errorTuple = (plug.name, plug.getExecutionOrder(), inputName)
            raise ValueError(
                "Plugin, {} with execution order {} requires DiaObject "
                "column {} to exist. Check the execution order of the "
                "plugin and make sure it runs after a plugin creating "
                "the column is run.".format(*errorTuple))

    for outputName in plug.outputCols:
        if outputName in self.outputCols:
            errorTuple = (plug.name, plug.getExecutionOrder(), outputName)
            raise ValueError(
                "Plugin, {} with execution order {} is attempting to "
                "output a column {}, however the column is already being "
                "produced by another plugin. Check other plugins for "
                "collisions with this one.".format(*errorTuple))
        self.outputCols.append(outputName)
276
@timeMethod
def run(self,
        diaObjectCat,
        diaSourceCat,
        updatedDiaObjectIds,
        filterNames):
    """The entry point for the DIA catalog calculation task.

    Run method both updates the values in the diaObjectCat and appends
    newly created DiaObjects to the catalog. For catalog column names
    see the lsst.cat schema definitions for the DiaObject and DiaSource
    tables (http://github.com/lsst/cat).

    Both input catalogs are re-indexed in place when they do not already
    carry the expected index, then the work is delegated to
    `callCompute`.

    Parameters
    ----------
    diaObjectCat : `pandas.DataFrame`
        DiaObjects to update values of and append new objects to. DataFrame
        should be indexed on "diaObjectId"
    diaSourceCat : `pandas.DataFrame`
        DiaSources associated with the DiaObjects in diaObjectCat.
        DataFrame should be indexed on
        `["diaObjectId", "band", "diaSourceId"]`
    updatedDiaObjectIds : `numpy.ndarray`
        Integer ids of the DiaObjects to update and create.
    filterNames : `list` of `str`
        List of string names of filters to be being processed.

    Returns
    -------
    returnStruct : `lsst.pipe.base.Struct`
        Struct containing:

        ``diaObjectCat``
            Full set of DiaObjects including both un-updated and
            updated/new DiaObjects (`pandas.DataFrame`).
        ``updatedDiaObjects``
            Catalog of DiaObjects that were updated or created by this
            task (`pandas.DataFrame`).
    """
    objectIndexName = diaObjectCat.index.name
    if objectIndexName != "diaObjectId":
        # An unnamed index is silently replaced; a differently named one
        # is replaced as well, but with a warning.
        if objectIndexName is not None:
            self.log.warning(
                "Input diaObjectCat is indexed on column(s) incompatible with "
                "this task. Should be indexed on 'diaObjectId'. Trying to set "
                "index regardless")
        diaObjectCat.set_index("diaObjectId", inplace=True, drop=False)

    sourceIndexNames = ["diaObjectId", "band", "diaSourceId"]
    # ``names`` by default is FrozenList([None]) hence we access the first
    # element and test for None.
    if diaSourceCat.index.names[0] is None:
        diaSourceCat.set_index(sourceIndexNames, inplace=True, drop=False)
    elif diaSourceCat.index.names != sourceIndexNames:
        # Move the existing index back into columns before building the
        # expected multi-index.
        diaSourceCat.reset_index(inplace=True)
        diaSourceCat.set_index(sourceIndexNames, inplace=True, drop=False)

    return self.callCompute(diaObjectCat,
                            diaSourceCat,
                            updatedDiaObjectIds,
                            filterNames)
344
@timeMethod
def callCompute(self,
                diaObjectCat,
                diaSourceCat,
                updatedDiaObjectIds,
                filterNames):
    """Run each of the plugins on the catalog.

    Plugins are run in ascending execution order in two passes: first the
    plugins whose ``needsFilter`` is false, then, once per band in
    ``filterNames``, the plugins whose ``needsFilter`` is true.

    For catalog column names see the lsst.cat schema definitions for the
    DiaObject and DiaSource tables (http://github.com/lsst/cat).

    Parameters
    ----------
    diaObjectCat : `pandas.DataFrame`
        DiaObjects to update values of and append new objects to. DataFrame
        should be indexed on "diaObjectId"
    diaSourceCat : `pandas.DataFrame`
        DiaSources associated with the DiaObjects in diaObjectCat.
        DataFrame must be indexed on
        `["diaObjectId", "band", "diaSourceId"]`
    updatedDiaObjectIds : `numpy.ndarray`
        Integer ids of the DiaObjects to update and create.
    filterNames : `list` of `str`
        List of string names of filters to be being processed.

    Returns
    -------
    returnStruct : `lsst.pipe.base.Struct`
        Struct containing:

        ``diaObjectCat``
            Full set of DiaObjects including both un-updated and
            updated/new DiaObjects (`pandas.DataFrame`).
        ``updatedDiaObjects``
            Catalog of DiaObjects that were updated or created by this
            task (`pandas.DataFrame`).

    Raises
    ------
    KeyError
        Raises if `pandas.DataFrame` indexing is not properly set.
    """
    # Plugins write into this sub-selection; it is stored back into
    # diaObjectCat before returning.
    diaObjectsToUpdate = diaObjectCat.loc[updatedDiaObjectIds, :]
    self.log.info("Calculating summary stats for %i DiaObjects",
                  len(diaObjectsToUpdate))

    updatingDiaSources = diaSourceCat.loc[updatedDiaObjectIds, :]
    # Level=0 here groups by diaObjectId.
    diaSourcesGB = updatingDiaSources.groupby(level=0)
    runLevels = sorted(self.executionDict)

    # First pass: plugins that do not depend on a filter.
    for runlevel in runLevels:
        for plug in self.executionDict[runlevel].single:
            if plug.needsFilter:
                continue
            for updatedDiaObjectId in updatedDiaObjectIds:
                # Sub-select diaSources associated with this diaObject.
                objDiaSources = updatingDiaSources.loc[updatedDiaObjectId]

                with CCContext(plug, updatedDiaObjectId, self.log):
                    # We feed the catalog we need to update and the id
                    # so as to get a view into the catalog and not a copy.
                    # This updates the values in the catalog.
                    plug.calculate(diaObjects=diaObjectsToUpdate,
                                   diaObjectId=updatedDiaObjectId,
                                   diaSources=objDiaSources,
                                   filterDiaSources=None,
                                   band=None)
        for plug in self.executionDict[runlevel].multi:
            if plug.needsFilter:
                continue
            with CCContext(plug, diaObjectsToUpdate, self.log):
                plug.calculate(diaObjects=diaObjectsToUpdate,
                               diaSources=diaSourcesGB,
                               filterDiaSources=None,
                               band=None)

    # Second pass: filter-dependent plugins, one band at a time.
    for band in filterNames:
        try:
            updatingFilterDiaSources = updatingDiaSources.loc[
                (slice(None), band), :
            ]
        except KeyError:
            self.log.warning("No DiaSource data with fitler=%s. "
                             "Continuing...", band)
            continue
        # Level=0 here groups by diaObjectId.
        filterDiaSourcesGB = updatingFilterDiaSources.groupby(level=0)

        for runlevel in runLevels:
            for plug in self.executionDict[runlevel].single:
                if not plug.needsFilter:
                    continue
                for updatedDiaObjectId in updatedDiaObjectIds:
                    # Sub-select diaSources associated with this diaObject.
                    objDiaSources = updatingDiaSources.loc[updatedDiaObjectId]

                    # Sub-select on diaSources observed in the current filter.
                    try:
                        filterObjDiaSources = objDiaSources.loc[band]
                    except KeyError:
                        self.log.warning(
                            "DiaObjectId=%s has no "
                            "DiaSources for filter=%s. "
                            "Continuing...", updatedDiaObjectId, band)
                        # Skip this object: without this the plugin would
                        # be called with filterObjDiaSources either
                        # unbound or left over from the previous object.
                        continue
                    with CCContext(plug, updatedDiaObjectId, self.log):
                        # We feed the catalog we need to update and the id
                        # so as to get a view into the catalog and not a copy.
                        # This updates the values in the catalog.
                        plug.calculate(diaObjects=diaObjectsToUpdate,
                                       diaObjectId=updatedDiaObjectId,
                                       diaSources=objDiaSources,
                                       filterDiaSources=filterObjDiaSources,
                                       band=band)
            for plug in self.executionDict[runlevel].multi:
                if not plug.needsFilter:
                    continue
                with CCContext(plug, diaObjectsToUpdate, self.log):
                    plug.calculate(diaObjects=diaObjectsToUpdate,
                                   diaSources=diaSourcesGB,
                                   filterDiaSources=filterDiaSourcesGB,
                                   band=band)
    # Need to store the newly updated diaObjects directly, as editing
    # diaObjectsToUpdate does not update the values of diaObjectCat.
    diaObjectCat.loc[updatedDiaObjectIds, :] = diaObjectsToUpdate
    return lsst.pipe.base.Struct(
        diaObjectCat=diaObjectCat,
        updatedDiaObjects=diaObjectsToUpdate)
475
def _initialize_dia_object(self, objId):
    """Create a new DiaObject with values required to be initialized by the
    Apdb.

    Parameters
    ----------
    objId : `int`
        ``diaObjectId`` value for the new DiaObject.

    Returns
    -------
    diaObject : `dict`
        Newly created DiaObject with keys:

        ``diaObjectId``
            Unique DiaObjectId (`int`).
        ``pmParallaxNdata``
            Number of data points used for parallax calculation (`int`).
        ``nearbyObj1``
            Id of a nearbyObject in the Object table (`int`).
        ``nearbyObj2``
            Id of a nearbyObject in the Object table (`int`).
        ``nearbyObj3``
            Id of a nearbyObject in the Object table (`int`).
        ``?_psfFluxNdata``
            Number of data points used to calculate point source flux
            summary statistics in each bandpass (`int`).
    """
    new_dia_object = {
        "diaObjectId": objId,
        "pmParallaxNdata": 0,
        "nearbyObj1": 0,
        "nearbyObj2": 0,
        "nearbyObj3": 0,
    }
    # One zeroed counter per bandpass.
    for band in ("u", "g", "r", "i", "z", "y"):
        new_dia_object["{}_psfFluxNdata".format(band)] = 0
    return new_dia_object
Class for storing ordered metadata with comments.
calculate(self, diaObject, diaSources, filterDiaFluxes=None, band=None, **kwargs)
run(self, diaObjectCat, diaSourceCat, updatedDiaObjectIds, filterNames)
callCompute(self, diaObjectCat, diaSourceCat, updatedDiaObjectIds, filterNames)