35 from .sourceAssocArgumentParser
import SourceAssocArgumentParser
37 __all__ = [
"SourceAssocConfig",
"SourceAssocTask"]
41 """Configuration parameters for SourceAssocTask.
43 inputLevel = pexConfig.Field(
44 dtype=str, default=
"sensor",
46 Level of input datasets identified by the
47 inputSourceDataset and inputCalexpMetadataDataset
48 configuration parameters.
50 inputSourceDataset = pexConfig.Field(
51 dtype=str, default=
"src",
52 doc=
"Name of the butler dataset for input sources.")
53 inputCalexpMetadataDataset = pexConfig.Field(
54 dtype=str, default=
"calexp_md",
56 Name of the butler dataset for calibrated exposure metadata.
58 Note that this dataset must yield metadata for the calibrated
59 exposures that the sources from inputSourceDataset were detected
60 and measured on; otherwise, the SourceAssoc pipeline behavior
64 sourceProcessing = pexConfig.ConfigField(
65 dtype=apCluster.SourceProcessingConfig,
67 Source processing parameters.
69 To see their descriptions:
71 >>> from lsst.ap.cluster import SourceProcessingConfig
72 >>> help(SourceProcessingConfig)
75 clustering = pexConfig.ConfigField(
76 dtype=apCluster.ClusteringConfig,
78 Source clustering parameters.
80 To see their descriptions:
82 >>> from lsst.ap.cluster import ClusteringConfig
83 >>> help(ClusteringConfig)
85 doCluster = pexConfig.Field(
86 dtype=bool, default=
True,
88 If set to True, then "good" sources are clustered with the
89 OPTICS algorithm - this is an attempt to group sources from
90 individual exposures which correspond to measurements of the
91 same astronomical object.
93 If set to False, then running SourceAssocTask reduces to simply
94 processing sources - this involves adding exposure ID, filter,
95 and middle-of-exposure time fields to each source, as well as
96 computation of sky-coordinate errors from centroid errors. In
97 other words, sources are prepared for database ingest into the
98 LSST Source database table, but are not associated with one
101 Note that a "good" source is one with valid sky-coordinates and
102 which has not been identified as "bad" by one of the flag fields
103 listed in the sourceProcessing.badFlagFields configuration
106 doDiscardNoiseClusters = pexConfig.Field(
107 dtype=bool, default=
True,
108 doc=
"Discard single source clusters?")
109 doWriteClusters = pexConfig.Field(
110 dtype=bool, default=
True,
112 Write source clusters to persistent storage via the butler?
114 Source clusters are stored as lsst.ap.cluster.SourceClusterCatalog
115 instances; one such catalog is stored per sky-tile. The
116 corresponding butler dataset name is "object", so they can
117 be retrieved using e.g.:
119 >>> clusters = butler.get("object", skyTile=12345)
121 Note that if no clusters were generated for a sky-tile, say
122 because the doCluster configuration parameter was set to False,
123 then nothing (not even an empty catalog!) is written to persistent
124 storage. In other words:
126 >>> butler.datasetExists("object", skyTile=12345)
128 can return False, even after SourceAssocTask has been run on
131 algorithmFlags = pexConfig.DictField(
132 keytype=str, itemtype=str,
134 A dictionary mapping from algorithm names to strings containing
135 comma separated lists of flag field names. If any flag is set for
136 a source, then that source is ignored when computing the
137 measurement mean of the corresponding algorithm.
140 doWriteSources = pexConfig.Field(
141 dtype=bool, default=
True,
143 Write processed "good" sources to persistent storage via the butler?
145 A "good" source is one with valid coordinates and which has not
146 been identified as "bad" by one of the flag fields listed in the
147 sourceProcessing.badFlagFields configuration parameter. Sources are
148 stored as lsst.afw.table.SourceCatalog instances; one such catalog
149 is stored per sky-tile. The corresponding butler dataset name is
150 "source", so they can be retrieved using e.g.:
152 >>> sources = butler.get("source", skyTile=12345)
154 Note that if no "good" sources were identified for a sky-tile, then
155 nothing (not even an empty catalog!) is written to persistent
156 storage. In other words:
158 >>> butler.datasetExists("source", skyTile=12345)
160 can return False. In this case, the clustering algorithm had no
161 input, so no "object" dataset will have been written for the
164 doWriteBadSources = pexConfig.Field(
165 dtype=bool, default=
True,
167 Write processed "bad" sources to persistent storage via the butler?
169 A "bad" source is one with valid coordinates and for which at least
170 one of the flag fields listed in the sourceProcessing.badFlagFields
171 configuration parameter has been set. Bad sources are stored as
172 lsst.afw.table.SourceCatalog instances; one such catalog is
173 stored per sky-tile. The corresponding butler dataset name is
174 "badSource", so they can be retrieved using e.g.:
176 >>> badSources = butler.get("badSource", skyTile=12345)
178 If no "bad" sources were identified in the sky-tile, no
179 dataset is written (not even an empty catalog).
181 doWriteInvalidSources = pexConfig.Field(
182 dtype=bool, default=
True,
184 Write "invalid" sources to persistent storage via the butler?
186 An "invalid" source is one with invalid coordinates/centroid,
187 e.g. because the centroid algorithm failed. Invalid sources are
188 stored as lsst.afw.table.SourceCatalog instances; one such catalog
189 is stored per sky-tile. The corresponding butler dataset name is
190 "invalidSource", so they can be retrieved using e,g,:
192 >>> invalidSources = butler.get("invalidSource", skyTile=12345)
194 As for the other datasets, if no "invalid" sources were
195 identified for the sky-tile, no dataset is written.
198 sourceHistogramResolution = pexConfig.RangeField(
199 dtype=int, default=2000, min=1,
200 doc=
"X and Y resolution of 2D source position histograms.")
201 doMakeSourceHistogram = pexConfig.Field(
202 dtype=bool, default=
True,
204 Make 2D histogram of "good" source positions? If set, a square
205 image covering the sky-tile being processed and with resolution
206 equal to sourceHistogramResolution will be created. The value of
207 each pixel in this image will be the number of "good" sources
210 doMakeBadSourceHistogram = pexConfig.Field(
211 dtype=bool, default=
True,
213 Make 2D histogram of "bad" source positions? If set, a square
214 image covering the sky-tile being processed and with resolution
215 equal to sourceHistogramResolution will be created. The value of
216 each pixel in this image will be the number of "bad" sources
220 doWriteSourceHistogram = pexConfig.Field(
221 dtype=bool, default=
True,
223 Write "good" source histogram to persistent storage via the butler?
225 If True, one histogram image is written per sky-tile containing
226 at least one "good" source, via the butler. The corresponding
227 dataset name is "sourceHist", and the type of these histograms is
228 lsst.afw.image.DecoratedImageU. They can be retrieved using e.g.:
230 >>> img = butler.get("sourceHist", skyTile=12345)
232 doWriteBadSourceHistogram = pexConfig.Field(
233 dtype=bool, default=
True,
235 Write "bad" source histogram to persistent storage via the butler?
236 If True, one histogram image is written per sky-tile containing
237 at least one "bad" source, via the butler. The corresponding
238 dataset name is "badSourceHist", and the type of these histograms
239 is lsst.afw.image.DecoratedImageU. They can be retrieved using e.g.:
241 >>> img = butler.get("badSourceHist", skyTile=12345)
244 measPrefix = pexConfig.Field(
245 dtype=str, optional=
True, default=
None,
247 Prefix for all source measurement fields. Must match the value
248 of the lsst.meas.algorithms.SourceMeasurementConfig prefix
249 configuration parameter, which is typically available as
250 measurement.prefix in the CCD processing task configuration.
252 measSlots = pexConfig.ConfigField(
253 dtype=measAlgorithms.SourceSlotConfig,
255 Mapping from algorithms to special aliases in
256 lsst.afw.table.SourceTable. Must match the value of the
257 lsst.meas.algorithms.SourceMeasurementConfig slots
258 configuration parameter, which is typically available as
259 measurement.slots in the CCD processing task configuration.
262 >>> from lsst.meas.algorithms import SourceSlotConfig
263 >>> help(SourceSlotConfig)
267 self.sourceProcessing.badFlagFields = [
"flags.negative",
269 "shape.sdss.flags.unweightedbad",
271 flags =
",".join([
"flags.negative",
274 "flags.pixel.interpolated.center",
275 "flags.pixel.saturated.center",
278 "flux.gaussian":
"flux.gaussian.flags," + flags,
279 "flux.naive":
"flux.naive.flags," + flags,
280 "flux.psf":
"flux.psf.flags," + flags,
281 "flux.sinc":
"flux.sinc.flags," + flags,
282 "flux.kron":
"flux.kron.flags,flux.kron.flags.aperture," + flags,
283 "multishapelet.exp.flux":
"multishapelet.exp.flux.flags," + flags,
284 "multishapelet.dev.flux":
"multishapelet.dev.flux.flags," + flags,
285 "multishapelet.combo.flux":
"multishapelet.combo.flux.flags," + flags,
286 "shape.sdss":
"shape.sdss.flags.unweightedbad," + flags,
291 """Create an lsst.afw.table.FlagKeyVector identifying sources to
292 ignore when computing measurement means for the given algorithm.
294 vec = afwTable.FlagKeyVector()
295 if alg
in config.algorithmFlags:
296 flags = config.algorithmFlags[alg]
297 for f
in flags.split(
","):
302 if si.field.getTypeString() !=
"Flag":
303 raise TypeError(f +
" field is not a Flag field")
309 """Cluster the sources inside a sky-tile using the OPTICS algorithm -
310 this is an attempt to group sources from individual exposures that
311 correspond to measurements of the same astronomical object. For each
312 cluster, means of individual source measurements (currently including
313 fluxes, shapes, and sky-coordinates) are computed; these cluster
314 attributes are suitable for ingest into the LSST Object database table.
316 For details on the clustering algorithm used, see:
318 "OPTICS: Ordering Points To Identify the Clustering Structure".
319 Mihael Ankerst, Markus M. Breunig, Hans-Peter Kriegel, Jorg Sander (1999).
320 ACM SIGMOD international conference on Management of data.
321 ACM Press. pp. 49-60.
323 http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.129.6542
324 http://en.wikipedia.org/wiki/OPTICS_algorithm
326 The parameters of this algorithm can be adjusted via the 'clustering'
327 attribute of the lsst.ap.tasks.SourceAssocConfig instance passed to
330 >>> from lsst.ap.tasks import *
331 >>> from lsst.ap.cluster import ClusteringConfig
332 >>> # display documentation for top-level task configuration parameters
333 ... help(SourceAssocConfig)
334 >>> # display documentation for clustering parameters
335 ... help(ClusteringConfig)
336 >>> # create a task configuration and fiddle with a value
337 ... config = SourceAssocConfig()
338 >>> config.clustering.minNeighbors = 2
340 Some control over which sources participate in spatial clustering is
341 provided via the 'sourceProcessing.badFlagFields' configuration
342 parameter - this is a list of flag field names. If at least one of
343 these flags is set for an input source, then that source is deemed "bad"
344 and does not participate in spatial clustering. Here's how one would
345 set it (and discover other source processing parameters):
347 >>> from lsst.ap.tasks import *
348 >>> from lsst.ap.cluster import SourceProcessingConfig
349 >>> # display documentation for source processing
350 ... # configuration parameters
351 ... help(SourceProcessingConfig)
352 >>> config = SourceAssocConfig()
353 >>> config.processing.badFlagFields = ["flags.evil", "flags.devilish"]
355 Inputs and Assumptions:
356 -----------------------
358 This task retrieves "src", "calexp_md", and "processCcd_config" datasets
359 from the butler (but note that the names of the input source and
360 calibrated exposure metadata datasets can be changed via configuration
361 parameters). It assumes that single frame measurement has been run
362 (e.g. via lsst.pipe.tasks.processCcd.ProcessCcdTask, or some camera
363 specific variant thereof). Note also that care is required when
364 interleaving CCD processing and source association - this task does not
365 support incremental updates to source clusters. In other words, one should
366 ensure that all CCDs overlapping a sky-tile T have been processed before
367 running this task on T. Otherwise, multiple runs on T will be required,
368 where each run processes all the data for the sky-tile from scratch.
373 This task writes "object", "source", "badSource", "invalidSource",
374 "sourceHist", and "badSourceHist" datasets via the butler. Whether any
375 of these is actually written for a sky-tile depends on configuration
376 parameters and the input data.
378 ConfigClass = SourceAssocConfig
379 _DefaultName =
"sourceAssoc"
382 """Pass all parameters through to the the base class constructor."""
383 pipeBase.Task.__init__(self, *args, **kwds)
387 """Cluster sources falling inside the given sky-tile with the OPTICS
390 @param skyTileId: Integer sky-tile ID
391 @param butler: Butler responsible for retrieving calibrated
392 exposure metadata and associated sources
394 @return A lsst.pipe.base.Struct with the following fields:
396 - sources: Sources inside the sky-tile with valid
397 positions and no "bad" flags set.
398 - badSources: Sources inside the sky-tile with valid
399 positions and at least one "bad" flag set.
400 - invalidSources: Sources with invalid positions/centroids.
401 - clusters: A list of lsst.afw.table.SourceCatalog objects,
402 one per cluster generated.
403 - exposures: An lsst.ap.match.ExposureInfoMap,
404 mapping calibrated exposure IDs to
405 lsst.ap.match.ExposureInfo objects.
406 - sourceHistogram: A 2D histogram of source positions.
407 - badSourceHistogram: A 2D histogram of bad source positions.
409 Note that any of these return values can legitimately be None
410 due to lack of inputs, problems reading them in, or the task
414 qsp = skypix.createQuadSpherePixelization(butler.mapper.skypolicy)
415 root, x, y = qsp.coords(skyTileId);
416 skyTile = apUtils.PT1SkyTile(qsp.resolution, root, x, y, skyTileId)
418 spControl = self.config.sourceProcessing.makeControl()
420 results = pipeBase.Struct(
423 invalidSources =
None,
425 exposures = apMatch.ExposureInfoMap(),
426 sourceHistogram =
None,
427 badSourceHistogram =
None
430 for dataRef
in butler.subset(
431 self.config.inputSourceDataset, self.config.inputLevel, skyTile=skyTileId):
433 if not dataRef.datasetExists(self.config.inputSourceDataset):
436 expMd = dataRef.get(self.config.inputCalexpMetadataDataset, immediate=
True)
437 expSources = dataRef.get(self.config.inputSourceDataset, immediate=
True)
438 expConfig = dataRef.get(
"processCcd_config", immediate=
True)
439 if (self.config.measPrefix != expConfig.measurement.prefix
or
440 self.config.measSlots != expConfig.measurement.slots):
441 self.log.warn(str.format(
442 "skipping {} : processCcd measurement prefix and slot configuration "
443 "do not match sourceAssoc configuration", str(dataRef.dataId)))
446 self.log.warn(str.format(
447 "skipping {} : failed to unpersist {}, {}, or processCcd_config dataset: {}",
448 str(dataRef.dataId), self.config.inputCalexpMetadataDataset,
449 self.config.inputSourceDataset, traceback.format_exc()))
451 if sourceTable ==
None:
453 sourceTable, schemaMapper = apCluster.makeOutputSourceTable(
454 expSources.getTable(), spControl)
463 expInfo = apMatch.ExposureInfo(expMd)
465 self.log.warn(str.format(
466 "skipping {} : failed to convert {} dataset to ExposureInfo",
467 str(dataRef.dataId), self.config.inputCalexpMetadataDataset))
469 results.exposures.insert(expInfo)
470 apCluster.processSources(
478 results.invalidSources)
480 del expSources, expMd
482 if (sourceTable ==
None):
485 if self.config.doCluster
and len(results.sources) > 0:
486 results.clusters = apCluster.cluster(
487 results.sources, self.config.clustering.makeControl())
489 if self.config.doMakeSourceHistogram
and len(results.sources) > 0:
490 results.sourceHistogram, wcs = apUtils.createImageCoveringSkyTile(
491 qsp, skyTileId, self.config.sourceHistogramResolution)
492 apUtils.makeSourceHistogram(
493 results.sourceHistogram.getImage(), results.sources, wcs,
False)
494 if self.config.doMakeBadSourceHistogram
and len(results.badSources) > 0:
495 results.badSourceHistogram, wcs = apUtils.createImageCoveringSkyTile(
496 qsp, skyTileId, self.config.sourceHistogramResolution)
497 apUtils.makeSourceHistogram(
498 results.badSourceHistogram.getImage(), results.badSources, wcs,
False)
503 """Compute source cluster attributes for a sky-tile.
505 @param skyTileId: Integer sky-tile ID
506 @param clusters: List of lsst.afw.table.SourceCatalog objects,
507 each containing the sources for one cluster
508 @param exposures: A lsst.ap.match.ExposureInfoMap object, mapping
509 calibrated exposure IDs to lsst.ap.match.ExposureInfo
512 @return An lsst.ap.cluster.SourceClusterCatalog containing measurement
513 means for each cluster.
515 if len(clusters) == 0:
517 self.log.info(str.format(
"Computing attributes for {} clusters", len(clusters)))
518 spControl = self.config.sourceProcessing.makeControl()
519 minNeighbors = self.config.clustering.makeControl().minNeighbors
520 scTable = apCluster.makeSourceClusterTable(
521 clusters[0].getTable(),
522 apCluster.SourceClusterIdFactory(skyTileId),
524 flagNoiseKey = scTable.getSchema().find(
"flag.noise").key
525 scCat = apCluster.SourceClusterCatalog(scTable)
526 algorithmFlags = dict()
527 for alg
in spControl.fluxFields:
528 if alg
in clusters[0].getSchema():
529 algorithmFlags[alg] =
_flagKeys(clusters[0].getSchema(), self.config, alg)
530 for alg
in spControl.shapeFields:
531 if alg
in clusters[0].getSchema():
532 algorithmFlags[alg] =
_flagKeys(clusters[0].getSchema(), self.config, alg)
534 for sources
in clusters:
535 if len(sources) == 1
and minNeighbors > 0:
537 if self.config.doDiscardNoiseClusters:
541 sc.set(flagNoiseKey,
True)
544 sev = apCluster.computeBasicAttributes(
545 sc, sources, exposures, spControl.exposurePrefix)
546 for alg
in spControl.fluxFields:
547 if alg
in algorithmFlags:
548 apCluster.computeFluxMean(sc, sev, alg, algorithmFlags[alg],
550 for alg
in spControl.shapeFields:
551 if alg
in algorithmFlags:
552 apCluster.computeShapeMean(sc, sev, alg, algorithmFlags[alg])
553 apCluster.setClusterFields(sources, sc, spControl)
554 msg =
"Computed attributes for {} clusters"
555 if self.config.doDiscardNoiseClusters:
556 msg +=
", discarded {} noise clusters"
558 msg +=
", including {} noise clusters"
559 self.log.info(str.format(msg, len(scCat), numNoise))
563 def run(self, skyTileId, butler):
564 """Run source association on a single sky-tile; return None.
566 self.log.info(str.format(
"Processing sky-tile {}", skyTileId))
567 res = self.
cluster(skyTileId, butler)
568 if (self.config.doCluster
and res.clusters !=
None and
569 len(res.clusters) > 0):
570 clusters = self.
attributes(skyTileId, res.clusters, res.exposures)
572 if self.config.doWriteClusters
and len(clusters) > 0:
573 butler.put(clusters,
"object", skyTile=skyTileId)
575 if (self.config.doWriteSources
and res.sources !=
None and
576 len(res.sources) > 0):
577 butler.put(res.sources,
"source", skyTile=skyTileId)
578 if (self.config.doWriteBadSources
and res.badSources !=
None and
579 len(res.badSources) > 0):
580 butler.put(res.badSources,
"badSource", skyTile=skyTileId)
581 if (self.config.doWriteInvalidSources
and res.invalidSources !=
None and
582 len(res.invalidSources) > 0):
583 butler.put(res.invalidSources,
"invalidSource", skyTile=skyTileId)
585 if self.config.doWriteSourceHistogram
and res.sourceHistogram !=
None:
586 butler.put(res.sourceHistogram,
"sourceHist", skyTile=skyTileId)
587 if (self.config.doWriteBadSourceHistogram
and
588 res.badSourceHistogram !=
None):
589 butler.put(res.badSourceHistogram,
"badSourceHist", skyTile=skyTileId)
593 """Create an argument parser
595 return SourceAssocArgumentParser(name=cls._DefaultName)
599 """Parse argument list and run command.
601 @param args: list of command-line arguments;
603 @param config: config for task (instance of lsst.pex.config.Config);
604 if None use cls.ConfigClass()
605 @param log: log (instance of lsst.pex.logging.Log);
606 if None use the default log
608 argumentParser = cls._makeArgumentParser()
610 config = cls.ConfigClass()
611 parsedCmd = argumentParser.parse_args(config=config, args=args, log=log)
612 name = cls._DefaultName
613 task = cls(name=name, config=parsedCmd.config, log=parsedCmd.log)
614 if not hasattr(parsedCmd,
"skyTileIds")
or len(parsedCmd.skyTileIds) == 0:
615 print >>sys.stderr,
"Running on all sky-tiles"
616 parsedCmd.skyTileIds = parsedCmd.butler.queryMetadata(
618 for skyTileId
in parsedCmd.skyTileIds:
619 parsedCmd.butler.put(
620 parsedCmd.config, name +
"_config", skyTile=skyTileId)
622 task.run(skyTileId, parsedCmd.butler)
624 if parsedCmd.doraise:
626 task.log.fatal(str.format(
627 "Failed on skyTile {}: {}", skyTileId, e))
628 traceback.print_exc(file=sys.stderr)
629 parsedCmd.butler.put(
630 task.getFullMetadata(), name +
"_metadata", skyTile=skyTileId)
Custom catalog class for record/table subclasses that are guaranteed to have an ID, and should generally be sorted by that ID.