LSST Data Management Base Package
lsst.pipe.tasks.postprocess Namespace Reference

Classes

class  WriteObjectTableConnections
 
class  TransformCatalogBaseConnections
 
class  TransformCatalogBaseConfig
 
class  TransformCatalogBaseTask
 
class  TransformObjectCatalogConnections
 

Functions

def flattenFilters (df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None)
 

Variables

 dflist
 
 df
 

Function Documentation

◆ flattenFilters()

def lsst.pipe.tasks.postprocess.flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None)
Flattens a dataframe with multilevel column index

Definition at line 43 of file postprocess.py.

def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
    """Flattens a dataframe with multilevel column index
    """
    newDf = pd.DataFrame()
    # band is the level 0 index
    dfBands = df.columns.unique(level=0).values
    for band in dfBands:
        subdf = df[band]
        columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
        newColumns = {c: columnFormat.format(band, c)
                      for c in subdf.columns if c not in noDupCols}
        cols = list(newColumns.keys())
        newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)

    # Band must be present in the input and output or else column is all NaN:
    presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
    # Get the unexploded columns from any present band's partition
    noDupDf = df[presentBands[0]][noDupCols]
    newDf = pd.concat([noDupDf, newDf], axis=1)
    return newDf
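A minimal usage sketch (not taken from postprocess.py; the toy band and column names are illustrative): build a DataFrame whose columns form the (band, column) MultiIndex produced upstream, then collapse it to band-prefixed single-level columns, keeping the shared noDupCols only once.

import pandas as pd
from lsst.pipe.tasks.postprocess import flattenFilters

# One DataFrame per band, each carrying the shared sky coordinates.
dfDict = {
    'g': pd.DataFrame({'coord_ra': [10.0], 'coord_dec': [-5.0], 'psfFlux': [1.2]}),
    'r': pd.DataFrame({'coord_ra': [10.0], 'coord_dec': [-5.0], 'psfFlux': [3.4]}),
}
# Band becomes column level 0, mirroring pd.concat(dfDict, axis=1, names=['band', 'column'])
# in the run() code shown further down this page.
df = pd.concat(dfDict, axis=1, names=['band', 'column'])

flat = flattenFilters(df)
# flat.columns -> ['coord_ra', 'coord_dec', 'g_psfFlux', 'r_psfFlux']

With camelCase=True the band and column names are simply concatenated with no separator ('gpsfFlux' in this toy case), matching the '{0}{1}' column format in the listing above.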

Variable Documentation

◆ df

lsst.pipe.tasks.postprocess.df
_DefaultName = "transformObjectCatalog"
ConfigClass = TransformObjectCatalogConfig

# Used by Gen 2 runDataRef only:
inputDataset = 'deepCoadd_obj'
outputDataset = 'objectTable'

@classmethod
def _makeArgumentParser(cls):
    parser = ArgumentParser(name=cls._DefaultName)
    parser.add_id_argument("--id", cls.inputDataset,
                           ContainerClass=CoaddDataIdContainer,
                           help="data ID, e.g. --id tract=12345 patch=1,2")
    return parser

def run(self, parq, funcs=None, dataId=None, band=None):
    # NOTE: band kwarg is ignored here.
    dfDict = {}
    analysisDict = {}
    templateDf = pd.DataFrame()

    if isinstance(parq, DeferredDatasetHandle):
        columns = parq.get(component='columns')
        inputBands = columns.unique(level=1).values
    else:
        inputBands = parq.columnLevelNames['band']

    outputBands = self.config.outputBands if self.config.outputBands else inputBands

    # Perform transform for data of filters that exist in parq.
    for inputBand in inputBands:
        if inputBand not in outputBands:
            self.log.info("Ignoring %s band data in the input", inputBand)
            continue
        self.log.info("Transforming the catalog of band %s", inputBand)
        result = self.transform(inputBand, parq, funcs, dataId)
        dfDict[inputBand] = result.df
        analysisDict[inputBand] = result.analysis
        if templateDf.empty:
            templateDf = result.df

    # Fill NaNs in columns of other wanted bands
    for filt in outputBands:
        if filt not in dfDict:
            self.log.info("Adding empty columns for band %s", filt)
            dfDict[filt] = pd.DataFrame().reindex_like(templateDf)

    # This makes a multilevel column index, with band as first level
    df = pd.concat(dfDict, axis=1, names=['band', 'column'])

    if not self.config.multilevelOutput:
        noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
        if self.config.primaryKey in noDupCols:
            noDupCols.remove(self.config.primaryKey)
        if dataId is not None:
            noDupCols += list(dataId.keys())
        df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
                            inputBands=inputBands)

    self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))

    return df
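# Illustrative aside, not part of postprocess.py: the "Adding empty columns"
# branch above relies on pd.DataFrame().reindex_like(templateDf), which returns
# an all-NaN frame sharing templateDf's index and columns, so the subsequent
# pd.concat(dfDict, axis=1, names=['band', 'column']) stays aligned per band.
import pandas as pd

templateDf = pd.DataFrame({'psfFlux': [1.2, 3.4]})
emptyBandDf = pd.DataFrame().reindex_like(templateDf)
assert emptyBandDf.shape == templateDf.shape
assert emptyBandDf.isna().all().all()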


class TractObjectDataIdContainer(CoaddDataIdContainer):

    def makeDataRefList(self, namespace):
        def getPatchRefList(tract):
            return [namespace.butler.dataRef(datasetType=self.datasetType,
                                             tract=tract.getId(),
                                             patch="%d,%d" % patch.getIndex()) for patch in tract]

        tractRefs = defaultdict(list)  # Data references for each tract
        for dataId in self.idList:
            skymap = self.getSkymap(namespace)

            if "tract" in dataId:
                tractId = dataId["tract"]
                if "patch" in dataId:
                    tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
                                                                       tract=tractId,
                                                                       patch=dataId['patch']))
                else:
                    tractRefs[tractId] += getPatchRefList(skymap[tractId])
            else:
                tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
                                 for tract in skymap)
        outputRefList = []
        for tractRefList in tractRefs.values():
            existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
            outputRefList.append(existingRefs)

        self.refList = outputRefList


class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
                                        dimensions=("tract", "skymap")):
    inputCatalogs = connectionTypes.Input(
        doc="Per-Patch objectTables conforming to the standard data model.",
        name="objectTable",
        storageClass="DataFrame",
        dimensions=("tract", "patch", "skymap"),
        multiple=True,
    )
    outputCatalog = connectionTypes.Output(
        doc="Per-tract horizontal concatenation of the input objectTables",
        name="objectTable_tract",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )


class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
                                   pipelineConnections=ConsolidateObjectTableConnections):
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )


class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
    _DefaultName = "consolidateObjectTable"
    ConfigClass = ConsolidateObjectTableConfig

    inputDataset = 'objectTable'
    outputDataset = 'objectTable_tract'

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        self.log.info("Concatenating %s per-patch Object Tables",
                      len(inputs['inputCatalogs']))
        df = pd.concat(inputs['inputCatalogs'])
        butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", cls.inputDataset,
                               help="data ID, e.g. --id tract=12345",
                               ContainerClass=TractObjectDataIdContainer)
        return parser

    def runDataRef(self, patchRefList):
        df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
        patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)

    def writeMetadata(self, dataRef):
        pass


class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
                                      defaultTemplates={"catalogType": ""},
                                      dimensions=("instrument", "visit", "detector")):

    inputCatalog = connectionTypes.Input(
        doc="Wide input catalog of sources produced by WriteSourceTableTask",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True
    )
    outputCatalog = connectionTypes.Output(
        doc="Narrower, per-detector Source Table transformed and converted per a "
            "specified set of functors",
        name="{catalogType}sourceTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class TransformSourceTableConfig(TransformCatalogBaseConfig,
                                 pipelineConnections=TransformSourceTableConnections):

    def setDefaults(self):
        super().setDefaults()
        self.primaryKey = 'sourceId'


class TransformSourceTableTask(TransformCatalogBaseTask):
    _DefaultName = "transformSourceTable"
    ConfigClass = TransformSourceTableConfig

    inputDataset = 'source'
    outputDataset = 'sourceTable'

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", datasetType=cls.inputDataset,
                               level="sensor",
                               help="data ID, e.g. --id visit=12345 ccd=0")
        return parser

    def runDataRef(self, dataRef):
        parq = dataRef.get()
        funcs = self.getFunctors()
        band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
        df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
        self.write(df, dataRef)
        return df


class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
                                         dimensions=("instrument", "visit",),
                                         defaultTemplates={"calexpType": ""}):
    calexp = connectionTypes.Input(
        doc="Processed exposures used for metadata",
        name="{calexpType}calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Output(
        doc=("Per-visit consolidated exposure metadata.  These catalogs use "
             "detector id for the id and are sorted for fast lookups of a "
             "detector."),
        name="{calexpType}visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
    )
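# Illustrative aside, not part of postprocess.py: because the visitSummary
# ExposureCatalog sets each record id to the detector id and is sorted before
# being written (see _combineExposureMetadata below), a detector's row can be
# looked up directly with the afw SortedCatalog find() method.
import lsst.afw.table as afwTable

schema = afwTable.ExposureTable.makeMinimalSchema()
cat = afwTable.ExposureCatalog(schema)
for detId in (31, 7, 104):      # toy detector ids, deliberately unordered
    cat.addNew().setId(detId)
cat.sort()                      # sort on the id key, as the task does
assert cat.find(104).getId() == 104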


class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
                                    pipelineConnections=ConsolidateVisitSummaryConnections):
    pass


class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    _DefaultName = "consolidateVisitSummary"
    ConfigClass = ConsolidateVisitSummaryConfig

    @classmethod
    def _makeArgumentParser(cls):
        parser = ArgumentParser(name=cls._DefaultName)

        parser.add_id_argument("--id", "calexp",
                               help="data ID, e.g. --id visit=12345",
                               ContainerClass=VisitDataIdContainer)
        return parser

    def writeMetadata(self, dataRef):
        pass

    def writeConfig(self, butler, clobber=False, doBackup=True):
        pass

    def runDataRef(self, dataRefList):
        visit = dataRefList[0].dataId['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefList), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)

        dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        dataRefs = butlerQC.get(inputRefs.calexp)
        visit = dataRefs[0].dataId.byName()['visit']

        self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
                       len(dataRefs), visit)

        expCatalog = self._combineExposureMetadata(visit, dataRefs)

        butlerQC.put(expCatalog, outputRefs.visitSummary)

    def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
        schema = self._makeVisitSummarySchema()
        cat = afwTable.ExposureCatalog(schema)
        cat.resize(len(dataRefs))

        cat['visit'] = visit

        for i, dataRef in enumerate(dataRefs):
            if isGen3:
                visitInfo = dataRef.get(component='visitInfo')
                filterLabel = dataRef.get(component='filterLabel')
                summaryStats = dataRef.get(component='summaryStats')
                detector = dataRef.get(component='detector')
                wcs = dataRef.get(component='wcs')
                photoCalib = dataRef.get(component='photoCalib')
                bbox = dataRef.get(component='bbox')
                validPolygon = dataRef.get(component='validPolygon')
            else:
                # Note that we need to read the calexp because there is
                # no magic access to the psf except through the exposure.
                gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
                exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
                visitInfo = exp.getInfo().getVisitInfo()
                filterLabel = dataRef.get("calexp_filterLabel")
                summaryStats = exp.getInfo().getSummaryStats()
                wcs = exp.getWcs()
                photoCalib = exp.getPhotoCalib()
                detector = exp.getDetector()
                bbox = dataRef.get(datasetType='calexp_bbox')
                validPolygon = exp.getInfo().getValidPolygon()

            rec = cat[i]
            rec.setBBox(bbox)
            rec.setVisitInfo(visitInfo)
            rec.setWcs(wcs)
            rec.setPhotoCalib(photoCalib)
            rec.setValidPolygon(validPolygon)

            rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
            rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
            rec.setId(detector.getId())
            rec['psfSigma'] = summaryStats.psfSigma
            rec['psfIxx'] = summaryStats.psfIxx
            rec['psfIyy'] = summaryStats.psfIyy
            rec['psfIxy'] = summaryStats.psfIxy
            rec['psfArea'] = summaryStats.psfArea
            rec['raCorners'][:] = summaryStats.raCorners
            rec['decCorners'][:] = summaryStats.decCorners
            rec['ra'] = summaryStats.ra
            rec['decl'] = summaryStats.decl
            rec['zenithDistance'] = summaryStats.zenithDistance
            rec['zeroPoint'] = summaryStats.zeroPoint
            rec['skyBg'] = summaryStats.skyBg
            rec['skyNoise'] = summaryStats.skyNoise
            rec['meanVar'] = summaryStats.meanVar
            rec['astromOffsetMean'] = summaryStats.astromOffsetMean
            rec['astromOffsetStd'] = summaryStats.astromOffsetStd

        metadata = dafBase.PropertyList()
        metadata.add("COMMENT", "Catalog id is detector id, sorted.")
        # We are looping over existing datarefs, so the following is true
        metadata.add("COMMENT", "Only detectors with data have entries.")
        cat.setMetadata(metadata)

        cat.sort()
        return cat

    def _makeVisitSummarySchema(self):
        schema = afwTable.ExposureTable.makeMinimalSchema()
        schema.addField('visit', type='I', doc='Visit number')
        schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
        schema.addField('band', type='String', size=32, doc='Name of band')
        schema.addField('psfSigma', type='F',
                        doc='PSF model second-moments determinant radius (center of chip) (pixel)')
        schema.addField('psfArea', type='F',
                        doc='PSF model effective area (center of chip) (pixel**2)')
        schema.addField('psfIxx', type='F',
                        doc='PSF model Ixx (center of chip) (pixel**2)')
        schema.addField('psfIyy', type='F',
                        doc='PSF model Iyy (center of chip) (pixel**2)')
        schema.addField('psfIxy', type='F',
                        doc='PSF model Ixy (center of chip) (pixel**2)')
        schema.addField('raCorners', type='ArrayD', size=4,
                        doc='Right Ascension of bounding box corners (degrees)')
        schema.addField('decCorners', type='ArrayD', size=4,
                        doc='Declination of bounding box corners (degrees)')
        schema.addField('ra', type='D',
                        doc='Right Ascension of bounding box center (degrees)')
        schema.addField('decl', type='D',
                        doc='Declination of bounding box center (degrees)')
        schema.addField('zenithDistance', type='F',
                        doc='Zenith distance of bounding box center (degrees)')
        schema.addField('zeroPoint', type='F',
                        doc='Mean zeropoint in detector (mag)')
        schema.addField('skyBg', type='F',
                        doc='Average sky background (ADU)')
        schema.addField('skyNoise', type='F',
                        doc='Average sky noise (ADU)')
        schema.addField('meanVar', type='F',
                        doc='Mean variance of the weight plane (ADU**2)')
        schema.addField('astromOffsetMean', type='F',
                        doc='Mean offset of astrometric calibration matches (arcsec)')
        schema.addField('astromOffsetStd', type='F',
                        doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')

        return schema


class VisitDataIdContainer(DataIdContainer):

Definition at line 1725 of file postprocess.py.

◆ dflist

lsst.pipe.tasks.postprocess.dflist
_DefaultName = "writeObjectTable"
ConfigClass = WriteObjectTableConfig
RunnerClass = MergeSourcesRunner

# Names of table datasets to be merged
inputDatasets = ('forced_src', 'meas', 'ref')

# Tag of output dataset written by `MergeSourcesTask.write`
outputDataset = 'obj'

def __init__(self, butler=None, schema=None, **kwargs):
    # It is a shame that this class can't use the default init for CmdLineTask
    # But to do so would require its own special task runner, which is many
    # more lines of specialization, so this is how it is for now
    super().__init__(**kwargs)

def runDataRef(self, patchRefList):
    catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
    dataId = patchRefList[0].dataId
    mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
    self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))

def runQuantum(self, butlerQC, inputRefs, outputRefs):
    inputs = butlerQC.get(inputRefs)

    measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
                zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
    forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
                        zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}

    catalogs = {}
    for band in measDict.keys():
        catalogs[band] = {'meas': measDict[band]['meas'],
                          'forced_src': forcedSourceDict[band]['forced_src'],
                          'ref': inputs['inputCatalogRef']}
    dataId = butlerQC.quantum.dataId
    df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
    outputs = pipeBase.Struct(outputCatalog=df)
    butlerQC.put(outputs, outputRefs)

@classmethod
def _makeArgumentParser(cls):
    return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])

def readCatalog(self, patchRef):
    band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
    catalogDict = {}
    for dataset in self.inputDatasets:
        catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
        self.log.info("Read %d sources from %s for band %s: %s",
                      len(catalog), dataset, band, patchRef.dataId)
        catalogDict[dataset] = catalog
    return band, catalogDict

def run(self, catalogs, tract, patch):
    dfs = []
    for filt, tableDict in catalogs.items():
        for dataset, table in tableDict.items():
            # Convert afwTable to pandas DataFrame
            df = table.asAstropy().to_pandas().set_index('id', drop=True)

            # Sort columns by name, to ensure matching schema among patches
            df = df.reindex(sorted(df.columns), axis=1)
            df['tractId'] = tract
            df['patchId'] = patch

            # Make columns a 3-level MultiIndex
            df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
                                                   names=('dataset', 'band', 'column'))
            dfs.append(df)

    catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
    return catalog
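# Illustrative aside, not part of postprocess.py: run() above returns a frame
# whose columns form a three-level (dataset, band, column) MultiIndex, so a
# single dataset/band slice is plain pandas selection. The column names here
# are toy values.
import pandas as pd

columns = pd.MultiIndex.from_tuples(
    [('meas', 'g', 'psfFlux'),
     ('meas', 'r', 'psfFlux'),
     ('ref', 'g', 'detect_isPrimary')],
    names=('dataset', 'band', 'column'))
merged = pd.DataFrame([[1.0, 2.0, True]], columns=columns)

measG = merged['meas']['g']     # all 'meas' columns for band 'g'
assert list(measG.columns) == ['psfFlux']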

def write(self, patchRef, catalog):
    patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
    # since the filter isn't actually part of the data ID for the dataset we're saving,
    # it's confusing to see it in the log message, even if the butler simply ignores it.
    mergeDataId = patchRef.dataId.copy()
    del mergeDataId["filter"]
    self.log.info("Wrote merged catalog: %s", mergeDataId)

def writeMetadata(self, dataRefList):
    pass


class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
                                  defaultTemplates={"catalogType": ""},
                                  dimensions=("instrument", "visit", "detector")):

    catalog = connectionTypes.Input(
        doc="Input full-depth catalog of sources produced by CalibrateTask",
        name="{catalogType}src",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector")
    )
    outputCatalog = connectionTypes.Output(
        doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
            "replaced with an index; all other columns are unchanged.",
        name="{catalogType}source",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector")
    )


class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
                             pipelineConnections=WriteSourceTableConnections):
    doApplyExternalPhotoCalib = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local photoCalib columns from the calexp.photoCalib? Should only be set to True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )
    doApplyExternalSkyWcs = pexConfig.Field(
        dtype=bool,
        default=False,
        doc=("Add local WCS columns from the calexp.wcs? Should only be set to True if "
             "generating Source Tables from older src tables which do not already have local calib columns")
    )


class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
    _DefaultName = "writeSourceTable"
    ConfigClass = WriteSourceTableConfig

    def runDataRef(self, dataRef):
        src = dataRef.get('src')
        if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
            src = self.addCalibColumns(src, dataRef)

        ccdVisitId = dataRef.get('ccdExposureId')
        result = self.run(src, ccdVisitId=ccdVisitId)
        dataRef.put(result.table, 'source')

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
        result = self.run(**inputs).table
        outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
        butlerQC.put(outputs, outputRefs)

    def run(self, catalog, ccdVisitId=None):
        self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
        df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
        df['ccdVisitId'] = ccdVisitId
        return pipeBase.Struct(table=ParquetTable(dataFrame=df))

    def addCalibColumns(self, catalog, dataRef):

Definition at line 515 of file postprocess.py.