__all__ = ["IngestIndexManager", "IngestGaiaManager"]
from ctypes import c_int
import itertools
import multiprocessing
import os.path

import astropy.time
import astropy.units as u
import numpy as np

import lsst.afw.table as afwTable
from lsst.afw.image import fluxErrFromABMagErr
import lsst.geom
# Process-shared counters, inherited by the multiprocessing Pool workers.
# COUNTER: running source-id counter used by _setIds when config.id_name is
# not set. FILE_PROGRESS: number of input files fully ingested so far, read
# by _ingestOneFile for percent-complete logging.
COUNTER = multiprocessing.Value(c_int, 0)
FILE_PROGRESS = multiprocessing.Value(c_int, 0)
Ingest a reference catalog from external files into a butler repository,
using a multiprocessing Pool to speed up the work.

Parameters
----------
filenames : `dict` [`int`, `str`]
    The HTM pixel id and filenames to ingest the catalog into.
config : `lsst.meas.algorithms.IngestIndexedReferenceConfig`
    The Task configuration holding the field names.
file_reader : `lsst.pipe.base.Task`
    The file reader to use to load the files.
indexer : `lsst.meas.algorithms.HtmIndexer`
    The class used to compute the HTM pixel per coordinate.
schema : `lsst.afw.table.Schema`
    The schema of the output catalog.
key_map : `dict` [`str`, `lsst.afw.table.Key`]
    The mapping from output field names to keys in the Schema.
htmRange : `tuple` [`int`]
    The start and end HTM pixel ids.
addRefCatMetadata : callable
    A function called to add extra metadata to each output Catalog.
log : `lsst.log.Log`
    The log to send messages to.
# Boolean flag columns that may be copied from the input catalog; see
# _setFlags for how each maps to a config ``is_<flag>_name`` attribute.
_flags = ['photometric', 'resolved', 'variable']
def __init__(self, filenames, config, file_reader, indexer,
             schema, key_map, htmRange, addRefCatMetadata, log):
    # NOTE(review): the attribute assignments below were reconstructed from
    # their usage elsewhere in this file; the original body was garbled.
    self.filenames = filenames
    self.config = config
    self.file_reader = file_reader
    self.indexer = indexer
    self.schema = schema
    self.key_map = key_map
    self.htmRange = htmRange
    self.addRefCatMetadata = addRefCatMetadata
    self.log = log
    if self.config.coord_err_unit is not None:
        # Cache the parsed unit; _getCoordErr tests for this attribute with
        # hasattr() to decide whether coordinate errors are ingested.
        self.coord_err_unit = u.Unit(self.config.coord_err_unit)
def run(self, inputFiles):
    """Index a set of input files from a reference catalog, and write the
    output to the appropriate filenames, in parallel.

    Parameters
    ----------
    inputFiles : `list`
        A list of file paths to read data from.
    """
    global COUNTER, FILE_PROGRESS
    # Total file count is read by _ingestOneFile for progress logging.
    self.nInputFiles = len(inputFiles)

    with multiprocessing.Manager() as manager:
        COUNTER.value = 0
        FILE_PROGRESS.value = 0
        # One lock per HTM pixel: each pixel maps to one output file, and
        # only one worker may read/append/write that file at a time.
        fileLocks = manager.dict()
        for i in range(self.htmRange[0], self.htmRange[1]):
            fileLocks[i] = manager.Lock()
        self.log.info("File locks created.")
        with multiprocessing.Pool(self.config.n_processes) as pool:
            pool.starmap(self._ingestOneFile, zip(inputFiles, itertools.repeat(fileLocks)))
def _ingestOneFile(self, filename, fileLocks):
    """Read and process one file, and write its records to the correct
    indexed files, while handling exceptions in a useful way so that they
    don't get swallowed by the multiprocess pool.

    Parameters
    ----------
    filename : `str`
        The file to process.
    fileLocks : `dict` [`int`, `multiprocessing.Lock`]
        A Lock for each HTM pixel; each pixel gets one file written, and
        we need to block when one process is accessing that file.
    """
    global FILE_PROGRESS
    inputData = self.file_reader.run(filename)
    # Pre-compute the flux and coordinate-error columns once per file, so
    # the per-pixel loop below only does array slicing.
    fluxes = self._getFluxes(inputData)
    coordErr = self._getCoordErr(inputData)
    matchedPixels = self.indexer.indexPoints(inputData[self.config.ra_name],
                                             inputData[self.config.dec_name])
    pixel_ids = set(matchedPixels)
    for pixelId in pixel_ids:
        with fileLocks[pixelId]:
            self._doOnePixel(inputData, matchedPixels, pixelId, fluxes, coordErr)
    with FILE_PROGRESS.get_lock():
        oldPercent = 100 * FILE_PROGRESS.value / self.nInputFiles
        FILE_PROGRESS.value += 1
        percent = 100 * FILE_PROGRESS.value / self.nInputFiles
        # Only log on whole-percent transitions, to minimize log spam.
        if np.floor(percent) - np.floor(oldPercent) >= 1:
            self.log.info("Completed %d / %d files: %d %% complete ",
                          FILE_PROGRESS.value,
                          self.nInputFiles,
                          percent)
def _doOnePixel(self, inputData, matchedPixels, pixelId, fluxes, coordErr):
    """Process one HTM pixel, appending to an existing catalog or creating
    a new catalog, as needed.

    Parameters
    ----------
    inputData : `numpy.ndarray`
        The data from one input file.
    matchedPixels : `numpy.ndarray`
        The row-matched pixel indexes corresponding to ``inputData``.
    pixelId : `int`
        The pixel index we are currently processing.
    fluxes : `dict` [`str`, `numpy.ndarray`]
        The values that will go into the flux and fluxErr fields in the
        output catalog.
    coordErr : `dict` [`str`, `numpy.ndarray`]
        The values that will go into the coord_raErr, coord_decErr, and
        coord_ra_dec_Cov fields in the output catalog (in radians).
    """
    idx = np.where(matchedPixels == pixelId)[0]
    catalog = self.getCatalog(pixelId, self.schema, len(idx))
    # getCatalog pre-extended the catalog by len(idx); fill those new
    # records at the end.
    for outputRow, inputRow in zip(catalog[-len(idx):], inputData[idx]):
        self._fillRecord(outputRow, inputRow)

    # Hold the shared-counter lock for the whole id assignment.
    with COUNTER.get_lock():
        self._setIds(inputData[idx], catalog)

    # Fluxes are keyed by output field name via key_map.
    for name, array in fluxes.items():
        catalog[self.key_map[name]][-len(idx):] = array[idx]

    # Coordinate errors are keyed directly by output column name.
    for name, array in coordErr.items():
        catalog[name][-len(idx):] = array[idx]

    catalog.writeFits(self.filenames[pixelId])
def _setIds(self, inputData, catalog):
    """Fill the `id` field of catalog with a running index, filling the
    last values up to the length of ``inputData``.

    Fill with `self.config.id_name` if specified, otherwise use the
    global running counter value.

    Parameters
    ----------
    inputData : `numpy.ndarray`
        The input data that is being processed.
    catalog : `lsst.afw.table.SimpleCatalog`
        The output catalog to fill the ids.
    """
    global COUNTER
    size = len(inputData)
    if self.config.id_name:
        catalog['id'][-size:] = inputData[self.config.id_name]
    else:
        # No id column in the input: assign from the shared counter.
        # The caller must hold COUNTER.get_lock() (see _doOnePixel).
        idEnd = COUNTER.value + size
        catalog['id'][-size:] = np.arange(COUNTER.value, idEnd)
        COUNTER.value = idEnd
207 """Get a catalog from disk or create it if it doesn't exist.
212 Identifier for catalog to retrieve
213 schema : `lsst.afw.table.Schema`
214 Schema to use in catalog creation it does not exist.
216 The number of new elements that will be added to the catalog,
217 so space can be preallocated.
221 catalog : `lsst.afw.table.SimpleCatalog`
222 The new or read-and-resized catalog specified by `dataId`.
225 if os.path.isfile(self.
filenamesfilenames[pixelId]):
226 catalog = afwTable.SimpleCatalog.readFits(self.
filenamesfilenames[pixelId])
227 catalog.resize(len(catalog) + nNewElements)
228 return catalog.copy(deep=
True)
230 catalog.resize(nNewElements)
236 """Create an ICRS coord. from a row of a catalog being ingested.
240 row : `numpy.ndarray`
241 Row from catalog being ingested.
243 Name of RA key in catalog being ingested.
245 Name of Dec key in catalog being ingested.
249 coord : `lsst.geom.SpherePoint`
def _getCoordErr(self, inputData):
    """Compute the ra/dec error fields that will go into the output catalog.

    Parameters
    ----------
    inputData : `numpy.ndarray`
        The input data to compute fluxes for.

    Returns
    -------
    coordErr : `dict` [`str`, `numpy.ndarray`]
        The values that will go into the coord_raErr, coord_decErr, fields
        in the output catalog (in radians).

    Notes
    -----
    This does not currently handle the ra/dec covariance field,
    ``coord_ra_dec_Cov``. That field may require extra work, as its units
    may be more complicated in external catalogs.
    """
    result = {}
    # coord_err_unit is only set by __init__ when the config specifies a
    # unit; no attribute means no coordinate errors to ingest.
    if hasattr(self, "coord_err_unit"):
        result['coord_raErr'] = u.Quantity(inputData[self.config.ra_err_name],
                                           self.coord_err_unit).to_value(u.radian)
        result['coord_decErr'] = u.Quantity(inputData[self.config.dec_err_name],
                                            self.coord_err_unit).to_value(u.radian)
    return result
def _setFlags(self, record, row):
    """Set flags in an output record.

    Parameters
    ----------
    record : `lsst.afw.table.SimpleRecord`
        Row from indexed catalog to modify.
    row : `numpy.ndarray`
        Row from catalog being ingested.
    """
    names = record.schema.getNames()
    for flag in self._flags:
        # Only set flags that exist in the output schema (the guard was
        # reconstructed: ``names`` is otherwise unused).
        if flag in names:
            attr_name = 'is_{}_name'.format(flag)
            record.set(self.key_map[flag], bool(row[getattr(self.config, attr_name)]))
def _getFluxes(self, inputData):
    """Compute the flux fields that will go into the output catalog.

    Parameters
    ----------
    inputData : `numpy.ndarray`
        The input data to compute fluxes for.

    Returns
    -------
    fluxes : `dict` [`str`, `numpy.ndarray`]
        The values that will go into the flux and fluxErr fields in the
        output catalog.
    """
    result = {}
    for item in self.config.mag_column_list:
        # AB magnitude -> flux in nanojansky.
        result[item+'_flux'] = (inputData[item]*u.ABmag).to_value(u.nJy)
    if len(self.config.mag_err_column_map) > 0:
        for err_key in self.config.mag_err_column_map.keys():
            error_col_name = self.config.mag_err_column_map[err_key]
            # fluxErrFromABMagErr returns Jy; the 1e9 converts to nJy.
            fluxErr = fluxErrFromABMagErr(inputData[error_col_name].copy(),
                                          inputData[err_key].copy())*1e9
            result[err_key+'_fluxErr'] = fluxErr
    return result
def _setProperMotion(self, record, row):
    """Set proper motion fields in a record of an indexed catalog.

    The proper motions are read from the specified columns,
    scaled appropriately, and installed in the appropriate
    columns of the output.

    Parameters
    ----------
    record : `lsst.afw.table.SimpleRecord`
        Row from indexed catalog to modify.
    row : structured `numpy.array`
        Row from catalog being ingested.
    """
    if self.config.pm_ra_name is None:
        # Assumes config validation guarantees the PM fields are set (or
        # unset) together, so checking one suffices — TODO confirm.
        return
    # radians per input PM unit: pm_scale is in mas, and 3600*1000 is
    # mas per degree.
    radPerOriginal = np.radians(self.config.pm_scale)/(3600*1000)
    record.set(self.key_map["pm_ra"], row[self.config.pm_ra_name]*radPerOriginal*lsst.geom.radians)
    record.set(self.key_map["pm_dec"], row[self.config.pm_dec_name]*radPerOriginal*lsst.geom.radians)
    # NOTE(review): epoch line reconstructed from _epochToMjdTai's presence
    # in this file; the original statement was lost in garbling.
    record.set(self.key_map["epoch"], self._epochToMjdTai(row[self.config.epoch_name]))
    if self.config.pm_ra_err_name is not None:
        record.set(self.key_map["pm_raErr"], row[self.config.pm_ra_err_name]*radPerOriginal)
        record.set(self.key_map["pm_decErr"], row[self.config.pm_dec_err_name]*radPerOriginal)
def _setParallax(self, record, row):
    """Set the parallax fields in a record of a refcat.

    Parameters
    ----------
    record : `lsst.afw.table.SimpleRecord`
        Row from indexed catalog to modify.
    row : structured `numpy.array`
        Row from catalog being ingested.
    """
    if self.config.parallax_name is None:
        return
    scale = self.config.parallax_scale*lsst.geom.milliarcseconds
    record.set(self.key_map['parallax'], row[self.config.parallax_name]*scale)
    record.set(self.key_map['parallaxErr'], row[self.config.parallax_err_name]*scale)
def _epochToMjdTai(self, nativeEpoch):
    """Convert an epoch in native format to TAI MJD (a float).

    Parameters
    ----------
    nativeEpoch
        Epoch value in the configured native representation
        (``config.epoch_format`` / ``config.epoch_scale``).

    Returns
    -------
    mjdTai : `float`
        The epoch converted to a TAI MJD.
    """
    return astropy.time.Time(nativeEpoch, format=self.config.epoch_format,
                             scale=self.config.epoch_scale).tai.mjd
def _setExtra(self, record, row):
    """Set extra data fields in a record of an indexed catalog.

    Parameters
    ----------
    record : `lsst.afw.table.SimpleRecord`
        Row from indexed catalog to modify.
    row : structured `numpy.array`
        Row from catalog being ingested.
    """
    for extra_col in self.config.extra_col_names:
        value = row[extra_col]
        # Text read from files arrives as numpy's own numpy.str_ type;
        # convert to a plain Python str before storing in the record
        # (the conversion line was reconstructed: the isinstance check is
        # otherwise a no-op).
        if isinstance(value, np.str_):
            value = str(value)
        record.set(self.key_map[extra_col], value)
def _fillRecord(self, record, row):
    """Fill a record in an indexed catalog to be persisted.

    Parameters
    ----------
    record : `lsst.afw.table.SimpleRecord`
        Row from indexed catalog to modify.
    row : structured `numpy.array`
        Row from catalog being ingested.
    """
    # NOTE(review): body reconstructed from the helper methods defined in
    # this file; the original statements were lost in garbling.
    record.setCoord(self.computeCoord(row, self.config.ra_name, self.config.dec_name))
    self._setFlags(record, row)
    self._setProperMotion(record, row)
    self._setParallax(record, row)
    self._setExtra(record, row)
406 """Special-case ingest manager to deal with Gaia fluxes.
408 def _getFluxes(self, input):
411 def gaiaFluxToFlux(flux, zeroPoint):
412 """Equations 5.19 and 5.30 from the Gaia calibration document define the
413 conversion from Gaia electron/second fluxes to AB magnitudes.
414 https://gea.esac.esa.int/archive/documentation/GDR2/Data_processing/chap_cu5pho/sec_cu5pho_calibr/ssec_cu5pho_calibr_extern.html
416 result = ((zeroPoint + -2.5 * np.log10(flux))*u.ABmag).to_value(u.nJy)
418 result[flux == 0] = 0
423 with np.errstate(invalid=
'ignore', divide=
'ignore'):
426 result[
'phot_g_mean_flux'] = gaiaFluxToFlux(input[
'phot_g_mean_flux'], 25.7934)
427 result[
'phot_bp_mean_flux'] = gaiaFluxToFlux(input[
'phot_bp_mean_flux'], 25.3806)
428 result[
'phot_rp_mean_flux'] = gaiaFluxToFlux(input[
'phot_rp_mean_flux'], 25.1161)
430 result[
'phot_g_mean_fluxErr'] = result[
'phot_g_mean_flux'] / input[
'phot_g_mean_flux_over_error']
431 result[
'phot_bp_mean_fluxErr'] = result[
'phot_bp_mean_flux'] / input[
'phot_bp_mean_flux_over_error']
432 result[
'phot_rp_mean_fluxErr'] = result[
'phot_rp_mean_flux'] / input[
'phot_rp_mean_flux_over_error']
Custom catalog class for record/table subclasses that are guaranteed to have an ID,...
Point in an unspecified spherical coordinate system.
def getCatalog(self, pixelId, schema, nNewElements)
def _setProperMotion(self, record, row)
def _getCoordErr(self, inputData)
def _getFluxes(self, inputData)
def _ingestOneFile(self, filename, fileLocks)
def run(self, inputFiles)
def _setIds(self, inputData, catalog)
def __init__(self, filenames, config, file_reader, indexer, schema, key_map, htmRange, addRefCatMetadata, log)
def _setParallax(self, record, row)
def _doOnePixel(self, inputData, matchedPixels, pixelId, fluxes, coordErr)
def computeCoord(row, ra_name, dec_name)
def _epochToMjdTai(self, nativeEpoch)
def _fillRecord(self, record, row)
def _setFlags(self, record, row)
def _setExtra(self, record, row)
daf::base::PropertySet * set
Backwards-compatibility support for depersisting the old Calib (FluxMag0/FluxMag0Err) objects.
double fluxErrFromABMagErr(double magErr, double mag) noexcept
Compute flux error in Janskys from AB magnitude error and AB magnitude.
def format(config, name=None, writeSourceLine=True, prefix="", verbose=False)