ingestIndexManager.py
# This file is part of meas_algorithms.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

__all__ = ["IngestIndexManager", "IngestGaiaManager"]

import os.path
import itertools
import multiprocessing

import astropy.time
import astropy.units as u
import numpy as np

import lsst.geom
import lsst.sphgeom
import lsst.afw.table as afwTable
from lsst.afw.image import fluxErrFromABMagErr


# Global shared counter to keep track of source ids
# (multiprocess sharing is most easily done with a global).
COUNTER = 0
# Global shared counter to keep track of the number of files processed.
FILE_PROGRESS = 0
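
# A minimal sketch of the shared-counter pattern used below (illustrative,
# not part of the pipeline): `run` replaces these module-level placeholders
# with `multiprocessing.Value` instances, and workers increment them under
# the Value's own lock, e.g.:
#
#     counter = multiprocessing.Value('i', 0)
#     with counter.get_lock():
#         counter.value += 1
#
# Forked worker processes inherit module globals, which is why the counters
# live at module scope rather than on the manager instance.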

class IngestIndexManager:
    """
    Ingest a reference catalog from external files into a butler repository,
    using a multiprocessing Pool to speed up the work.

    Parameters
    ----------
    filenames : `dict` [`int`, `str`]
        A mapping from HTM pixel id to the output filename for that pixel.
    config : `lsst.meas.algorithms.IngestIndexedReferenceConfig`
        The Task configuration holding the field names.
    file_reader : `lsst.pipe.base.Task`
        The file reader to use to load the files.
    indexer : `lsst.meas.algorithms.HtmIndexer`
        The class used to compute the HTM pixel per coordinate.
    schema : `lsst.afw.table.Schema`
        The schema of the output catalog.
    key_map : `dict` [`str`, `lsst.afw.table.Key`]
        The mapping from output field names to keys in the Schema.
    htmRange : `tuple` [`int`]
        The start and end HTM pixel ids.
    addRefCatMetadata : callable
        A function called to add extra metadata to each output Catalog.
    log : `lsst.log.Log`
        The log to send messages to.
    """
    _flags = ['photometric', 'resolved', 'variable']

    def __init__(self, filenames, config, file_reader, indexer,
                 schema, key_map, htmRange, addRefCatMetadata, log):
        self.filenames = filenames
        self.config = config
        self.file_reader = file_reader
        self.indexer = indexer
        self.schema = schema
        self.key_map = key_map
        self.htmRange = htmRange
        self.addRefCatMetadata = addRefCatMetadata
        self.log = log

    def run(self, inputFiles):
        """Index a set of input files from a reference catalog, and write the
        output to the appropriate filenames, in parallel.

        Parameters
        ----------
        inputFiles : `list`
            A list of file paths to read data from.
        """
        global COUNTER, FILE_PROGRESS
        self.nInputFiles = len(inputFiles)

        with multiprocessing.Manager() as manager:
            COUNTER = multiprocessing.Value('i', 0)
            FILE_PROGRESS = multiprocessing.Value('i', 0)
            fileLocks = manager.dict()
            self.log.info("Creating %s file locks.", self.htmRange[1] - self.htmRange[0])
            for i in range(self.htmRange[0], self.htmRange[1]):
                fileLocks[i] = manager.Lock()
            self.log.info("File locks created.")
            with multiprocessing.Pool(self.config.n_processes) as pool:
                pool.starmap(self._ingestOneFile, zip(inputFiles, itertools.repeat(fileLocks)))

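    # A usage sketch (illustrative only; the arguments follow this class's
    # constructor signature): a driver task builds the manager and hands it
    # the raw catalog files, roughly:
    #
    #     manager = IngestIndexManager(filenames, config, file_reader,
    #                                  indexer, schema, key_map, htmRange,
    #                                  addRefCatMetadata, log)
    #     manager.run(inputFiles)
    #
    # `filenames` maps each HTM pixel id to its output FITS path, and each
    # worker process takes the per-pixel lock before touching that file.
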
    def _ingestOneFile(self, filename, fileLocks):
        """Read and process one file, and write its records to the correct
        indexed files, while handling exceptions in a useful way so that they
        don't get swallowed by the multiprocess pool.

        Parameters
        ----------
        filename : `str`
            The file to process.
        fileLocks : `dict` [`int`, `multiprocessing.Lock`]
            A Lock for each HTM pixel; each pixel gets one file written, and
            we need to block when one process is accessing that file.
        """
        global FILE_PROGRESS
        inputData = self.file_reader.run(filename)
        fluxes = self._getFluxes(inputData)
        matchedPixels = self.indexer.indexPoints(inputData[self.config.ra_name],
                                                 inputData[self.config.dec_name])
        pixel_ids = set(matchedPixels)
        for pixelId in pixel_ids:
            with fileLocks[pixelId]:
                self._doOnePixel(inputData, matchedPixels, pixelId, fluxes)
        with FILE_PROGRESS.get_lock():
            oldPercent = 100 * FILE_PROGRESS.value / self.nInputFiles
            FILE_PROGRESS.value += 1
            percent = 100 * FILE_PROGRESS.value / self.nInputFiles
            # Only log each "new percent".
            if np.floor(percent) - np.floor(oldPercent) >= 1:
                self.log.info("Completed %d / %d files: %d %% complete ",
                              FILE_PROGRESS.value,
                              self.nInputFiles,
                              percent)

    def _doOnePixel(self, inputData, matchedPixels, pixelId, fluxes):
        """Process one HTM pixel, appending to an existing catalog or creating
        a new catalog, as needed.

        Parameters
        ----------
        inputData : `numpy.ndarray`
            The data from one input file.
        matchedPixels : `numpy.ndarray`
            The row-matched pixel indexes corresponding to ``inputData``.
        pixelId : `int`
            The pixel index we are currently processing.
        fluxes : `dict` [`str`, `numpy.ndarray`]
            The values that will go into the flux and fluxErr fields in the
            output catalog.
        """
        idx = np.where(matchedPixels == pixelId)[0]
        catalog = self.getCatalog(pixelId, self.schema, len(idx))
        for outputRow, inputRow in zip(catalog[-len(idx):], inputData[idx]):
            self._fillRecord(outputRow, inputRow)

        global COUNTER
        with COUNTER.get_lock():
            self._setIds(inputData[idx], catalog)

        for name, array in fluxes.items():
            catalog[self.key_map[name]][-len(idx):] = array[idx]

        catalog.writeFits(self.filenames[pixelId])

    def _setIds(self, inputData, catalog):
        """Fill the ``id`` field of ``catalog``, setting the last
        ``len(inputData)`` values.

        Use the input's ``id_name`` column if the config specifies one;
        otherwise assign from the global running counter.

        Parameters
        ----------
        inputData : `numpy.ndarray`
            The input data that is being processed.
        catalog : `lsst.afw.table.SimpleCatalog`
            The output catalog to fill the ids of.
        """
        global COUNTER
        size = len(inputData)
        if self.config.id_name:
            catalog['id'][-size:] = inputData[self.config.id_name]
        else:
            idEnd = COUNTER.value + size
            catalog['id'][-size:] = np.arange(COUNTER.value, idEnd)
            COUNTER.value = idEnd

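    # Worked example of the running counter (illustrative): if COUNTER.value
    # is 10 and three new rows were appended, the trailing ids become
    # [10, 11, 12] and COUNTER.value advances to 13, so the next pixel's
    # rows continue the sequence without gaps or overlap.
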
    def getCatalog(self, pixelId, schema, nNewElements):
        """Get a catalog from disk, or create it if it doesn't exist.

        Parameters
        ----------
        pixelId : `int`
            Identifier of the catalog to retrieve.
        schema : `lsst.afw.table.Schema`
            Schema to use in catalog creation, if the catalog does not exist.
        nNewElements : `int`
            The number of new elements that will be added to the catalog,
            so space can be preallocated.

        Returns
        -------
        catalog : `lsst.afw.table.SimpleCatalog`
            The new or read-and-resized catalog specified by ``pixelId``.
        """
        # This is safe, because we lock on this file before getCatalog is called.
        if os.path.isfile(self.filenames[pixelId]):
            catalog = afwTable.SimpleCatalog.readFits(self.filenames[pixelId])
            catalog.resize(len(catalog) + nNewElements)
            return catalog.copy(deep=True)  # ensure contiguity, so that column-assignment works
        catalog = afwTable.SimpleCatalog(schema)
        catalog.resize(nNewElements)
        self.addRefCatMetadata(catalog)
        return catalog

    @staticmethod
    def computeCoord(row, ra_name, dec_name):
        """Create an ICRS coordinate from a row of a catalog being ingested.

        Parameters
        ----------
        row : `numpy.ndarray`
            Row from catalog being ingested.
        ra_name : `str`
            Name of RA key in catalog being ingested.
        dec_name : `str`
            Name of Dec key in catalog being ingested.

        Returns
        -------
        coord : `lsst.geom.SpherePoint`
            ICRS coordinate.
        """
        return lsst.geom.SpherePoint(row[ra_name], row[dec_name], lsst.geom.degrees)

    def _setCoordErr(self, record, row):
        """Set coordinate error in a record of an indexed catalog.

        The errors are read from the specified columns, and installed
        in the appropriate columns of the output.

        Parameters
        ----------
        record : `lsst.afw.table.SimpleRecord`
            Row from indexed catalog to modify.
        row : `numpy.ndarray`
            Row from catalog being ingested.
        """
        if self.config.ra_err_name:  # IngestIndexedReferenceConfig.validate ensures all or none
            record.set(self.key_map["coord_raErr"], np.radians(row[self.config.ra_err_name]))
            record.set(self.key_map["coord_decErr"], np.radians(row[self.config.dec_err_name]))

    def _setFlags(self, record, row):
        """Set flags in an output record.

        Parameters
        ----------
        record : `lsst.afw.table.SimpleRecord`
            Row from indexed catalog to modify.
        row : `numpy.ndarray`
            Row from catalog being ingested.
        """
        names = record.schema.getNames()
        for flag in self._flags:
            if flag in names:
                attr_name = 'is_{}_name'.format(flag)
                record.set(self.key_map[flag], bool(row[getattr(self.config, attr_name)]))

    def _getFluxes(self, inputData):
        """Compute the flux fields that will go into the output catalog.

        Parameters
        ----------
        inputData : `numpy.ndarray`
            The input data to compute fluxes for.

        Returns
        -------
        fluxes : `dict` [`str`, `numpy.ndarray`]
            The values that will go into the flux and fluxErr fields in the
            output catalog.
        """
        result = {}
        for item in self.config.mag_column_list:
            result[item+'_flux'] = (inputData[item]*u.ABmag).to_value(u.nJy)
        if len(self.config.mag_err_column_map) > 0:
            for err_key in self.config.mag_err_column_map.keys():
                error_col_name = self.config.mag_err_column_map[err_key]
                # TODO: multiply by 1e9 here until we have a replacement (see DM-16903)
                # NOTE: copy the arrays because the numpy strides may not be useable by C++.
                fluxErr = fluxErrFromABMagErr(inputData[error_col_name].copy(),
                                              inputData[err_key].copy())*1e9
                result[err_key+'_fluxErr'] = fluxErr
        return result

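    # Worked example of the magnitude-to-flux conversion above
    # (illustrative): AB mag 0 corresponds to about 3631 Jy, so
    # (20.0 * u.ABmag).to_value(u.nJy) is 3631e9 * 10**(-20/2.5),
    # about 3.63e4 nJy.
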
    def _setProperMotion(self, record, row):
        """Set proper motion fields in a record of an indexed catalog.

        The proper motions are read from the specified columns,
        scaled appropriately, and installed in the appropriate
        columns of the output.

        Parameters
        ----------
        record : `lsst.afw.table.SimpleRecord`
            Row from indexed catalog to modify.
        row : structured `numpy.array`
            Row from catalog being ingested.
        """
        if self.config.pm_ra_name is None:  # IngestIndexedReferenceConfig.validate ensures all or none
            return
        radPerOriginal = np.radians(self.config.pm_scale)/(3600*1000)
        record.set(self.key_map["pm_ra"], row[self.config.pm_ra_name]*radPerOriginal*lsst.geom.radians)
        record.set(self.key_map["pm_dec"], row[self.config.pm_dec_name]*radPerOriginal*lsst.geom.radians)
        record.set(self.key_map["epoch"], self._epochToMjdTai(row[self.config.epoch_name]))
        if self.config.pm_ra_err_name is not None:  # pm_dec_err_name also, by validation
            record.set(self.key_map["pm_raErr"], row[self.config.pm_ra_err_name]*radPerOriginal)
            record.set(self.key_map["pm_decErr"], row[self.config.pm_dec_err_name]*radPerOriginal)

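    # Worked example of the scale factor above (illustrative): with
    # pm_scale == 1.0, i.e. input proper motions in mas/yr,
    # radPerOriginal = np.radians(1.0)/(3600*1000) ~= 4.8481e-9, the
    # number of radians per milliarcsecond.
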
    def _setParallax(self, record, row):
        """Set the parallax fields in a record of a refcat.
        """
        if self.config.parallax_name is None:
            return
        scale = self.config.parallax_scale*lsst.geom.milliarcseconds
        record.set(self.key_map['parallax'], row[self.config.parallax_name]*scale)
        record.set(self.key_map['parallaxErr'], row[self.config.parallax_err_name]*scale)

    def _epochToMjdTai(self, nativeEpoch):
        """Convert an epoch in native format to TAI MJD (a float).
        """
        return astropy.time.Time(nativeEpoch, format=self.config.epoch_format,
                                 scale=self.config.epoch_scale).tai.mjd

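    # Illustrative check of the conversion above: with
    # epoch_format="jyear" and epoch_scale="tai", an epoch of 2015.5
    # becomes astropy.time.Time(2015.5, format="jyear", scale="tai").tai.mjd,
    # i.e. about 57205.875.
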
    def _setExtra(self, record, row):
        """Set extra data fields in a record of an indexed catalog.

        Parameters
        ----------
        record : `lsst.afw.table.SimpleRecord`
            Row from indexed catalog to modify.
        row : structured `numpy.array`
            Row from catalog being ingested.
        """
        for extra_col in self.config.extra_col_names:
            value = row[extra_col]
            # If data read from a text file contains string-like entries,
            # numpy stores them as its own internal type, a numpy.str_
            # object. This seems to be a consequence of how numpy stores
            # string-like objects in fixed-column arrays. This checks
            # whether any of the values to be added to the catalog are numpy
            # string types, and if they are, casts them to a python string,
            # which is what the C++-backed records expect.
            if isinstance(value, np.str_):
                value = str(value)
            record.set(self.key_map[extra_col], value)

    def _fillRecord(self, record, row):
        """Fill a record in an indexed catalog to be persisted.

        Parameters
        ----------
        record : `lsst.afw.table.SimpleRecord`
            Row from indexed catalog to modify.
        row : structured `numpy.array`
            Row from catalog being ingested.
        """
        record.setCoord(self.computeCoord(row, self.config.ra_name, self.config.dec_name))

        self._setCoordErr(record, row)
        self._setFlags(record, row)
        self._setProperMotion(record, row)
        self._setParallax(record, row)
        self._setExtra(record, row)


class IngestGaiaManager(IngestIndexManager):
    """Special-case ingest manager to deal with Gaia fluxes.
    """
    def _getFluxes(self, input):
        result = {}

        def gaiaFluxToFlux(flux, zeroPoint):
            """Equations 5.19 and 5.30 from the Gaia calibration document define
            the conversion from Gaia electron/second fluxes to AB magnitudes.
            https://gea.esac.esa.int/archive/documentation/GDR2/Data_processing/chap_cu5pho/sec_cu5pho_calibr/ssec_cu5pho_calibr_extern.html
            """
            result = ((zeroPoint - 2.5 * np.log10(flux))*u.ABmag).to_value(u.nJy)
            # Set 0 instrumental fluxes to 0 (instead of NaN/inf from the math).
            result[flux == 0] = 0
            return result

        # Some fluxes are 0, so log10(flux) can give warnings. We handle the
        # zeros explicitly, so the warnings are irrelevant.
        with np.errstate(invalid='ignore', divide='ignore'):
            # The constants below come from table 5.3 in this document:
            # https://gea.esac.esa.int/archive/documentation/GDR2/Data_processing/chap_cu5pho/sec_cu5pho_calibr/ssec_cu5pho_calibr_extern.html
            result['phot_g_mean_flux'] = gaiaFluxToFlux(input['phot_g_mean_flux'], 25.7934)
            result['phot_bp_mean_flux'] = gaiaFluxToFlux(input['phot_bp_mean_flux'], 25.3806)
            result['phot_rp_mean_flux'] = gaiaFluxToFlux(input['phot_rp_mean_flux'], 25.1161)

        result['phot_g_mean_fluxErr'] = result['phot_g_mean_flux'] / input['phot_g_mean_flux_over_error']
        result['phot_bp_mean_fluxErr'] = result['phot_bp_mean_flux'] / input['phot_bp_mean_flux_over_error']
        result['phot_rp_mean_fluxErr'] = result['phot_rp_mean_flux'] / input['phot_rp_mean_flux_over_error']

        return result
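
    # Worked example of the Gaia conversion above (illustrative): a G-band
    # flux of 1e4 e-/s gives G = 25.7934 - 2.5*np.log10(1e4) = 15.7934 AB mag,
    # which the astropy conversion then turns into roughly 1.7e6 nJy.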