# Public API of this module.
__all__ = [
    "IngestIndexedReferenceConfig",
    "IngestIndexedReferenceTask",
    "DatasetConfig",
    "IngestGaiaReferenceTask",
]
37 from .indexerRegistry
import IndexerRegistry
38 from .readTextCatalogTask
import ReadTextCatalogTask
39 from .loadReferenceObjects
import LoadReferenceObjectsTask
40 from .
import ingestIndexManager
# On-disk format version stamped into newly written reference catalogs.
LATEST_FORMAT_VERSION = 1


def addRefCatMetadata(catalog):
    """Add metadata to a new (not yet populated) reference catalog.

    Parameters
    ----------
    catalog : `lsst.afw.table.SimpleCatalog`
        Catalog to which metadata should be attached.  Will be modified
        in-place.
    """
    md = catalog.getMetadata()
    # NOTE(review): a few lines are missing from the recovered source here;
    # the original presumably created a fresh metadata object when
    # getMetadata() returned None — confirm against upstream before relying
    # on this with a metadata-less catalog.
    md.set("REFCAT_FORMAT_VERSION", LATEST_FORMAT_VERSION)
    catalog.setMetadata(md)
63 """Task runner for the reference catalog ingester
65 Data IDs are ignored so the runner should just run the task on the parsed command.
def run(self, parsedCmd):
    """Run the ingest task on the parsed command.

    Data IDs are ignored; the arguments the task methods need are
    collected from the parsed command itself.

    Parameters
    ----------
    parsedCmd : `argparse.Namespace`
        Parsed command-line arguments, including ``files`` and ``butler``.

    Returns
    -------
    results : `lsst.pipe.base.Struct` or `None`
        An empty struct if ``self.doReturnResults``, else `None`.
    """
    inputFiles = parsedCmd.files
    dataButler = parsedCmd.butler
    ingestTask = self.TaskClass(config=self.config, log=self.log, butler=dataButler)
    ingestTask.writeConfig(parsedCmd.butler, clobber=self.clobberConfig, doBackup=self.doBackup)
    ingestTask.createIndexedCatalog(inputFiles)
    if self.doReturnResults:
        return pipeBase.Struct()
class DatasetConfig(pexConfig.Config):
    """The description of the on-disk storage format for the persisted
    reference catalog.

    NOTE(review): reconstructed from a mangled paste; field dtypes were not
    visible in the fragment and are restored per pex_config convention —
    confirm against upstream.
    """
    # Default 0 so that pre-versioning catalogs read back as version 0.
    format_version = pexConfig.Field(
        dtype=int,
        doc="Version number of the persisted on-disk storage format."
            "\nVersion 0 had Jy as flux units (default 0 for unversioned catalogs)."
            "\nVersion 1 had nJy as flux units.",
        default=0,
    )
    ref_dataset_name = pexConfig.Field(
        dtype=str,
        default='cal_ref_cat',
        doc='String to pass to the butler to retrieve persisted files.',
    )
    indexer = IndexerRegistry.makeField(
        default='HTM',
        doc='Name of indexer algorithm to use. Default is HTM',
    )
class IngestIndexedReferenceConfig(pexConfig.Config):
    """Configuration for ingesting an indexed reference catalog.

    NOTE(review): reconstructed from a mangled paste in which identifiers
    were duplicated (e.g. "dataset_configdataset_config") and many field
    dtypes/defaults were missing.  Field docs are taken verbatim from the
    fragment; dtypes and ``optional``/``default`` settings not visible there
    are restored per pex_config convention — confirm against upstream.
    """
    dataset_config = pexConfig.ConfigField(
        dtype=DatasetConfig,
        doc="Configuration for reading the ingested data",
    )
    n_processes = pexConfig.Field(
        dtype=int,
        doc=("Number of python processes to use when ingesting."),
        default=1,
    )
    file_reader = pexConfig.ConfigurableField(
        target=ReadTextCatalogTask,
        doc='Task to use to read the files. Default is to expect text files.'
    )
    ra_name = pexConfig.Field(
        dtype=str,
        doc="Name of RA column (values in decimal degrees)",
    )
    dec_name = pexConfig.Field(
        dtype=str,
        doc="Name of Dec column (values in decimal degrees)",
    )
    ra_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of RA error column",
        optional=True,
    )
    dec_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of Dec error column",
        optional=True,
    )
    coord_err_unit = pexConfig.Field(
        dtype=str,
        doc="Unit of RA/Dec error fields (astropy.unit.Unit compatible)",
        optional=True,
    )
    mag_column_list = pexConfig.ListField(
        dtype=str,
        doc="The values in the reference catalog are assumed to be in AB magnitudes. "
            "List of column names to use for photometric information. At least one entry is required."
    )
    mag_err_column_map = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc="A map of magnitude column name (key) to magnitude error column (value)."
    )
    is_photometric_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating if satisfactory for photometric calibration (optional).'
    )
    is_resolved_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating if the object is resolved (optional).'
    )
    is_variable_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating if the object is measured to be variable (optional).'
    )
    id_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column to use as an identifier (optional).'
    )
    pm_ra_name = pexConfig.Field(
        dtype=str,
        doc="Name of proper motion RA column",
        optional=True,
    )
    pm_dec_name = pexConfig.Field(
        dtype=str,
        doc="Name of proper motion Dec column",
        optional=True,
    )
    pm_ra_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of proper motion RA error column",
        optional=True,
    )
    pm_dec_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of proper motion Dec error column",
        optional=True,
    )
    pm_scale = pexConfig.Field(
        dtype=float,
        doc="Scale factor by which to multiply proper motion values to obtain units of milliarcsec/year",
        default=1.0,
    )
    parallax_name = pexConfig.Field(
        dtype=str,
        doc="Name of parallax column",
        optional=True,
    )
    parallax_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of parallax error column",
        optional=True,
    )
    parallax_scale = pexConfig.Field(
        dtype=float,
        doc="Scale factor by which to multiply parallax values to obtain units of milliarcsec",
        default=1.0,
    )
    epoch_name = pexConfig.Field(
        dtype=str,
        doc="Name of epoch column",
        optional=True,
    )
    epoch_format = pexConfig.Field(
        dtype=str,
        doc="Format of epoch column: any value accepted by astropy.time.Time, e.g. 'iso' or 'unix'",
        optional=True,
    )
    epoch_scale = pexConfig.Field(
        dtype=str,
        doc="Scale of epoch column: any value accepted by astropy.time.Time, e.g. 'utc'",
        optional=True,
    )
    extra_col_names = pexConfig.ListField(
        dtype=str,
        default=[],
        doc='Extra columns to add to the reference catalog.'
    )

    def setDefaults(self):
        # Newly ingested catalogs are always written in the latest format.
        self.dataset_config.format_version = LATEST_FORMAT_VERSION

    def validate(self):
        pexConfig.Config.validate(self)

        def assertAllOrNone(*names):
            """Raise ValueError unless all the named fields are set or all
            are unset (or blank).
            """
            setNames = [name for name in names if bool(getattr(self, name))]
            if len(setNames) in (len(names), 0):
                return
            prefix = "Both or neither" if len(names) == 2 else "All or none"
            raise ValueError("{} of {} must be set, but only {} are set".format(
                prefix, ", ".join(names), ", ".join(setNames)))

        # Position plus at least one magnitude column are mandatory.
        if not (self.ra_name and self.dec_name and self.mag_column_list):
            raise ValueError(
                "ra_name and dec_name and at least one entry in mag_column_list must be supplied.")
        if self.mag_err_column_map and set(self.mag_column_list) != set(self.mag_err_column_map.keys()):
            raise ValueError(
                "mag_err_column_map specified, but keys do not match mag_column_list: {} != {}".format(
                    sorted(self.mag_err_column_map.keys()), sorted(self.mag_column_list)))
        assertAllOrNone("ra_err_name", "dec_err_name", "coord_err_unit")
        if self.coord_err_unit is not None:
            # parse_strict='silent' returns an UnrecognizedUnit instead of
            # raising, so we can report a config-specific error.
            result = astropy.units.Unit(self.coord_err_unit, parse_strict='silent')
            if isinstance(result, astropy.units.UnrecognizedUnit):
                msg = f"{self.coord_err_unit} is not a valid astropy unit string."
                raise pexConfig.FieldValidationError(IngestIndexedReferenceConfig.coord_err_unit, self, msg)
        assertAllOrNone("epoch_name", "epoch_format", "epoch_scale")
        assertAllOrNone("pm_ra_name", "pm_dec_name")
        assertAllOrNone("pm_ra_err_name", "pm_dec_err_name")
        assertAllOrNone("parallax_name", "parallax_err_name")
        # Proper-motion errors are meaningless without proper motions.
        if self.pm_ra_err_name and not self.pm_ra_name:
            raise ValueError(
                '"pm_ra/dec_name" must be specified if "pm_ra/dec_err_name" are specified')
        # Proper motions and parallaxes are epoch-dependent quantities.
        if (self.pm_ra_name or self.parallax_name) and not self.epoch_name:
            raise ValueError(
                '"epoch_name" must be specified if "pm_ra/dec_name" or "parallax_name" are specified')
class IngestIndexedReferenceTask(pipeBase.CmdLineTask):
    """Class for producing and loading indexed reference catalogs.

    This implements an indexing scheme based on hierarchical triangular
    mesh (HTM). The term index really means breaking the catalog into
    localized chunks called shards. In this case each shard contains
    the entries from the catalog in a single HTM trixel

    For producing catalogs this task makes the following assumptions
    about the input catalogs:
    - RA, Dec are in decimal degrees.
    - Epoch is available in a column, in a format supported by astropy.time.Time.
    - There are no off-diagonal covariance terms, such as covariance
      between RA and Dec, or between PM RA and PM Dec. Support for such
      covariance would have to be added to the config, including consideration
      of the units in the input catalog.

    Parameters
    ----------
    butler : `lsst.daf.persistence.Butler`
        Data butler for reading and writing catalogs
    """
    canMultiprocess = False
    ConfigClass = IngestIndexedReferenceConfig
    RunnerClass = IngestReferenceRunner
    _DefaultName = 'IngestIndexedReferenceTask'

    @classmethod
    def _makeArgumentParser(cls):
        """Create an argument parser.

        This returns a standard parser with an extra "files" argument.
        """
        parser = pipeBase.InputOnlyArgumentParser(name=cls._DefaultName)
        parser.add_argument("files", nargs="+", help="Names of files to index")
        return parser

    def __init__(self, *args, butler=None, **kwargs):
        # NOTE(review): reconstructed from a mangled paste; the butler is
        # presumably stashed before super().__init__ so subtask construction
        # can rely on it — confirm ordering against upstream.
        self.butler = butler
        super().__init__(*args, **kwargs)
        # Build the configured indexer (e.g. HTM) from the registry.
        self.indexer = IndexerRegistry[self.config.dataset_config.indexer.name](
            self.config.dataset_config.indexer.active)
        self.makeSubtask('file_reader')
# NOTE(review): mangled fragment of ``createIndexedCatalog(self, inputFiles)``.
# The paste is missing the method's ``def`` line, most of its docstring, and
# the lines that construct ``worker`` (the ingest manager) before
# ``worker.run``.  The leading "NNN " tokens are original line numbers fused
# in by the extraction; the doubled identifiers ("indexerindexer",
# "butlerbutler") are paste artifacts for ``self.indexer`` / ``self.butler``.
# Restore from upstream before editing.
333 """Index a set of files comprising a reference catalog.
335 Outputs are persisted in the butler repository.
340 A list of file paths to read.
355 worker.run(inputFiles)
# After ingest, persist the dataset config alongside the catalog shards.
358 dataId = self.
indexerindexer.makeDataId(
None, self.config.dataset_config.ref_dataset_name)
359 self.
butlerbutler.put(self.config.dataset_config,
'ref_cat_config', dataId=dataId)
# NOTE(review): mangled fragment of ``_saveMasterSchema(self, filename)``.
# The lines that construct ``catalog`` (and presumably call
# ``addRefCatMetadata`` on it) are missing from this paste — ``catalog`` is
# undefined in the visible text.  "NNN " tokens are fused original line
# numbers; doubled identifiers are paste artifacts.  Restore from upstream
# before editing.
361 def _saveMasterSchema(self, filename):
362 """Generate and save the master catalog schema.
367 An input file to read to get the input dtype.
# Read one input file only to learn the column dtype.
369 arr = self.file_reader.
run(filename)
370 schema, key_map = self.
makeSchemamakeSchema(arr.dtype)
371 dataId = self.
indexerindexer.makeDataId(
'master_schema',
372 self.config.dataset_config.ref_dataset_name)
376 self.
butlerbutler.put(catalog,
'ref_cat', dataId=dataId)
377 return schema, key_map
379 def _getButlerFilenames(self, htm):
380 """Get filenames from the butler for each output pixel."""
382 start, end = htm.universe()[0]
384 dataId = self.
indexerindexer.makeDataId(start, self.config.dataset_config.ref_dataset_name)
385 path = self.
butlerbutler.get(
'ref_cat_filename', dataId=dataId)[0]
386 base = os.path.join(os.path.dirname(path),
"%d"+os.path.splitext(path)[1])
387 for pixelId
in range(start, end):
388 filenames[pixelId] = base % pixelId
393 """Make the schema to use in constructing the persisted catalogs.
397 dtype : `numpy.dtype`
398 Data type describing each entry in ``config.extra_col_names``
399 for the catalogs being ingested.
403 schemaAndKeyMap : `tuple` of (`lsst.afw.table.Schema`, `dict`)
404 A tuple containing two items:
405 - The schema for the output source catalog.
406 - A map of catalog keys to use in filling the record
409 schema = LoadReferenceObjectsTask.makeMinimalSchema(
410 filterNameList=self.config.mag_column_list,
412 addIsPhotometric=bool(self.config.is_photometric_name),
413 addIsResolved=bool(self.config.is_resolved_name),
414 addIsVariable=bool(self.config.is_variable_name),
415 coordErrDim=2
if bool(self.config.ra_err_name)
else 0,
416 addProperMotion=2
if bool(self.config.pm_ra_name)
else 0,
417 properMotionErrDim=2
if bool(self.config.pm_ra_err_name)
else 0,
418 addParallax=bool(self.config.parallax_name),
420 keysToSkip =
set((
"id",
"centroid_x",
"centroid_y",
"hasCentroid"))
421 key_map = {fieldName: schema[fieldName].asKey()
for fieldName
in schema.getOrderedNames()
422 if fieldName
not in keysToSkip}
425 if dtype[name].kind ==
'U':
427 at_size = dtype[name].itemsize
428 return schema.addField(name, type=str, size=at_size)
430 at_type = dtype[name].type
431 return schema.addField(name, at_type)
433 for col
in self.config.extra_col_names:
434 key_map[col] = addField(col)
435 return schema, key_map
439 """A special-cased version of the refcat ingester for Gaia DR2.
Custom catalog class for record/table subclasses that are guaranteed to have an ID,...
Class for storing ordered metadata with comments.
def __init__(self, *args, **kwargs)
def __init__(self, *args, butler=None, **kwargs)
def _saveMasterSchema(self, filename)
def makeSchema(self, dtype)
def _getButlerFilenames(self, htm)
def createIndexedCatalog(self, inputFiles)
HtmPixelization provides HTM indexing of points and regions.
daf::base::PropertySet * set
def addRefCatMetadata(catalog)
def format(config, name=None, writeSourceLine=True, prefix="", verbose=False)
def run(self, skyInfo, tempExpRefList, imageScalerList, weightList, altMaskList=None, mask=None, supplementaryData=None)