# Public API of this module: the ingestion config/task classes plus the
# Gaia special-case task defined further down the file.
__all__ = ["IngestIndexedReferenceConfig", "IngestIndexedReferenceTask", "DatasetConfig",
           "IngestGaiaReferenceTask"]
import os

import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.afw.table as afwTable
import lsst.sphgeom

from .indexerRegistry import IndexerRegistry
from .readTextCatalogTask import ReadTextCatalogTask
from .loadReferenceObjects import LoadReferenceObjectsTask
from . import ingestIndexManager
# Current version of the on-disk refcat format written by this module.
LATEST_FORMAT_VERSION = 1


def addRefCatMetadata(catalog):
    """Add format-version metadata to a new (not yet populated) reference
    catalog.

    Parameters
    ----------
    catalog : `lsst.afw.table.SimpleCatalog`
        Catalog to which metadata should be attached.  Will be modified
        in-place.
    """
    # NOTE(review): assumes getMetadata() returns a mutable metadata object
    # for freshly constructed catalogs — confirm it cannot be None here.
    metadata = catalog.getMetadata()
    metadata.set("REFCAT_FORMAT_VERSION", LATEST_FORMAT_VERSION)
    catalog.setMetadata(metadata)
61 """Task runner for the reference catalog ingester 63 Data IDs are ignored so the runner should just run the task on the parsed command. 66 def run(self, parsedCmd):
69 Several arguments need to be collected to send on to the task methods. 73 parsedCmd : `argparse.Namespace` 78 results : `lsst.pipe.base.Struct` or `None` 79 A empty struct if self.doReturnResults, else None 81 files = parsedCmd.files
82 butler = parsedCmd.butler
83 task = self.TaskClass(config=self.config, log=self.log, butler=butler)
84 task.writeConfig(parsedCmd.butler, clobber=self.clobberConfig, doBackup=self.doBackup)
86 task.createIndexedCatalog(files)
87 if self.doReturnResults:
88 return pipeBase.Struct()
92 """The description of the on-disk storage format for the persisted 95 format_version = pexConfig.Field(
97 doc=
"Version number of the persisted on-disk storage format." 98 "\nVersion 0 had Jy as flux units (default 0 for unversioned catalogs)." 99 "\nVersion 1 had nJy as flux units.",
102 ref_dataset_name = pexConfig.Field(
104 default=
'cal_ref_cat',
105 doc=
'String to pass to the butler to retrieve persisted files.',
107 indexer = IndexerRegistry.makeField(
109 doc=
'Name of indexer algoritm to use. Default is HTM',
114 dataset_config = pexConfig.ConfigField(
116 doc=
"Configuration for reading the ingested data",
118 n_processes = pexConfig.Field(
120 doc=(
"Number of python processes to use when ingesting."),
123 file_reader = pexConfig.ConfigurableField(
124 target=ReadTextCatalogTask,
125 doc=
'Task to use to read the files. Default is to expect text files.' 127 ra_name = pexConfig.Field(
129 doc=
"Name of RA column",
131 dec_name = pexConfig.Field(
133 doc=
"Name of Dec column",
135 ra_err_name = pexConfig.Field(
137 doc=
"Name of RA error column",
140 dec_err_name = pexConfig.Field(
142 doc=
"Name of Dec error column",
145 mag_column_list = pexConfig.ListField(
147 doc=
"The values in the reference catalog are assumed to be in AB magnitudes. " 148 "List of column names to use for photometric information. At least one entry is required." 150 mag_err_column_map = pexConfig.DictField(
154 doc=
"A map of magnitude column name (key) to magnitude error column (value)." 156 is_photometric_name = pexConfig.Field(
159 doc=
'Name of column stating if satisfactory for photometric calibration (optional).' 161 is_resolved_name = pexConfig.Field(
164 doc=
'Name of column stating if the object is resolved (optional).' 166 is_variable_name = pexConfig.Field(
169 doc=
'Name of column stating if the object is measured to be variable (optional).' 171 id_name = pexConfig.Field(
174 doc=
'Name of column to use as an identifier (optional).' 176 pm_ra_name = pexConfig.Field(
178 doc=
"Name of proper motion RA column",
181 pm_dec_name = pexConfig.Field(
183 doc=
"Name of proper motion Dec column",
186 pm_ra_err_name = pexConfig.Field(
188 doc=
"Name of proper motion RA error column",
191 pm_dec_err_name = pexConfig.Field(
193 doc=
"Name of proper motion Dec error column",
196 pm_scale = pexConfig.Field(
198 doc=
"Scale factor by which to multiply proper motion values to obtain units of milliarcsec/year",
201 parallax_name = pexConfig.Field(
203 doc=
"Name of parallax column",
206 parallax_err_name = pexConfig.Field(
208 doc=
"Name of parallax error column",
211 parallax_scale = pexConfig.Field(
213 doc=
"Scale factor by which to multiply parallax values to obtain units of milliarcsec",
216 epoch_name = pexConfig.Field(
218 doc=
"Name of epoch column",
221 epoch_format = pexConfig.Field(
223 doc=
"Format of epoch column: any value accepted by astropy.time.Time, e.g. 'iso' or 'unix'",
226 epoch_scale = pexConfig.Field(
228 doc=
"Scale of epoch column: any value accepted by astropy.time.Time, e.g. 'utc'",
231 extra_col_names = pexConfig.ListField(
234 doc=
'Extra columns to add to the reference catalog.' 242 pexConfig.Config.validate(self)
244 def assertAllOrNone(*names):
245 """Raise ValueError unless all the named fields are set or are 248 setNames = [name
for name
in names
if bool(getattr(self, name))]
249 if len(setNames)
in (len(names), 0):
251 prefix =
"Both or neither" if len(names) == 2
else "All or none" 252 raise ValueError(
"{} of {} must be set, but only {} are set".
format(
253 prefix,
", ".join(names),
", ".join(setNames)))
257 "ra_name and dec_name and at least one entry in mag_column_list must be supplied.")
260 "mag_err_column_map specified, but keys do not match mag_column_list: {} != {}".
format(
262 assertAllOrNone(
"ra_err_name",
"dec_err_name")
263 assertAllOrNone(
"epoch_name",
"epoch_format",
"epoch_scale")
264 assertAllOrNone(
"pm_ra_name",
"pm_dec_name")
265 assertAllOrNone(
"pm_ra_err_name",
"pm_dec_err_name")
266 assertAllOrNone(
"parallax_name",
"parallax_err_name")
268 raise ValueError(
'"pm_ra/dec_name" must be specified if "pm_ra/dec_err_name" are specified')
271 '"epoch_name" must be specified if "pm_ra/dec_name" or "parallax_name" are specified')
275 """Class for producing and loading indexed reference catalogs. 277 This implements an indexing scheme based on hierarchical triangular 278 mesh (HTM). The term index really means breaking the catalog into 279 localized chunks called shards. In this case each shard contains 280 the entries from the catalog in a single HTM trixel 282 For producing catalogs this task makes the following assumptions 283 about the input catalogs: 284 - RA, Dec, RA error and Dec error are all in decimal degrees. 285 - Epoch is available in a column, in a format supported by astropy.time.Time. 286 - There are no off-diagonal covariance terms, such as covariance 287 between RA and Dec, or between PM RA and PM Dec. Gaia is a well 288 known example of a catalog that has such terms, and thus should not 289 be ingested with this task. 293 butler : `lsst.daf.persistence.Butler` 294 Data butler for reading and writing catalogs 296 canMultiprocess =
False 297 ConfigClass = IngestIndexedReferenceConfig
298 RunnerClass = IngestReferenceRunner
299 _DefaultName =
'IngestIndexedReferenceTask' 302 def _makeArgumentParser(cls):
303 """Create an argument parser. 305 This returns a standard parser with an extra "files" argument. 307 parser = pipeBase.InputOnlyArgumentParser(name=cls.
_DefaultName)
308 parser.add_argument(
"files", nargs=
"+", help=
"Names of files to index")
314 self.
indexer = IndexerRegistry[self.config.dataset_config.indexer.name](
315 self.config.dataset_config.indexer.active)
316 self.makeSubtask(
'file_reader')
320 """Index a set of files comprising a reference catalog. 322 Outputs are persisted in the butler repository. 327 A list of file paths to read. 342 worker.run(inputFiles)
345 dataId = self.
indexer.makeDataId(
None, self.config.dataset_config.ref_dataset_name)
346 self.
butler.put(self.config.dataset_config,
'ref_cat_config', dataId=dataId)
348 def _saveMasterSchema(self, filename):
349 """Generate and save the master catalog schema. 354 An input file to read to get the input dtype. 356 arr = self.file_reader.
run(filename)
358 dataId = self.
indexer.makeDataId(
'master_schema',
359 self.config.dataset_config.ref_dataset_name)
363 self.
butler.put(catalog,
'ref_cat', dataId=dataId)
364 return schema, key_map
366 def _getButlerFilenames(self, htm):
367 """Get filenames from the butler for each output pixel.""" 369 start, end = htm.universe()[0]
371 dataId = self.
indexer.makeDataId(start, self.config.dataset_config.ref_dataset_name)
372 path = self.
butler.get(
'ref_cat_filename', dataId=dataId)[0]
373 base = os.path.join(os.path.dirname(path),
"%d"+os.path.splitext(path)[1])
374 for pixelId
in range(start, end):
375 filenames[pixelId] = base % pixelId
380 """Make the schema to use in constructing the persisted catalogs. 384 dtype : `numpy.dtype` 385 Data type describing each entry in ``config.extra_col_names`` 386 for the catalogs being ingested. 390 schemaAndKeyMap : `tuple` of (`lsst.afw.table.Schema`, `dict`) 391 A tuple containing two items: 392 - The schema for the output source catalog. 393 - A map of catalog keys to use in filling the record 396 schema = LoadReferenceObjectsTask.makeMinimalSchema(
397 filterNameList=self.config.mag_column_list,
399 addIsPhotometric=bool(self.config.is_photometric_name),
400 addIsResolved=bool(self.config.is_resolved_name),
401 addIsVariable=bool(self.config.is_variable_name),
402 coordErrDim=2
if bool(self.config.ra_err_name)
else 0,
403 addProperMotion=2
if bool(self.config.pm_ra_name)
else 0,
404 properMotionErrDim=2
if bool(self.config.pm_ra_err_name)
else 0,
405 addParallax=bool(self.config.parallax_name),
407 keysToSkip =
set((
"id",
"centroid_x",
"centroid_y",
"hasCentroid"))
408 key_map = {fieldName: schema[fieldName].asKey()
for fieldName
in schema.getOrderedNames()
409 if fieldName
not in keysToSkip}
412 if dtype[name].kind ==
'U': 414 at_size = dtype[name].itemsize
415 return schema.addField(name, type=str, size=at_size)
417 at_type = dtype[name].type
418 return schema.addField(name, at_type)
420 for col
in self.config.extra_col_names:
421 key_map[col] = addField(col)
422 return schema, key_map
426 """A special-cased version of the refcat ingester for Gaia DR2. def __init__(self, args, butler=None, kwargs)
def format(config, name=None, writeSourceLine=True, prefix="", verbose=False)
Class for storing ordered metadata with comments.
def makeSchema(self, dtype)
def addRefCatMetadata(catalog)
daf::base::PropertySet * set
Custom catalog class for record/table subclasses that are guaranteed to have an ID, and should generally be sorted by that ID.
def run(self, skyInfo, tempExpRefList, imageScalerList, weightList, altMaskList=None, mask=None, supplementaryData=None)
def __init__(self, args, kwargs)
def createIndexedCatalog(self, inputFiles)
HtmPixelization provides HTM indexing of points and regions.
def _saveMasterSchema(self, filename)
def _getButlerFilenames(self, htm)