# This file is part of meas_algorithms.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""
Convert an external reference catalog into the hierarchical triangular mesh
(HTM) sharded LSST-style format, to be ingested into the butler.
"""

__all__ = ["ConvertReferenceCatalogTask", "ConvertReferenceCatalogConfig", "DatasetConfig"]

import argparse
import glob
import numpy
import os
import pathlib
import logging
import itertools

import astropy.table
import astropy.units

import lsst.sphgeom
import lsst.afw.table
import lsst.pipe.base
import lsst.pex.config as pexConfig
from lsst.daf.base import PropertyList

from .indexerRegistry import IndexerRegistry
from .readTextCatalogTask import ReadTextCatalogTask
from . import convertRefcatManager

# The most recent Indexed Reference Catalog on-disk format version.
# See DatasetConfig.format_version for details of version numbers.
LATEST_FORMAT_VERSION = 2


def addRefCatMetadata(catalog):
    """Add metadata to a new (not yet populated) reference catalog.

    Parameters
    ----------
    catalog : `lsst.afw.table.SimpleCatalog`
        Catalog to which metadata should be attached. Will be modified
        in-place.
    """
    md = catalog.getMetadata()
    if md is None:
        md = PropertyList()
    md.set("REFCAT_FORMAT_VERSION", LATEST_FORMAT_VERSION)
    catalog.setMetadata(md)


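# A minimal sketch (not part of the original module) of how the format version
# round-trips through the catalog metadata; assumes a working LSST stack and
# the modern `PropertyList.getScalar` API. Illustrative only, never called.
def _exampleAddRefCatMetadata():
    schema = lsst.afw.table.SimpleTable.makeMinimalSchema()
    catalog = lsst.afw.table.SimpleCatalog(schema)
    addRefCatMetadata(catalog)
    # The version tag is what readers use to interpret on-disk catalogs.
    assert catalog.getMetadata().getScalar("REFCAT_FORMAT_VERSION") == LATEST_FORMAT_VERSION

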
def _makeSchema(filterNameList, *, addCentroid=False,
                addIsPhotometric=False, addIsResolved=False,
                addIsVariable=False, fullPositionInformation=False):
    """Make a standard schema for reference object catalogs.

    Parameters
    ----------
    filterNameList : `list` of `str`
        List of filter names. Used to create <filterName>_flux fields.
    addCentroid : `bool`
        If True then add fields "centroid" and "hasCentroid".
    addIsPhotometric : `bool`
        If True then add field "photometric".
    addIsResolved : `bool`
        If True then add field "resolved".
    addIsVariable : `bool`
        If True then add field "variable".
    fullPositionInformation : `bool`
        If True then add epoch, proper motion, and parallax, along with the
        full five-dimensional covariance between ra and dec coordinates,
        proper motion in ra and dec, and parallax.

    Returns
    -------
    schema : `lsst.afw.table.Schema`
        Schema for reference catalog, an
        `lsst.afw.table.SimpleCatalog`.
    """
    schema = lsst.afw.table.SimpleTable.makeMinimalSchema()
    if addCentroid:
        lsst.afw.table.Point2DKey.addFields(
            schema,
            "centroid",
            "centroid on an exposure, if relevant",
            "pixel",
        )
        schema.addField(
            field="hasCentroid",
            type="Flag",
            doc="is position known?",
        )
    for filterName in filterNameList:
        schema.addField(
            field="%s_flux" % (filterName,),
            type=numpy.float64,
            doc="flux in filter %s" % (filterName,),
            units="nJy",
        )
    for filterName in filterNameList:
        schema.addField(
            field="%s_fluxErr" % (filterName,),
            type=numpy.float64,
            doc="flux uncertainty in filter %s" % (filterName,),
            units="nJy",
        )
    if addIsPhotometric:
        schema.addField(
            field="photometric",
            type="Flag",
            doc="set if the object can be used for photometric calibration",
        )
    if addIsResolved:
        schema.addField(
            field="resolved",
            type="Flag",
            doc="set if the object is spatially resolved",
        )
    if addIsVariable:
        schema.addField(
            field="variable",
            type="Flag",
            doc="set if the object has variable brightness",
        )
    lsst.afw.table.CovarianceMatrix2fKey.addFields(
        schema=schema,
        prefix="coord",
        names=["ra", "dec"],
        units=["rad", "rad"],
        diagonalOnly=True,
    )

    if fullPositionInformation:
        schema.addField(
            field="epoch",
            type=numpy.float64,
            doc="date of observation (TAI, MJD)",
            units="day",
        )
        schema.addField(
            field="pm_ra",
            type="Angle",
            doc="proper motion in the right ascension direction = dra/dt * cos(dec)",
            units="rad/year",
        )
        schema.addField(
            field="pm_dec",
            type="Angle",
            doc="proper motion in the declination direction",
            units="rad/year",
        )
        lsst.afw.table.CovarianceMatrix2fKey.addFields(
            schema=schema,
            prefix="pm",
            names=["ra", "dec"],
            units=["rad/year", "rad/year"],
            diagonalOnly=True,
        )
        schema.addField(
            field="pm_flag",
            type="Flag",
            doc="Set if proper motion or proper motion error is bad",
        )
        schema.addField(
            field="parallax",
            type="Angle",
            doc="parallax",
            units="rad",
        )
        schema.addField(
            field="parallaxErr",
            type="Angle",
            doc="uncertainty in parallax",
            units="rad",
        )
        schema.addField(
            field="parallax_flag",
            type="Flag",
            doc="Set if parallax or parallax error is bad",
        )
        # Add all the off-diagonal covariance terms
        fields = ["coord_ra", "coord_dec", "pm_ra", "pm_dec", "parallax"]
        units = ["rad", "rad", "rad/year", "rad/year", "rad"]
        for field, unit in zip(itertools.combinations(fields, r=2), itertools.combinations(units, r=2)):
            i_field = field[0]
            i_unit = unit[0]
            j_field = field[1]
            j_unit = unit[1]
            formatted_unit = "rad^2"
            if ("year" in i_unit) and ("year" in j_unit):
                formatted_unit += "/year^2"
            elif ("year" in i_unit) or ("year" in j_unit):
                formatted_unit += "/year"
            schema.addField(
                field=f"{i_field}_{j_field}_Cov",
                type="F",
                doc=f"Covariance between {i_field} and {j_field}",
                units=formatted_unit
            )
    return schema


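# A hedged usage sketch (not in the original module): with two bands and full
# position information requested, the schema carries per-band flux columns,
# the kinematic fields, and the off-diagonal covariance terms built in the
# loop above (e.g. "coord_ra_pm_dec_Cov"). Illustrative only, never called.
def _exampleMakeSchema():
    schema = _makeSchema(["g", "r"], fullPositionInformation=True)
    names = set(schema.getOrderedNames())
    # Spot-check a flux field, a kinematic field, and one covariance term.
    assert {"g_flux", "r_fluxErr", "pm_ra", "coord_ra_pm_dec_Cov"} <= names

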
def _addExtraColumnsToSchema(schema, dtype, extra_col_names, key_map):
    """Add extra columns to a schema from a numpy dtype.

    Note that schema and key_map will be modified in place.

    Parameters
    ----------
    schema : `lsst.afw.table.Schema`
        Schema to append extra columns.
    dtype : `numpy.dtype`
        Numpy record array dtype.
    extra_col_names : `list` [`str`]
        Extra column names to convert from dtype into schema.
    key_map : `dict` [`str`, `lsst.afw.table.Key`]
        Mapping from column name to table key.
    """
    def addField(name):
        if dtype[name].kind == 'U':
            # dealing with a string like thing. Need to get type and size.
            at_size = dtype[name].itemsize
            return schema.addField(name, type=str, size=at_size)
        elif dtype[name].kind == 'b':
            # Dealing with a boolean, which needs to be a flag.
            return schema.addField(name, type="Flag")
        else:
            at_type = dtype[name].type
            return schema.addField(name, at_type)

    for col in extra_col_names:
        key_map[col] = addField(col)


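# A hedged sketch (not in the original module) of the dtype handling above:
# unicode columns become sized string fields, booleans become Flags, and
# numeric columns keep their numpy type. Column names are hypothetical.
def _exampleAddExtraColumns():
    dtype = numpy.dtype([("survey", "U10"), ("is_best", "?"), ("depth", "<f8")])
    schema = lsst.afw.table.SimpleTable.makeMinimalSchema()
    key_map = {}
    _addExtraColumnsToSchema(schema, dtype, ["survey", "is_best", "depth"], key_map)
    assert set(key_map) == {"survey", "is_best", "depth"}

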
class DatasetConfig(pexConfig.Config):
    """Description of the on-disk storage format for the converted reference
    catalog.
    """
    format_version = pexConfig.Field(
        dtype=int,
        doc="Version number of the persisted on-disk storage format."
        "\nVersion 0 had Jy as flux units (default 0 for unversioned catalogs)."
        "\nVersion 1 had nJy as flux units."
        "\nVersion 2 had position-related covariances.",
        default=0  # This needs to always be 0, so that unversioned catalogs are interpreted as version 0.
    )
    ref_dataset_name = pexConfig.Field(
        dtype=str,
        doc="Name of this reference catalog; this should match the name used during butler ingest.",
    )
    indexer = IndexerRegistry.makeField(
        default='HTM',
        doc='Name of indexer algorithm to use. Default is HTM',
    )


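# A hedged sketch (not in the original module): a freshly constructed
# DatasetConfig reads as format_version 0, the value assumed for unversioned
# catalogs; newly converted catalogs get LATEST_FORMAT_VERSION through
# ConvertReferenceCatalogConfig.setDefaults below. Never called.
def _exampleDatasetConfig():
    config = DatasetConfig()
    assert config.format_version == 0
    config.ref_dataset_name = "my_refcat"  # hypothetical name, must match the butler ingest

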
class ConvertReferenceCatalogConfig(pexConfig.Config):
    dataset_config = pexConfig.ConfigField(
        dtype=DatasetConfig,
        doc="Configuration for reading the ingested data",
    )
    n_processes = pexConfig.Field(
        dtype=int,
        doc=("Number of python processes to use when ingesting."),
        default=1
    )
    manager = pexConfig.ConfigurableField(
        target=convertRefcatManager.ConvertRefcatManager,
        doc="Multiprocessing manager to perform the actual conversion of values, file-by-file."
    )
    file_reader = pexConfig.ConfigurableField(
        target=ReadTextCatalogTask,
        doc='Task to use to read the files. Default is to expect text files.'
    )
    ra_name = pexConfig.Field(
        dtype=str,
        doc="Name of RA column (values in decimal degrees)",
    )
    dec_name = pexConfig.Field(
        dtype=str,
        doc="Name of Dec column (values in decimal degrees)",
    )
    ra_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of RA error column",
        optional=True,
    )
    dec_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of Dec error column",
        optional=True,
    )
    coord_err_unit = pexConfig.Field(
        dtype=str,
        doc="Unit of RA/Dec error fields (astropy.unit.Unit compatible)",
        optional=True
    )
    mag_column_list = pexConfig.ListField(
        dtype=str,
        doc="The values in the reference catalog are assumed to be in AB magnitudes. "
            "List of column names to use for photometric information. At least one entry is required."
    )
    mag_err_column_map = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc="A map of magnitude column name (key) to magnitude error column (value)."
    )
    is_photometric_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating if satisfactory for photometric calibration (optional).'
    )
    is_resolved_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating if the object is resolved (optional).'
    )
    is_variable_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating if the object is measured to be variable (optional).'
    )
    id_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column to use as an identifier (optional).'
    )
    pm_ra_name = pexConfig.Field(
        dtype=str,
        doc="Name of proper motion RA column",
        optional=True,
    )
    pm_dec_name = pexConfig.Field(
        dtype=str,
        doc="Name of proper motion Dec column",
        optional=True,
    )
    pm_ra_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of proper motion RA error column",
        optional=True,
    )
    pm_dec_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of proper motion Dec error column",
        optional=True,
    )
    pm_scale = pexConfig.Field(
        dtype=float,
        doc="Scale factor by which to multiply proper motion values to obtain units of milliarcsec/year",
        default=1.0,
    )
    parallax_name = pexConfig.Field(
        dtype=str,
        doc="Name of parallax column",
        optional=True,
    )
    parallax_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of parallax error column",
        optional=True,
    )
    parallax_scale = pexConfig.Field(
        dtype=float,
        doc="Scale factor by which to multiply parallax values to obtain units of milliarcsec",
        default=1.0,
    )
    full_position_information = pexConfig.Field(
        dtype=bool,
        doc="Include epoch, proper motions, parallax, and covariances between sky coordinates, proper motion,"
            " and parallax in the schema. If true, a custom ``ConvertRefcatManager`` class must exist to"
            " compute the output covariances.",
        default=False
    )
    epoch_name = pexConfig.Field(
        dtype=str,
        doc="Name of epoch column",
        optional=True,
    )
    epoch_format = pexConfig.Field(
        dtype=str,
        doc="Format of epoch column: any value accepted by astropy.time.Time, e.g. 'iso' or 'unix'",
        optional=True,
    )
    epoch_scale = pexConfig.Field(
        dtype=str,
        doc="Scale of epoch column: any value accepted by astropy.time.Time, e.g. 'utc'",
        optional=True,
    )
    extra_col_names = pexConfig.ListField(
        dtype=str,
        default=[],
        doc='Extra columns to add to the reference catalog.'
    )

    def setDefaults(self):
        # Newly ingested reference catalogs always have the latest format_version.
        self.dataset_config.format_version = LATEST_FORMAT_VERSION
        # gen3 refcats are all depth=7
        self.dataset_config.indexer['HTM'].depth = 7

    def validate(self):
        super().validate()

        def assertAllOrNone(*names):
            """Raise ValueError unless all the named fields are set or are
            all none (or blank).
            """
            setNames = [name for name in names if bool(getattr(self, name))]
            if len(setNames) in (len(names), 0):
                return
            prefix = "Both or neither" if len(names) == 2 else "All or none"
            raise ValueError("{} of {} must be set, but only {} are set".format(
                prefix, ", ".join(names), ", ".join(setNames)))

        if not (self.ra_name and self.dec_name and self.mag_column_list):
            raise ValueError(
                "ra_name and dec_name and at least one entry in mag_column_list must be supplied.")
        if self.mag_err_column_map and set(self.mag_column_list) != set(self.mag_err_column_map.keys()):
            raise ValueError(
                "mag_err_column_map specified, but keys do not match mag_column_list: {} != {}".format(
                    sorted(self.mag_err_column_map.keys()), sorted(self.mag_column_list)))
        assertAllOrNone("ra_err_name", "dec_err_name", "coord_err_unit")
        if self.coord_err_unit is not None:
            result = astropy.units.Unit(self.coord_err_unit, parse_strict='silent')
            if isinstance(result, astropy.units.UnrecognizedUnit):
                msg = f"{self.coord_err_unit} is not a valid astropy unit string."
                raise pexConfig.FieldValidationError(ConvertReferenceCatalogConfig.coord_err_unit, self, msg)

        assertAllOrNone("epoch_name", "epoch_format", "epoch_scale")
        assertAllOrNone("pm_ra_name", "pm_dec_name")
        assertAllOrNone("pm_ra_err_name", "pm_dec_err_name")
        assertAllOrNone("parallax_name", "parallax_err_name")
        if self.pm_ra_err_name and not self.pm_ra_name:
            raise ValueError('"pm_ra/dec_name" must be specified if "pm_ra/dec_err_name" are specified')
        if (self.pm_ra_name or self.parallax_name) and not self.epoch_name:
            raise ValueError(
                '"epoch_name" must be specified if "pm_ra/dec_name" or "parallax_name" are specified')

        # Need all the error field names set if we are including covariances.
        if self.full_position_information:
            # Since full_position_information is True, this will only pass for
            # the "All" case.
            assertAllOrNone("full_position_information",
                            "ra_err_name", "dec_err_name", "coord_err_unit",
                            "epoch_name", "epoch_format", "epoch_scale",
                            "pm_ra_name", "pm_dec_name",
                            "pm_ra_err_name", "pm_dec_err_name",
                            "parallax_name", "parallax_err_name"
                            )


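# A hedged sketch (not in the original module) of the minimal configuration
# this task needs; in a config file passed to the command-line driver these
# would be the same `config.xxx = ...` assignments. All column names here are
# hypothetical and must match the catalog being converted. Never called.
def _exampleConfig():
    config = ConvertReferenceCatalogConfig()
    config.dataset_config.ref_dataset_name = "my_refcat"
    config.ra_name = "ra"
    config.dec_name = "dec"
    config.mag_column_list = ["g", "r"]
    config.mag_err_column_map = {"g": "g_err", "r": "r_err"}
    config.validate()  # raises ValueError/FieldValidationError on bad combinations
    return config

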
class ConvertReferenceCatalogTask(lsst.pipe.base.Task):
    """Class for producing HTM-indexed reference catalogs from external
    catalog data.

    This implements an indexing scheme based on hierarchical triangular
    mesh (HTM). The term index really means breaking the catalog into
    localized chunks called shards. In this case each shard contains
    the entries from the catalog in a single HTM trixel.

    For producing catalogs this task makes the following assumptions
    about the input catalogs:

    - RA, Dec are in decimal degrees.
    - Epoch is available in a column, in a format supported by astropy.time.Time.
    - There are either no off-diagonal covariance terms, or there are all the
      five-dimensional covariance terms (between RA, Dec, proper motion, and
      parallax). In the latter case, a custom ``ConvertRefcatManager`` must
      exist to handle the covariance terms.

    Parameters
    ----------
    output_dir : `str`
        The path to write the output files to, in a subdirectory defined by
        ``DatasetConfig.ref_dataset_name``.
    """
    canMultiprocess = False
    ConfigClass = ConvertReferenceCatalogConfig
    _DefaultName = 'ConvertReferenceCatalogTask'

    def __init__(self, *, output_dir=None, **kwargs):
        super().__init__(**kwargs)
        if output_dir is None:
            raise RuntimeError("Must specify output_dir.")
        self.base_dir = output_dir
        self.output_dir = os.path.join(output_dir, self.config.dataset_config.ref_dataset_name)
        self.ingest_table_file = os.path.join(self.base_dir, "filename_to_htm.ecsv")
        self.indexer = IndexerRegistry[self.config.dataset_config.indexer.name](
            self.config.dataset_config.indexer.active)
        self.makeSubtask('file_reader')

    def run(self, inputFiles):
        """Index a set of files comprising a reference catalog.

        Outputs are written to ``output_dir``, for subsequent ingestion into
        a butler repository with ``butler ingest-files``.

        Parameters
        ----------
        inputFiles : `list`
            A list of file paths to read.
        """
        # Create the output path, if it doesn't exist; fail if the path exists:
        # we don't want to accidentally append to existing files.
        pathlib.Path(self.output_dir).mkdir(exist_ok=False)

        schema, key_map = self._writeMasterSchema(inputFiles[0])
        # create an HTM we can interrogate about pixel ids
        htm = lsst.sphgeom.HtmPixelization(self.indexer.htm.get_depth())
        filenames = self._getOutputFilenames(htm)
        worker = self.config.manager.target(filenames,
                                            self.config,
                                            self.file_reader,
                                            self.indexer,
                                            schema,
                                            key_map,
                                            htm.universe()[0],
                                            addRefCatMetadata,
                                            self.log)
        result = worker.run(inputFiles)

        self._writeConfig()
        self._writeIngestHelperFile(result)

    def _writeIngestHelperFile(self, result):
        """Write the astropy table containing the htm->filename relationship,
        used for the ``butler ingest-files`` command after this task completes.
        """
        dimension = f"htm{self.config.dataset_config.indexer.active.depth}"
        table = astropy.table.Table(names=("filename", dimension), dtype=('str', 'int'))
        for key in result:
            table.add_row((result[key], key))
        table.write(self.ingest_table_file)

    def _writeConfig(self):
        """Write the config that was used to generate the refcat."""
        filename = os.path.join(self.output_dir, "config.py")
        with open(filename, 'w') as file:
            self.config.dataset_config.saveToStream(file)

    def _getOutputFilenames(self, htm):
        """Get the output filename for each HTM pixel.

        Parameters
        ----------
        htm : `lsst.sphgeom.HtmPixelization`
            The HTM pixelization scheme to be used to build filenames.

        Returns
        -------
        filenames : `dict` [`int`, `str`]
            Mapping of HTM pixel id to the filename that shard will be
            written to.
        """
        filenames = {}
        start, end = htm.universe()[0]
        path = os.path.join(self.output_dir, f"{self.indexer.htm}.fits")
        base = os.path.join(os.path.dirname(path), "%d"+os.path.splitext(path)[1])
        for pixelId in range(start, end):
            filenames[pixelId] = base % pixelId

        return filenames

    def makeSchema(self, dtype):
        """Make the schema to use in constructing the persisted catalogs.

        Parameters
        ----------
        dtype : `numpy.dtype`
            Data type describing each entry in ``config.extra_col_names``
            for the catalogs being ingested.

        Returns
        -------
        schemaAndKeyMap : `tuple` of (`lsst.afw.table.Schema`, `dict`)
            A tuple containing two items:
            - The schema for the output source catalog.
            - A map of catalog keys to use in filling the record
        """
        # make a schema with the standard fields
        schema = _makeSchema(
            filterNameList=self.config.mag_column_list,
            addCentroid=False,
            addIsPhotometric=bool(self.config.is_photometric_name),
            addIsResolved=bool(self.config.is_resolved_name),
            addIsVariable=bool(self.config.is_variable_name),
            fullPositionInformation=self.config.full_position_information,
        )
        keysToSkip = set(("id", "centroid_x", "centroid_y", "hasCentroid"))
        key_map = {fieldName: schema[fieldName].asKey() for fieldName in schema.getOrderedNames()
                   if fieldName not in keysToSkip}

        _addExtraColumnsToSchema(schema, dtype, self.config.extra_col_names, key_map)

        return schema, key_map

    def _writeMasterSchema(self, inputfile):
        """Generate and save the master catalog schema.

        Parameters
        ----------
        inputfile : `str`
            An input file to read to get the input dtype.
        """
        arr = self.file_reader.run(inputfile)
        schema, key_map = self.makeSchema(arr.dtype)

        catalog = lsst.afw.table.SimpleCatalog(schema)
        addRefCatMetadata(catalog)
        outputfile = os.path.join(self.output_dir, "master_schema.fits")
        catalog.writeFits(outputfile)
        return schema, key_map

    def _reduce_kwargs(self):
        # Need to be able to pickle this class to use the multiprocess manager.
        kwargs = super()._reduce_kwargs()
        kwargs['output_dir'] = self.base_dir
        return kwargs


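# A hedged end-to-end sketch (not in the original module) of driving the task
# from Python instead of the command line; paths and column names are
# hypothetical. run_convert below does the same thing for the CLI. Never called.
def _exampleRunTask():
    config = ConvertReferenceCatalogTask.ConfigClass()
    config.ra_name = "ra"
    config.dec_name = "dec"
    config.mag_column_list = ["g"]
    config.dataset_config.ref_dataset_name = "my_refcat"
    converter = ConvertReferenceCatalogTask(output_dir="refcat_out", config=config)
    converter.run(sorted(glob.glob("input_catalogs/*.txt")))

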
639 """Construct an argument parser for the ``convertReferenceCatalog`` script.
640
641 Returns
642 -------
643 argparser : `argparse.ArgumentParser`
644 The argument parser that defines the ``convertReferenceCatalog``
645 command-line interface.
646 """
647 parser = argparse.ArgumentParser(
648 description=__doc__,
649 formatter_class=argparse.RawDescriptionHelpFormatter,
650 epilog='More information is available at https://pipelines.lsst.io.'
651 )
652 parser.add_argument("outputDir",
653 help="Path to write the output shard files, configs, and `ingest-files` table to.")
654 parser.add_argument("configFile",
655 help="File containing the ConvertReferenceCatalogConfig fields.")
656 # Use a "+"-list here, so we can produce a more useful error if the user
657 # uses an unquoted glob that gets shell expanded.
658 parser.add_argument("fileglob", nargs="+",
659 help="Quoted glob for the files to be read in and converted."
660 " Example (note required quotes to prevent shell expansion):"
661 ' "gaia_source/csv/GaiaSource*"')
662 return parser
663
664
def run_convert(outputDir, configFile, fileglob):
    """Run `ConvertReferenceCatalogTask` on the input arguments.

    Parameters
    ----------
    outputDir : `str`
        Path to write the output files to.
    configFile : `str`
        File specifying the ``ConvertReferenceCatalogConfig`` fields.
    fileglob : `str`
        Quoted glob for the files to be read in and converted.
    """
    # We have to initialize the logger manually when running from the commandline.
    logging.basicConfig(level=logging.INFO, format="{name} {levelname}: {message}", style="{")

    config = ConvertReferenceCatalogTask.ConfigClass()
    config.load(configFile)
    converter = ConvertReferenceCatalogTask(output_dir=outputDir, config=config)
    files = glob.glob(fileglob)
    converter.run(files)
    with open(os.path.join(outputDir, "convertReferenceCatalogConfig.py"), "w") as outfile:
        converter.config.saveToStream(outfile)
    msg = ("Completed refcat conversion.\n\n"
           "Ingest the resulting files with the following commands, substituting the path\n"
           "to your butler repo for `REPO`, and the ticket number you are tracking this\n"
           "ingest on for `DM-NNNNN`:\n"
           f"\n    butler register-dataset-type REPO {config.dataset_config.ref_dataset_name} "
           "SimpleCatalog htm7"
           f"\n    butler ingest-files -t direct REPO {config.dataset_config.ref_dataset_name} "
           "refcats/DM-NNNNN "
           f"{converter.ingest_table_file}"
           "\n    butler collection-chain REPO --mode extend refcats refcats/DM-NNNNN")
    print(msg)


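# A hedged sketch (not in the original module): the helper table written by
# _writeIngestHelperFile is a plain ECSV mapping each shard file to its HTM
# pixel, which is what `butler ingest-files` consumes. The path here is
# hypothetical. Never called.
def _exampleReadIngestTable():
    table = astropy.table.Table.read("refcat_out/filename_to_htm.ecsv")
    # One row per populated trixel; columns are "filename" and e.g. "htm7".
    print(table.colnames, len(table))

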
def main():
    args = build_argparser().parse_args()
    if len(args.fileglob) > 1:
        raise RuntimeError("Final argument must be a quoted file glob, not a shell-expanded list of files.")
    # Fileglob comes out as a length=1 list, so we can test it above.
    run_convert(args.outputDir, args.configFile, args.fileglob[0])