LSST Applications  21.0.0-172-gfb10e10a+18fedfabac,22.0.0+297cba6710,22.0.0+80564b0ff1,22.0.0+8d77f4f51a,22.0.0+a28f4c53b1,22.0.0+dcf3732eb2,22.0.1-1-g7d6de66+2a20fdde0d,22.0.1-1-g8e32f31+297cba6710,22.0.1-1-geca5380+7fa3b7d9b6,22.0.1-12-g44dc1dc+2a20fdde0d,22.0.1-15-g6a90155+515f58c32b,22.0.1-16-g9282f48+790f5f2caa,22.0.1-2-g92698f7+dcf3732eb2,22.0.1-2-ga9b0f51+7fa3b7d9b6,22.0.1-2-gd1925c9+bf4f0e694f,22.0.1-24-g1ad7a390+a9625a72a8,22.0.1-25-g5bf6245+3ad8ecd50b,22.0.1-25-gb120d7b+8b5510f75f,22.0.1-27-g97737f7+2a20fdde0d,22.0.1-32-gf62ce7b1+aa4237961e,22.0.1-4-g0b3f228+2a20fdde0d,22.0.1-4-g243d05b+871c1b8305,22.0.1-4-g3a563be+32dcf1063f,22.0.1-4-g44f2e3d+9e4ab0f4fa,22.0.1-42-gca6935d93+ba5e5ca3eb,22.0.1-5-g15c806e+85460ae5f3,22.0.1-5-g58711c4+611d128589,22.0.1-5-g75bb458+99c117b92f,22.0.1-6-g1c63a23+7fa3b7d9b6,22.0.1-6-g50866e6+84ff5a128b,22.0.1-6-g8d3140d+720564cf76,22.0.1-6-gd805d02+cc5644f571,22.0.1-8-ge5750ce+85460ae5f3,master-g6e05de7fdc+babf819c66,master-g99da0e417a+8d77f4f51a,w.2021.48
LSST Data Management Base Package
convertReferenceCatalog.py
Go to the documentation of this file.
1 # This file is part of meas_algorithms.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 
22 """
23 Convert an external reference catalog into the hierarchical triangular mesh
24 (HTM) sharded LSST-style format, to be ingested into the butler.
25 """
26 
27 __all__ = ["ConvertReferenceCatalogTask"]
28 
29 import argparse
30 import glob
31 import os
32 import pathlib
33 import logging
34 
35 import astropy
36 
37 from . import ConvertReferenceCatalogBase
38 
39 
# NOTE(review): the ``class`` statement was absent from this listing. The base
# class is restored from the module's only project import and from the
# ``super()`` calls below -- confirm against the upstream source.
class ConvertReferenceCatalogTask(ConvertReferenceCatalogBase):
    """Class for producing HTM-indexed reference catalogs from external
    catalog data.

    Parameters
    ----------
    output_dir : `str`
        The path to write the output files to, in a subdirectory defined by
        ``DatasetConfig.ref_dataset_name``.
    **kwargs
        Forwarded unchanged to the parent class constructor.

    Raises
    ------
    RuntimeError
        Raised if ``output_dir`` is not specified.
    """
    _DefaultName = 'ConvertReferenceCatalogTask'

    def __init__(self, *, output_dir=None, **kwargs):
        super().__init__(**kwargs)
        if output_dir is None:
            raise RuntimeError("Must specify output_dir.")
        # The directory the caller asked for; the ingest table lives here.
        self.base_dir = output_dir
        # Shard files, the dataset config and the master schema go in a
        # per-dataset subdirectory of ``base_dir``.
        self.output_dir = os.path.join(output_dir, self.config.dataset_config.ref_dataset_name)
        self.ingest_table_file = os.path.join(self.base_dir, "filename_to_htm.ecsv")

    def _preRun(self):
        # Create the output path, if it doesn't exist; fail if the path exists:
        # we don't want to accidentally append to existing files.
        pathlib.Path(self.output_dir).mkdir(exist_ok=False)

    def _postRun(self, result):
        """Write the table relating each output filename to its HTM pixel.

        Parameters
        ----------
        result : `dict`
            Mapping whose keys are written to the ``htm<depth>`` column and
            whose values are written to the ``filename`` column.
        """
        # ``import astropy`` alone does not guarantee that the ``table``
        # submodule has been loaded, so import it explicitly before use.
        import astropy.table

        # Write the astropy table containing the htm->filename relationship
        dimension = f"htm{self.config.dataset_config.indexer.active.depth}"
        table = astropy.table.Table(names=("filename", dimension), dtype=('str', 'int'))
        for key in result:
            table.add_row((result[key], key))
        table.write(self.ingest_table_file)

    def _persistConfig(self):
        """Save the dataset configuration as ``config.py`` in the output
        directory.
        """
        filename = os.path.join(self.output_dir, "config.py")
        with open(filename, 'w') as file:
            self.config.dataset_config.saveToStream(file)

    def _getOnePixelFilename(self, start):
        """Return the path of a shard file inside the output directory.

        ``start`` is accepted for interface compatibility but is not used by
        this implementation.
        """
        return os.path.join(self.output_dir, f"{self.indexer.htm}.fits")

    def _writeMasterSchema(self, catalog):
        """Write ``catalog`` to ``master_schema.fits`` in the output
        directory.
        """
        filename = os.path.join(self.output_dir, "master_schema.fits")
        catalog.writeFits(filename)

    def _reduce_kwargs(self):
        # Need to be able to pickle this class to use the multiprocess manager.
        kwargs = super()._reduce_kwargs()
        # Pass the original (un-joined) directory so that re-constructing the
        # task derives the same ``output_dir`` again.
        kwargs['output_dir'] = self.base_dir
        return kwargs
90 
91 
def build_argparser():
    """Construct an argument parser for the ``convertReferenceCatalog`` script.

    Returns
    -------
    argparser : `argparse.ArgumentParser`
        The argument parser that defines the ``convertReferenceCatalog``
        command-line interface.
    """
    parser = argparse.ArgumentParser(
        # The module docstring doubles as the command-line description.
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='More information is available at https://pipelines.lsst.io.'
    )
    parser.add_argument("outputDir",
                        help="Path to write the output shard files, configs, and `ingest-files` table to.")
    parser.add_argument("configFile",
                        help="File containing the ConvertReferenceCatalogConfig fields.")
    # Use a "+"-list here, so we can produce a more useful error if the user
    # uses an unquoted glob that gets shell expanded.
    parser.add_argument("fileglob", nargs="+",
                        help="Quoted glob for the files to be read in and converted."
                             " Example (note required quotes to prevent shell expansion):"
                             ' "gaia_source/csv/GaiaSource*"')
    return parser
117 
118 
def run_convert(outputDir, configFile, fileglob):
    """Run `ConvertReferenceCatalogTask` on the input arguments.

    After the conversion, the task configuration is saved to
    ``convertReferenceCatalogConfig.py`` in ``outputDir`` and the butler
    commands needed to ingest the result are printed.

    Parameters
    ----------
    outputDir : `str`
        Path to write the output files to.
    configFile : `str`
        File specifying the ``ConvertReferenceCatalogConfig`` fields.
    fileglob : `str`
        Glob pattern for the files to be read in and converted; it is
        expanded here with `glob.glob`.
    """
    # We have to initialize the logger manually when running from the commandline.
    logging.basicConfig(level=logging.INFO, format="{name} {levelname}: {message}", style="{")

    config = ConvertReferenceCatalogTask.ConfigClass()
    config.load(configFile)
    config.validate()
    converter = ConvertReferenceCatalogTask(output_dir=outputDir, config=config)
    files = glob.glob(fileglob)
    converter.run(files)
    with open(os.path.join(outputDir, "convertReferenceCatalogConfig.py"), "w") as outfile:
        converter.config.saveToStream(outfile)

    # Build the ingest instructions from the configuration that was actually
    # used, instead of hard-coding one dataset name and one HTM depth: the
    # ingest table's dimension column is named from the configured depth.
    ref_dataset_name = config.dataset_config.ref_dataset_name
    dimension = f"htm{config.dataset_config.indexer.active.depth}"
    msg = ("Completed refcat conversion."
           " Ingest the resulting files with the following commands,"
           " substituting the path to your butler repo for REPO:"
           f"\n butler register-dataset-type REPO {ref_dataset_name} "
           f"SimpleCatalog {dimension}"
           f"\n butler ingest-files -t direct REPO {ref_dataset_name} refcats {converter.ingest_table_file}")
    print(msg)
149 
150 
def main():
    """Parse the command line and run the reference catalog conversion.

    Raises
    ------
    RuntimeError
        Raised if more than one ``fileglob`` value was received, which means
        the shell expanded an unquoted glob into a list of files.
    """
    parsed = build_argparser().parse_args()
    globs = parsed.fileglob
    if len(globs) > 1:
        raise RuntimeError("Final argument must be a quoted file glob, not a shell-expanded list of files.")
    # The parser collects ``fileglob`` into a list so the check above is
    # possible; the single remaining entry is the glob pattern itself.
    run_convert(parsed.outputDir, parsed.configFile, globs[0])
# Listing residue (link tooltip): def run_convert(outputDir, configFile, fileglob) -- defined above.