LSSTApplications  19.0.0-10-g920eed2,19.0.0-11-g48a0200+2,19.0.0-18-gfc4e62b+13,19.0.0-2-g3b2f90d+2,19.0.0-2-gd671419+5,19.0.0-20-g5a5a17ab+11,19.0.0-21-g2644856+13,19.0.0-23-g84eeccb+1,19.0.0-24-g878c510+1,19.0.0-25-g6c8df7140,19.0.0-25-gb330496+1,19.0.0-3-g2b32d65+5,19.0.0-3-g8227491+12,19.0.0-3-g9c54d0d+12,19.0.0-3-gca68e65+8,19.0.0-3-gcfc5f51+5,19.0.0-3-ge110943+11,19.0.0-3-ge74d124,19.0.0-3-gfe04aa6+13,19.0.0-30-g9c3fd16+1,19.0.0-4-g06f5963+5,19.0.0-4-g3d16501+13,19.0.0-4-g4a9c019+5,19.0.0-4-g5a8b323,19.0.0-4-g66397f0+1,19.0.0-4-g8278b9b+1,19.0.0-4-g8557e14,19.0.0-4-g8964aba+13,19.0.0-4-ge404a01+12,19.0.0-5-g40f3a5a,19.0.0-5-g4db63b3,19.0.0-5-gfb03ce7+13,19.0.0-6-gbaebbfb+12,19.0.0-61-gec4c6e08+1,19.0.0-7-g039c0b5+11,19.0.0-7-gbea9075+4,19.0.0-7-gc567de5+13,19.0.0-71-g41c0270,19.0.0-9-g2f02add+1,19.0.0-9-g463f923+12,w.2020.22
LSSTDataManagementBasePackage
ingest.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2012,2015 LSST Corporation.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <http://www.lsstcorp.org/LegalNotices/>.
21 #
22 import os
23 import re
24 
25 
26 from astro_metadata_translator import fix_header, DecamTranslator
27 from lsst.afw.fits import readMetadata
28 from lsst.pipe.tasks.ingest import ParseTask, IngestTask, IngestArgumentParser
29 from lsst.obs.base.ingest import RawFileData
30 import lsst.obs.base
31 from ._instrument import DarkEnergyCamera
32 
33 __all__ = ["DecamRawIngestTask", "DecamIngestArgumentParser", "DecamIngestTask", "DecamParseTask"]
34 
35 
37  """Task for ingesting raw DECam data into a Gen3 Butler repository.
38  """
def extractMetadata(self, filename: str) -> RawFileData:
    """Collect per-detector dataset information from one raw DECam file.

    Parameters
    ----------
    filename : `str`
        Path of the multi-extension FITS file to inspect.

    Returns
    -------
    data : `RawFileData`
        Built from the list of datasets found (one per retained HDU),
        ``filename``, and the raw formatter class obtained from
        `DarkEnergyCamera` for the first dataset's data ID.

    Raises
    ------
    IndexError
        Raised if no HDU is retained, because the formatter lookup
        indexes the first dataset.
    """
    fitsFile = lsst.afw.fits.Fits(filename, 'r')
    datasets = []
    # NOTE: The primary header (HDU=0) does not contain detector data,
    # so the scan starts at HDU 1.
    for hduIndex in range(1, fitsFile.countHdus()):
        fitsFile.setHdu(hduIndex)
        hduHeader = fitsFile.readMetadata()
        if hduHeader['CCDNUM'] <= 62:  # anything above 62 is a guide CCD
            fix_header(hduHeader)
            datasets.append(self._calculate_dataset_info(hduHeader, filename))

    # The data model currently assumes that whilst multiple datasets
    # can be associated with a single file, they must all share the
    # same formatter, so the first dataset decides it.
    FormatterClass = DarkEnergyCamera().getRawFormatter(datasets[0].dataId)

    self.log.debug(f"Found images for {len(datasets)} detectors in {filename}")
    return RawFileData(datasets=datasets, filename=filename,
                       FormatterClass=FormatterClass)
60 
61 
63  """Gen2 DECam ingest additional arguments.
64  """
65 
def __init__(self, *args, **kwargs):
    """Build the parser and register the DECam ``--filetype`` option.

    All positional and keyword arguments are forwarded unchanged to the
    parent class initializer.
    """
    super(DecamIngestArgumentParser, self).__init__(*args, **kwargs)
    # "raw" is the default; "instcal" selects the alternative branch
    # handled by the ingest task's ``run`` method.
    self.add_argument(
        "--filetype",
        default="raw",
        choices=["instcal", "raw"],
        help="Data processing level of the files to be ingested",
    )
70 
71 
73  """Gen2 DECam file ingest task.
74  """
75  ArgumentParser = DecamIngestArgumentParser
76 
def __init__(self, *args, **kwargs):
    """Construct the task, forwarding every argument to the parent class."""
    super(DecamIngestTask, self).__init__(*args, **kwargs)
79 
def run(self, args):
    """Ingest all specified files and add them to the registry.

    Parameters
    ----------
    args : parsed command-line arguments
        The attributes read here are ``filetype``, ``input``, ``create``,
        ``dryrun``, ``files``, ``butler`` and ``mode``.

    Notes
    -----
    ``filetype == "raw"`` is delegated to `IngestTask.run`. For
    ``filetype == "instcal"`` each input file is parsed, its instcal,
    dqmask and wtmap files are ingested, and one registry row is added
    per HDU info record. Any other ``filetype`` does nothing.
    """
    if args.filetype == "raw":
        IngestTask.run(self, args)
        return
    if args.filetype != "instcal":
        return

    products = ("instcal", "dqmask", "wtmap")
    root = args.input
    with self.register.openRegistry(root, create=args.create, dryrun=args.dryrun) as registry:
        for infile in args.files:
            fileInfo, hduInfoList = self.parse.getInfo(infile, args.filetype)
            if len(hduInfoList) > 0:
                # Resolve all three destinations before ingesting anything;
                # the first HDU record serves as the data ID for each.
                destinations = [
                    os.path.join(root, self.parse.getDestination(args.butler, hduInfoList[0],
                                                                 infile, product))
                    for product in products
                ]
                # A list (not a generator) so that every product is ingested
                # regardless of whether an earlier one succeeded.
                ingested = [
                    self.ingest(fileInfo[product], outfile, mode=args.mode, dryrun=args.dryrun)
                    for product, outfile in zip(products, destinations)
                ]
                if not any(ingested):
                    # Nothing was ingested for this exposure: skip the registry.
                    continue

            for hduInfo in hduInfoList:
                self.register.addRow(registry, hduInfo, dryrun=args.dryrun, create=args.create)
114 
115 
117  """Parse an image filename to get the required information to
118  put the file in the correct location and populate the registry.
119  """
120 
def __init__(self, *args, **kwargs):
    """Construct the parse task and initialise its bookkeeping attributes.

    All positional and keyword arguments are forwarded unchanged to the
    parent class initializer.
    """
    # Name this class (not its parent) in super() so that the method
    # resolution order search starts immediately after DecamParseTask and
    # no initializer in the hierarchy is skipped. This matches the form
    # used by getInfo.
    super(DecamParseTask, self).__init__(*args, **kwargs)

    # Maps exposure number -> {prefix: file name}; built lazily the first
    # time getInfo is called with filetype="instcal".
    self.expnumMapper = None

    # Note that these should be synchronized with the fields in
    # root.register.columns defined in config/ingest.py
    self.instcalPrefix = "instcal"
    self.dqmaskPrefix = "dqmask"
    self.wtmapPrefix = "wtmap"
131 
def _listdir(self, path, prefix):
    """Record every file in ``path`` in ``self.expnumMapper``.

    Each directory entry is read with `readMetadata`, its header is
    corrected with `fix_header` using `DecamTranslator`, and the file name
    is stored under the header's ``EXPNUM`` value in the ``prefix`` slot.

    Parameters
    ----------
    path : `str`
        Directory whose entries are scanned.
    prefix : `str`
        Slot in the per-exposure dictionary that receives the file name
        (the caller passes one of ``instcalPrefix``, ``dqmaskPrefix`` or
        ``wtmapPrefix``).

    Notes
    -----
    ``self.expnumMapper`` must already be a dictionary when this is called.
    Files whose header lacks ``EXPNUM`` are skipped individually.
    """
    for entry in os.listdir(path):
        fileName = os.path.join(path, entry)
        md = readMetadata(fileName)
        fix_header(md, translator_class=DecamTranslator)
        if "EXPNUM" not in md.names():
            # Skip just this file. Returning here would abandon the rest
            # of the directory, and because os.listdir yields entries in
            # arbitrary order the set of files lost would vary from run
            # to run.
            continue
        expnum = md.getScalar("EXPNUM")
        if expnum not in self.expnumMapper:
            self.expnumMapper[expnum] = {self.instcalPrefix: None,
                                         self.wtmapPrefix: None,
                                         self.dqmaskPrefix: None}
        self.expnumMapper[expnum][prefix] = fileName
145 
def buildExpnumMapper(self, basepath):
    """Reset ``self.expnumMapper`` and fill it from three sibling directories.

    The dqmask and wtmap directory names are derived from ``basepath`` by
    substituting the instcal prefix with the corresponding prefix; each
    directory is then handed to ``self._listdir``.

    Parameters
    ----------
    basepath : `str`
        Location on disk of the instcal directory; its path must contain
        the instcal prefix so the sibling paths can be derived.

    Raises
    ------
    RuntimeError
        Raised if a derived directory is identical to ``basepath``
        (that is, the substitution changed nothing).
    OSError
        Raised if the derived dqmask or wtmap directory does not exist.
    """
    self.expnumMapper = {}

    instcalDir = basepath
    dqmaskDir = re.sub(self.instcalPrefix, self.dqmaskPrefix, instcalDir)
    wtmapDir = re.sub(self.instcalPrefix, self.wtmapPrefix, instcalDir)

    # An unchanged path means the prefix never occurred in basepath.
    if dqmaskDir == instcalDir:
        raise RuntimeError("instcal and mask directories are the same")
    if wtmapDir == instcalDir:
        raise RuntimeError("instcal and weight map directories are the same")

    for siblingDir in (dqmaskDir, wtmapDir):
        if not os.path.isdir(siblingDir):
            raise OSError("Directory %s does not exist" % (siblingDir))

    # Traverse each directory and record its files, instcal first.
    scanPlan = (
        (instcalDir, self.instcalPrefix),
        (dqmaskDir, self.dqmaskPrefix),
        (wtmapDir, self.wtmapPrefix),
    )
    for directory, slot in scanPlan:
        self._listdir(directory, slot)
173 
def getInfo(self, filename, filetype="raw"):
    """Get metadata header info from multi-extension FITS decam image file.

    The science pixels, mask, and weight (inverse variance) are
    stored in separate files each with a unique name but with a
    common unique identifier EXPNUM in the FITS header. We have
    to aggregate the 3 filenames for a given EXPNUM and return
    this information along with that returned by the base class.

    Parameters
    ----------
    filename : `str`
        Image file to retrieve info from.
    filetype : `str`
        One of "raw" or "instcal".

    Returns
    -------
    phuInfo : `dict`
        Primary header unit info.
    infoList : `list` of `dict`
        Info for the other HDUs.

    Raises
    ------
    ValueError
        Raised if ``filetype`` is neither "raw" nor "instcal".
    KeyError
        Raised for ``filetype="instcal"`` if a record has no ``"visit"``
        entry or its value is absent from ``self.expnumMapper``.

    Notes
    -----
    For filetype="instcal", we expect a directory structure that looks
    like the following:

    .. code-block:: none

        dqmask/
        instcal/
        wtmap/

    The user creates the registry by running:

    .. code-block:: none

        ingestImagesDecam.py outputRepository --filetype=instcal --mode=link instcal/*fits
    """
    # Reject unsupported values up front; otherwise neither branch below
    # would bind phuInfo and the failure would surface as an opaque
    # UnboundLocalError.
    if filetype not in ("instcal", "raw"):
        raise ValueError(f"Unknown filetype {filetype!r}; expected 'instcal' or 'raw'")

    prefixes = (self.instcalPrefix, self.dqmaskPrefix, self.wtmapPrefix)

    if filetype == "instcal":
        # The mapper is built once, from the directory of the first file seen.
        if self.expnumMapper is None:
            self.buildExpnumMapper(os.path.dirname(os.path.abspath(filename)))

        # Note that phuInfo will have
        # 'side': 'X', 'ccd': 0
        phuInfo, infoList = super(DecamParseTask, self).getInfo(filename)
        # Attach the three companion file names to the primary record and
        # to every HDU record, keyed by each record's own visit number.
        for record in (phuInfo, *infoList):
            companions = self.expnumMapper[record["visit"]]
            for prefix in prefixes:
                record[prefix] = companions[prefix]
    else:
        phuInfo, infoList = super(DecamParseTask, self).getInfo(filename)
        # Raw files have no companions; only the HDU records get the
        # (empty) columns, phuInfo is left as returned.
        for record in infoList:
            for prefix in prefixes:
                record[prefix] = ""

    # Some data IDs can not be extracted from the zeroth extension
    # of the MEF. Add them so Butler does not try to find them
    # in the registry which may still yet to be created.
    for key in ("ccdnum", "hdu", "ccd", "calib_hdu"):
        phuInfo.setdefault(key, 0)

    return phuInfo, infoList
246 
247  @staticmethod
249  return md.getScalar('EXTNAME')
250 
def getDestination(self, butler, info, filename, filetype="raw"):
    """Get destination for the file.

    Parameters
    ----------
    butler : `lsst.daf.persistence.Butler`
        Data butler.
    info : data ID
        File properties, used as dataId for the butler.
    filename : `str`
        Input filename (not used by this implementation).
    filetype : `str`, optional
        Prefix of the ``<filetype>_filename`` dataset requested from the
        butler.

    Returns
    -------
    raw : `str`
        Destination filename, truncated at the first ``[`` when that
        character occurs after the first position.
    """
    located = butler.get("%s_filename" % (filetype), info)[0]
    # Ensure filename is devoid of cfitsio directions about HDUs. A "["
    # in the very first position is left alone, as is a name without one.
    stem, bracket, _ = located.partition("[")
    if bracket and stem:
        return stem
    return located
lsst.obs.decam.ingest.DecamRawIngestTask.extractMetadata
RawFileData extractMetadata(self, str filename)
Definition: ingest.py:39
lsst.obs.decam.ingest.DecamParseTask.__init__
def __init__(self, *args, **kwargs)
Definition: ingest.py:121
lsst.obs.decam.ingest.DecamIngestArgumentParser
Definition: ingest.py:62
lsst.obs.decam.ingest.DecamParseTask.wtmapPrefix
wtmapPrefix
Definition: ingest.py:130
lsst::afw::fits::Fits
A simple struct that combines the two arguments that must be passed to most cfitsio routines and cont...
Definition: fits.h:297
lsst.obs.decam.ingest.DecamParseTask.getExtensionName
def getExtensionName(md)
Definition: ingest.py:248
lsst.gdb.afw.printers.debug
bool debug
Definition: printers.py:9
lsst.obs.decam.ingest.DecamIngestArgumentParser.__init__
def __init__(self, *args, **kwargs)
Definition: ingest.py:66
lsst.obs.decam.ingest.DecamParseTask.getDestination
def getDestination(self, butler, info, filename, filetype="raw")
Definition: ingest.py:251
lsst.obs.decam.ingest.DecamIngestTask.run
def run(self, args)
Definition: ingest.py:80
lsst.obs.decam.ingest.DecamParseTask.dqmaskPrefix
dqmaskPrefix
Definition: ingest.py:129
lsst.obs.decam.ingest.DecamIngestTask
Definition: ingest.py:72
lsst.pipe.tasks.ingest.IngestTask
Definition: ingest.py:380
lsst.pipe.tasks.ingest.ParseTask
Definition: ingest.py:67
lsst.obs.decam.ingest.DecamParseTask.instcalPrefix
instcalPrefix
Definition: ingest.py:128
lsst.obs.decam.ingest.DecamParseTask.expnumMapper
expnumMapper
Definition: ingest.py:124
lsst.pipe.tasks.ingest
Definition: ingest.py:1
lsst.obs.decam.ingest.DecamParseTask._listdir
def _listdir(self, path, prefix)
Definition: ingest.py:132
lsstDebug.getInfo
getInfo
Definition: lsstDebug.py:87
lsst.pipe.base.task.Task.log
log
Definition: task.py:148
lsst.obs.decam.ingest.DecamRawIngestTask
Definition: ingest.py:36
lsst.obs.base.ingest
Definition: ingest.py:1
lsst.obs.base.ingest.RawFileData
Definition: ingest.py:69
lsst::afw::image.readMetadata.readMetadataContinued.readMetadata
readMetadata
Definition: readMetadataContinued.py:28
lsst.obs.decam._instrument.DarkEnergyCamera
Definition: _instrument.py:38
lsst.pipe.tasks.ingest.IngestTask.ingest
def ingest(self, infile, outfile, mode="move", dryrun=False)
Definition: ingest.py:452
lsst.pipe.tasks.ingest.IngestArgumentParser
Definition: ingest.py:37
lsst::afw::fits
Definition: fits.h:31
lsst.obs.decam.ingest.DecamParseTask.getInfo
def getInfo(self, filename, filetype="raw")
Definition: ingest.py:174
lsst.obs.decam.ingest.DecamParseTask
Definition: ingest.py:116
lsst.obs.decam.ingest.DecamIngestTask.__init__
def __init__(self, *args, **kwargs)
Definition: ingest.py:77
lsst.obs.base.ingest.RawIngestTask
Definition: ingest.py:160
lsst.obs.base
Definition: __init__.py:1
lsst.obs.decam.ingest.DecamParseTask.buildExpnumMapper
def buildExpnumMapper(self, basepath)
Definition: ingest.py:146
lsst.obs.base.ingest.RawIngestTask._calculate_dataset_info
def _calculate_dataset_info(self, header, filename)
Definition: ingest.py:240