LSST Applications  21.0.0+75b29a8a7f,21.0.0+e70536a077,21.0.0-1-ga51b5d4+62c747d40b,21.0.0-10-gbfb87ad6+3307648ee3,21.0.0-15-gedb9d5423+47cba9fc36,21.0.0-2-g103fe59+fdf0863a2a,21.0.0-2-g1367e85+d38a93257c,21.0.0-2-g45278ab+e70536a077,21.0.0-2-g5242d73+d38a93257c,21.0.0-2-g7f82c8f+e682ffb718,21.0.0-2-g8dde007+d179fbfa6a,21.0.0-2-g8f08a60+9402881886,21.0.0-2-ga326454+e682ffb718,21.0.0-2-ga63a54e+08647d4b1b,21.0.0-2-gde069b7+26c92b3210,21.0.0-2-gecfae73+0445ed2f95,21.0.0-2-gfc62afb+d38a93257c,21.0.0-27-gbbd0d29+ae871e0f33,21.0.0-28-g5fc5e037+feb0e9397b,21.0.0-3-g21c7a62+f4b9c0ff5c,21.0.0-3-g357aad2+57b0bddf0b,21.0.0-3-g4be5c26+d38a93257c,21.0.0-3-g65f322c+3f454acf5d,21.0.0-3-g7d9da8d+75b29a8a7f,21.0.0-3-gaa929c8+9e4ef6332c,21.0.0-3-ge02ed75+4b120a55c4,21.0.0-4-g3300ddd+e70536a077,21.0.0-4-g591bb35+4b120a55c4,21.0.0-4-gc004bbf+4911b9cd27,21.0.0-4-gccdca77+f94adcd104,21.0.0-4-ge8fba5a+2b3a696ff9,21.0.0-5-gb155db7+2c5429117a,21.0.0-5-gdf36809+637e4641ee,21.0.0-6-g00874e7+c9fd7f7160,21.0.0-6-g4e60332+4b120a55c4,21.0.0-7-gc8ca178+40eb9cf840,21.0.0-8-gfbe0b4b+9e4ef6332c,21.0.0-9-g2fd488a+d83b7cd606,w.2021.05
LSST Data Management Base Package
ingest_tests.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 
22 """Base class for writing Gen3 raw data ingest tests.
23 """
24 
25 __all__ = ("IngestTestBase",)
26 
27 import abc
28 import tempfile
29 import unittest
30 import os
31 import shutil
32 
34 from lsst.daf.butler import Butler, ButlerURI
35 from lsst.daf.butler.cli.butler import cli as butlerCli
36 from lsst.daf.butler.cli.utils import LogCliRunner
37 import lsst.obs.base
38 from lsst.utils import doImport
39 from .utils import getInstrument
40 from . import script
41 
42 
class IngestTestBase(metaclass=abc.ABCMeta):
    """Base class for tests of gen3 ingest. Subclass from this, then
    `unittest.TestCase` to get a working test suite.

    The class creates one butler repository per test class (in a temporary
    directory under ``ingestDir``) and uses a distinct output run per test
    method, so the individual ``test*`` methods do not see each other's
    ingested datasets.
    """

    ingestDir = ""
    """Root path to ingest files into. Typically `obs_package/tests/`; the
    actual directory will be a tempdir under this one.
    """

    dataIds = []
    """list of butler data IDs of files that should have been ingested."""

    file = ""
    """Full path to a file to ingest in tests."""

    filterLabel = None
    """The lsst.afw.image.FilterLabel that should be returned by the above
    file."""

    rawIngestTask = "lsst.obs.base.RawIngestTask"
    """The task to use in the Ingest test."""

    curatedCalibrationDatasetTypes = None
    """List or tuple of Datasets types that should be present after calling
    writeCuratedCalibrations. If `None` writeCuratedCalibrations will
    not be called and the test will be skipped."""

    defineVisitsTask = lsst.obs.base.DefineVisitsTask
    """The task to use to define visits from groups of exposures.
    This is ignored if ``visits`` is `None`.
    """

    visits = {}
    """A dictionary mapping visit data IDs to the lists of exposure data IDs
    that are associated with them.
    If this is empty (but not `None`), visit definition will be run but no
    visits will be expected (e.g. because no exposures are on-sky
    observations).
    """

    @property
    @abc.abstractmethod
    def instrumentClassName(self):
        """The fully qualified instrument class name.

        Returns
        -------
        `str`
            The fully qualified instrument class name.
        """
        pass

    @property
    def instrumentClass(self):
        """The instrument class."""
        return doImport(self.instrumentClassName)

    @property
    def instrumentName(self):
        """The name of the instrument.

        Returns
        -------
        `str`
            The name of the instrument.
        """
        return self.instrumentClass.getName()

    @classmethod
    def setUpClass(cls):
        """Create a temporary repository and register the instrument in it.
        """
        # Use a temporary working directory
        cls.root = tempfile.mkdtemp(dir=cls.ingestDir)
        cls._createRepo()

        # Register the instrument and its static metadata
        cls._registerInstrument()

    def setUp(self):
        """Choose the output run collection for the current test."""
        # Want a unique run name per test
        self.outputRun = "raw_ingest_" + self.id()

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary repository created by `setUpClass`."""
        if os.path.exists(cls.root):
            shutil.rmtree(cls.root, ignore_errors=True)

    def verifyIngest(self, files=None, cli=False, fullCheck=False):
        """
        Test that RawIngestTask ingested the expected files.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``.
            Only forwarded to `checkRepo`.
        cli : `bool`, optional
            Not used by this implementation; retained so that existing
            callers passing it keep working.
        fullCheck : `bool`, optional
            If `True`, read the full raw dataset and check component
            consistency. If `False` check that a component can be read
            but do not read the entire raw exposure.

        Notes
        -----
        Reading all the ingested test data can be expensive. The code paths
        for reading the second raw are the same as reading the first so
        we do not gain anything by doing full checks of everything.
        Only read full pixel data for first dataset from file.
        Don't even do that if we are requested not to by the caller.
        This only really affects files that contain multiple datasets.
        """
        butler = Butler(self.root, run=self.outputRun)
        datasets = list(butler.registry.queryDatasets("raw", collections=self.outputRun))
        self.assertEqual(len(datasets), len(self.dataIds))

        # Get the URI to the first dataset and check it is inside the
        # datastore
        datasetUri = butler.getURI(datasets[0])
        self.assertIsNotNone(datasetUri.relative_to(butler.datastore.root))

        for dataId in self.dataIds:
            # Check that we can read metadata from a raw
            metadata = butler.get("raw.metadata", dataId)
            if not fullCheck:
                continue
            # Deliberately clear the flag so that only the first data ID
            # gets the expensive full read.
            fullCheck = False
            exposure = butler.get("raw", dataId)
            self.assertEqual(metadata.toDict(), exposure.getMetadata().toDict())

            # Since components follow a different code path we check that
            # WCS match and also we check that at least the shape
            # of the image is the same (rather than doing per-pixel equality)
            wcs = butler.get("raw.wcs", dataId)
            self.assertEqual(wcs, exposure.getWcs())

            rawImage = butler.get("raw.image", dataId)
            self.assertEqual(rawImage.getBBox(), exposure.getBBox())

            # check that the filter label got the correct band
            filterLabel = butler.get("raw.filterLabel", dataId)
            self.assertEqual(filterLabel, self.filterLabel)

        self.checkRepo(files=files)

    def checkRepo(self, files=None):
        """Check the state of the repository after ingest.

        This is an optional hook provided for subclasses; by default it does
        nothing.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        """
        pass

    @classmethod
    def _createRepo(cls):
        """Use the Click `testing` module to call the butler command line api
        to create a repository."""
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["create", cls.root])
        # Classmethod so assertEqual does not work
        assert result.exit_code == 0, f"output: {result.output} exception: {result.exception}"

    def _ingestRaws(self, transfer, file=None):
        """Use the Click `testing` module to call the butler command line api
        to ingest raws.

        Parameters
        ----------
        transfer : `str`
            The external data transfer type.
        file : `str`
            Path to a file to ingest instead of the default associated with
            the object.
        """
        if file is None:
            file = self.file
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["ingest-raws", self.root, file,
                                           "--output-run", self.outputRun,
                                           "--transfer", transfer,
                                           "--ingest-task", self.rawIngestTask])
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    @classmethod
    def _registerInstrument(cls):
        """Use the Click `testing` module to call the butler command line api
        to register the instrument."""
        runner = LogCliRunner()
        result = runner.invoke(butlerCli,
                               ["register-instrument", cls.root, cls.instrumentClassName])
        # Classmethod so assertEqual does not work
        assert result.exit_code == 0, f"output: {result.output} exception: {result.exception}"

    def _writeCuratedCalibrations(self):
        """Use the Click `testing` module to call the butler command line api
        to write curated calibrations."""
        runner = LogCliRunner()
        result = runner.invoke(butlerCli,
                               ["write-curated-calibrations", self.root, self.instrumentName])
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    def testLink(self):
        """Ingest with ``transfer="link"`` and verify the result."""
        self._ingestRaws(transfer="link")
        self.verifyIngest()

    def testSymLink(self):
        """Ingest with ``transfer="symlink"`` and verify the result."""
        self._ingestRaws(transfer="symlink")
        self.verifyIngest()

    def testDirect(self):
        """Ingest with ``transfer="direct"`` and check that the datastore
        URI is the URI of the source file.
        """
        self._ingestRaws(transfer="direct")

        # Check that it really did have a URI outside of datastore
        srcUri = ButlerURI(self.file)
        butler = Butler(self.root, run=self.outputRun)
        datasets = list(butler.registry.queryDatasets("raw", collections=self.outputRun))
        datastoreUri = butler.getURI(datasets[0])
        self.assertEqual(datastoreUri, srcUri)

    def testCopy(self):
        """Ingest with ``transfer="copy"`` and run the full read check."""
        self._ingestRaws(transfer="copy")
        # Only test full read of raws for the copy test. No need to do it
        # in the other tests since the formatter will be the same in all
        # cases.
        self.verifyIngest(fullCheck=True)

    def testHardLink(self):
        """Ingest with ``transfer="hardlink"``, skipping the test when the
        ingest fails.
        """
        try:
            self._ingestRaws(transfer="hardlink")
            # Running ingest through the Click testing infrastructure causes
            # the original exception indicating that we can't hard-link
            # on this filesystem to be turned into a nonzero exit code, which
            # then trips the test assertion.
        except (AssertionError, PermissionError) as err:
            raise unittest.SkipTest("Skipping hard-link test because input data"
                                    " is on a different filesystem.") from err
        self.verifyIngest()

    def testInPlace(self):
        """Test that files already in the directory can be added to the
        registry in-place.
        """
        # symlink into repo root manually
        butler = Butler(self.root, run=self.outputRun)
        pathInStore = "prefix-" + os.path.basename(self.file)
        newPath = butler.datastore.root.join(pathInStore)
        os.symlink(os.path.abspath(self.file), newPath.ospath)
        self._ingestRaws(transfer="auto", file=newPath.ospath)
        self.verifyIngest()

        # Recreate a butler post-ingest (the earlier one won't see the
        # ingested files)
        butler = Butler(self.root, run=self.outputRun)

        # Check that the URI associated with this path is the right one
        uri = butler.getURI("raw", self.dataIds[0])
        self.assertEqual(uri.relative_to(butler.datastore.root), pathInStore)

    # NOTE(review): the ``def`` line of this method was missing from the
    # listing; the name is reconstructed from the docstring -- confirm.
    def testFailOnConflict(self):
        """Re-ingesting the same data into the repository should fail.
        """
        self._ingestRaws(transfer="symlink")
        # _ingestRaws asserts on the CLI exit code, so a failed second ingest
        # is reported through that assertion.
        with self.assertRaises(Exception):
            self._ingestRaws(transfer="symlink")

    # NOTE(review): the ``def`` line of this method was missing from the
    # listing; the name is reconstructed from the docstring -- confirm.
    def testWriteCuratedCalibrations(self):
        """Test that we can ingest the curated calibrations, and read them
        with `loadCamera` both before and after.
        """
        if self.curatedCalibrationDatasetTypes is None:
            raise unittest.SkipTest("Class requests disabling of writeCuratedCalibrations test")

        butler = Butler(self.root, writeable=False)
        collection = self.instrumentClass.makeCalibrationCollectionName()

        # Trying to load a camera with a data ID not known to the registry
        # is an error, because we can't get any temporal information.
        with self.assertRaises(LookupError):
            lsst.obs.base.loadCamera(butler, {"exposure": 0}, collections=collection)

        # Ingest raws in order to get some exposure records.
        self._ingestRaws(transfer="auto")

        # Load camera should return an unversioned camera because there's
        # nothing in the repo.
        # NOTE(review): ``lsst.afw.cameraGeom`` must be imported at module
        # level for the isinstance checks below -- confirm the import block.
        camera, isVersioned = lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
        self.assertFalse(isVersioned)
        self.assertIsInstance(camera, lsst.afw.cameraGeom.Camera)

        self._writeCuratedCalibrations()

        # Make a new butler instance to make sure we don't have any stale
        # caches (e.g. of DatasetTypes). Note that we didn't give
        # _writeCuratedCalibrations the butler instance we had, because it's
        # trying to test the CLI interface anyway.
        butler = Butler(self.root, writeable=False)

        for datasetTypeName in self.curatedCalibrationDatasetTypes:
            with self.subTest(dtype=datasetTypeName):
                found = list(
                    butler.registry.queryDatasetAssociations(
                        datasetTypeName,
                        collections=collection,
                    )
                )
                self.assertGreater(len(found), 0, f"Checking {datasetTypeName}")

        # Load camera should return the versioned camera from the repo.
        camera, isVersioned = lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
        self.assertTrue(isVersioned)
        self.assertIsInstance(camera, lsst.afw.cameraGeom.Camera)

    def testDefineVisits(self):
        """Define visits from the ingested exposures and check that the
        resulting visits, their exposures and their regions are as expected.
        """
        if self.visits is None:
            self.skipTest("Expected visits were not defined.")
        self._ingestRaws(transfer="link")

        # Calling defineVisits tests the implementation of the butler command
        # line interface "define-visits" subcommand. Functions in the script
        # folder are generally considered protected and should not be used
        # as public api.
        script.defineVisits(self.root, config_file=None, collections=self.outputRun,
                            instrument=self.instrumentName)

        # Test that we got the visits we expected.
        butler = Butler(self.root, run=self.outputRun)
        visits = butler.registry.queryDataIds(["visit"]).expanded().toSet()
        self.assertCountEqual(visits, self.visits.keys())
        instr = getInstrument(self.instrumentName, butler.registry)
        camera = instr.getCamera()
        for expectedVisit, expectedExposures in self.visits.items():
            # Pair each expected visit with the found visit that compares
            # equal to it. The found visits come from a set, so pairing them
            # with ``self.visits`` by iteration order could match a visit
            # region against the detectors of a different visit.
            matches = [visit for visit in visits if visit == expectedVisit]
            self.assertEqual(len(matches), 1, f"Looking for visit {expectedVisit}")
            foundVisit = matches[0]
            # Test that this visit is associated with the expected exposures.
            foundExposures = butler.registry.queryDataIds(["exposure"], dataId=expectedVisit
                                                          ).expanded().toSet()
            self.assertCountEqual(foundExposures, expectedExposures)
            # Test that we have a visit region, and that it contains all of the
            # detector+visit regions.
            self.assertIsNotNone(foundVisit.region)
            detectorVisitDataIds = butler.registry.queryDataIds(["visit", "detector"], dataId=expectedVisit
                                                                ).expanded().toSet()
            self.assertEqual(len(detectorVisitDataIds), len(camera))
            for dataId in detectorVisitDataIds:
                self.assertTrue(foundVisit.region.contains(dataId.region))
std::vector< SchemaItem< Flag > > * items
An immutable representation of a camera.
Definition: Camera.h:43
def _ingestRaws(self, transfer, file=None)
def verifyIngest(self, files=None, cli=False, fullCheck=False)
std::string const & getName() const noexcept
Return a filter's name.
Definition: Filter.h:78
def getInstrument(instrumentName, registry=None)
Definition: utils.py:131
daf::base::PropertyList * list
Definition: fits.cc:913