LSSTApplications  19.0.0-14-gb0260a2+72efe9b372,20.0.0+7927753e06,20.0.0+8829bf0056,20.0.0+995114c5d2,20.0.0+b6f4b2abd1,20.0.0+bddc4f4cbe,20.0.0-1-g253301a+8829bf0056,20.0.0-1-g2b7511a+0d71a2d77f,20.0.0-1-g5b95a8c+7461dd0434,20.0.0-12-g321c96ea+23efe4bbff,20.0.0-16-gfab17e72e+fdf35455f6,20.0.0-2-g0070d88+ba3ffc8f0b,20.0.0-2-g4dae9ad+ee58a624b3,20.0.0-2-g61b8584+5d3db074ba,20.0.0-2-gb780d76+d529cf1a41,20.0.0-2-ged6426c+226a441f5f,20.0.0-2-gf072044+8829bf0056,20.0.0-2-gf1f7952+ee58a624b3,20.0.0-20-geae50cf+e37fec0aee,20.0.0-25-g3dcad98+544a109665,20.0.0-25-g5eafb0f+ee58a624b3,20.0.0-27-g64178ef+f1f297b00a,20.0.0-3-g4cc78c6+e0676b0dc8,20.0.0-3-g8f21e14+4fd2c12c9a,20.0.0-3-gbd60e8c+187b78b4b8,20.0.0-3-gbecbe05+48431fa087,20.0.0-38-ge4adf513+a12e1f8e37,20.0.0-4-g97dc21a+544a109665,20.0.0-4-gb4befbc+087873070b,20.0.0-4-gf910f65+5d3db074ba,20.0.0-5-gdfe0fee+199202a608,20.0.0-5-gfbfe500+d529cf1a41,20.0.0-6-g64f541c+d529cf1a41,20.0.0-6-g9a5b7a1+a1cd37312e,20.0.0-68-ga3f3dda+5fca18c6a4,20.0.0-9-g4aef684+e18322736b,w.2020.45
LSSTDataManagementBasePackage
ingest_tests.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 
22 """Base class for writing Gen3 raw data ingest tests.
23 """
24 
25 __all__ = ("IngestTestBase",)
26 
27 import abc
28 import tempfile
29 import unittest
30 import os
31 import shutil
32 
34 from lsst.daf.butler import Butler
35 from lsst.daf.butler.cli.butler import cli as butlerCli
36 from lsst.daf.butler.cli.utils import LogCliRunner
37 import lsst.obs.base
38 from lsst.utils import doImport
39 from .utils import getInstrument
40 from . import script
41 
42 
class IngestTestBase(metaclass=abc.ABCMeta):
    """Base class for tests of gen3 ingest. Subclass from this, then
    `unittest.TestCase` to get a working test suite.

    Notes
    -----
    ``setUpClass`` creates one temporary repository under ``ingestDir`` and
    registers the instrument in it; every test then ingests into its own
    run collection (see ``setUp``), and ``tearDownClass`` removes the
    repository again.
    """

    ingestDir = ""
    """Root path to ingest files into. Typically `obs_package/tests/`; the
    actual directory will be a tempdir under this one.
    """

    # Shared by every subclass that does not override it: replace it in the
    # subclass rather than mutating it in place.
    dataIds = []
    """list of butler data IDs of files that should have been ingested."""

    file = ""
    """Full path to a file to ingest in tests."""

    rawIngestTask = "lsst.obs.base.RawIngestTask"
    """The task to use in the Ingest test."""

    curatedCalibrationDatasetTypes = None
    """List or tuple of dataset types that should be present after calling
    writeCuratedCalibrations. If `None` writeCuratedCalibrations will
    not be called and the test will be skipped."""

    defineVisitsTask = lsst.obs.base.DefineVisitsTask
    """The task to use to define visits from groups of exposures.
    This is ignored if ``visits`` is `None`.
    """

    # Shared by every subclass that does not override it: replace it in the
    # subclass rather than mutating it in place.
    visits = {}
    """A dictionary mapping visit data IDs to the lists of exposure data IDs
    that are associated with them.
    If this is empty (but not `None`), visit definition will be run but no
    visits will be expected (e.g. because no exposures are on-sky
    observations).
    """

    @property
    @abc.abstractmethod
    def instrumentClassName(self):
        """The fully qualified instrument class name.

        Returns
        -------
        `str`
            The fully qualified instrument class name.
        """
        pass

    @property
    def instrumentClass(self):
        """The instrument class."""
        return doImport(self.instrumentClassName)

    @property
    def instrumentName(self):
        """The name of the instrument.

        Returns
        -------
        `str`
            The name of the instrument.
        """
        return self.instrumentClass.getName()

    @classmethod
    def setUpClass(cls):
        # Use a temporary working directory
        cls.root = tempfile.mkdtemp(dir=cls.ingestDir)
        cls._createRepo()

        # Register the instrument and its static metadata
        cls._registerInstrument()

    def setUp(self):
        # Want a unique run name per test
        self.outputRun = "raw_ingest_" + self.id()

    @classmethod
    def tearDownClass(cls):
        if os.path.exists(cls.root):
            shutil.rmtree(cls.root, ignore_errors=True)

    def verifyIngest(self, files=None, cli=False, fullCheck=False):
        """
        Test that RawIngestTask ingested the expected files.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        cli : `bool`, optional
            Not used by this implementation; kept so that callers which
            pass it keep working.
        fullCheck : `bool`, optional
            If `True`, read the full raw dataset and check component
            consistency. If `False` check that a component can be read
            but do not read the entire raw exposure.

        Notes
        -----
        Reading all the ingested test data can be expensive. The code paths
        for reading the second raw are the same as reading the first so
        we do not gain anything by doing full checks of everything.
        Only read full pixel data for first dataset from file.
        Don't even do that if we are requested not to by the caller.
        This only really affects files that contain multiple datasets.
        """
        butler = Butler(self.root, run=self.outputRun)
        datasets = butler.registry.queryDatasets("raw", collections=self.outputRun)
        self.assertEqual(len(list(datasets)), len(self.dataIds))

        # Tracks whether the (single) full read is still outstanding, so the
        # caller's argument is not rebound inside the loop.
        fullCheckPending = fullCheck
        for dataId in self.dataIds:
            # Check that we can read metadata from a raw
            metadata = butler.get("raw.metadata", dataId)
            if not fullCheckPending:
                continue
            # Only the first data ID gets the expensive full read.
            fullCheckPending = False
            exposure = butler.get("raw", dataId)
            self.assertEqual(metadata.toDict(), exposure.getMetadata().toDict())

            # Since components follow a different code path we check that
            # WCS match and also we check that at least the shape
            # of the image is the same (rather than doing per-pixel equality)
            wcs = butler.get("raw.wcs", dataId)
            self.assertEqual(wcs, exposure.getWcs())

            rawImage = butler.get("raw.image", dataId)
            self.assertEqual(rawImage.getBBox(), exposure.getBBox())

        self.checkRepo(files=files)

    def checkRepo(self, files=None):
        """Check the state of the repository after ingest.

        This is an optional hook provided for subclasses; by default it does
        nothing.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        """
        pass

    @classmethod
    def _createRepo(cls):
        """Use the Click `testing` module to call the butler command line api
        to create a repository.

        Raises
        ------
        AssertionError
            Raised if the command exits with a non-zero exit code.
        """
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["create", cls.root])
        # Classmethod so assertEqual does not work; raise explicitly rather
        # than use ``assert`` so the check is not stripped under ``-O``.
        if result.exit_code != 0:
            raise AssertionError(f"output: {result.output} exception: {result.exception}")

    def _ingestRaws(self, transfer, file=None):
        """Use the Click `testing` module to call the butler command line api
        to ingest raws.

        Parameters
        ----------
        transfer : `str`
            The external data transfer type.
        file : `str`
            Path to a file to ingest instead of the default associated with
            the object.
        """
        if file is None:
            file = self.file
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["ingest-raws", self.root, file,
                                           "--output-run", self.outputRun,
                                           "--transfer", transfer,
                                           "--ingest-task", self.rawIngestTask])
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    @classmethod
    def _registerInstrument(cls):
        """Use the Click `testing` module to call the butler command line api
        to register the instrument.

        Raises
        ------
        AssertionError
            Raised if the command exits with a non-zero exit code.
        """
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["register-instrument", cls.root, cls.instrumentClassName])
        # Classmethod so assertEqual does not work; raise explicitly rather
        # than use ``assert`` so the check is not stripped under ``-O``.
        if result.exit_code != 0:
            raise AssertionError(f"output: {result.output} exception: {result.exception}")

    def _writeCuratedCalibrations(self):
        """Use the Click `testing` module to call the butler command line api
        to write curated calibrations."""
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["write-curated-calibrations", self.root, self.instrumentName])
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    def testLink(self):
        self._ingestRaws(transfer="link")
        self.verifyIngest()

    def testSymLink(self):
        self._ingestRaws(transfer="symlink")
        self.verifyIngest()

    def testCopy(self):
        self._ingestRaws(transfer="copy")
        # Only test full read of raws for the copy test. No need to do it
        # in the other tests since the formatter will be the same in all
        # cases.
        self.verifyIngest(fullCheck=True)

    def testHardLink(self):
        try:
            self._ingestRaws(transfer="hardlink")
            self.verifyIngest()
        except PermissionError as err:
            raise unittest.SkipTest("Skipping hard-link test because input data"
                                    " is on a different filesystem.") from err

    def testInPlace(self):
        """Test that files already in the directory can be added to the
        registry in-place.
        """
        # symlink into repo root manually
        butler = Butler(self.root, run=self.outputRun)
        pathInStore = "prefix-" + os.path.basename(self.file)
        newPath = butler.datastore.root.join(pathInStore)
        os.symlink(os.path.abspath(self.file), newPath.ospath)
        self._ingestRaws(transfer="auto", file=newPath.ospath)
        self.verifyIngest()

        # Recreate a butler post-ingest (the earlier one won't see the
        # ingested files)
        butler = Butler(self.root, run=self.outputRun)

        # Check that the URI associated with this path is the right one
        uri = butler.getURI("raw", self.dataIds[0])
        self.assertEqual(uri.relative_to(butler.datastore.root), pathInStore)

    def testFailOnConflict(self):
        """Re-ingesting the same data into the repository should fail.
        """
        self._ingestRaws(transfer="symlink")
        with self.assertRaises(Exception):
            self._ingestRaws(transfer="symlink")

    def testWriteCuratedCalibrations(self):
        """Test that we can ingest the curated calibrations, and read them
        with `loadCamera` both before and after.
        """
        if self.curatedCalibrationDatasetTypes is None:
            raise unittest.SkipTest("Class requests disabling of writeCuratedCalibrations test")

        butler = Butler(self.root, writeable=False)
        collection = self.instrumentClass.makeCalibrationCollectionName()

        # Trying to load a camera with a data ID not known to the registry
        # is an error, because we can't get any temporal information.
        with self.assertRaises(LookupError):
            lsst.obs.base.loadCamera(butler, {"exposure": 0}, collections=collection)

        # Ingest raws in order to get some exposure records.
        self._ingestRaws(transfer="auto")

        # Load camera should return an unversioned camera because there's
        # nothing in the repo.
        camera, isVersioned = lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
        self.assertFalse(isVersioned)
        self.assertIsInstance(camera, lsst.afw.cameraGeom.Camera)

        # NOTE(review): this call was absent from the listing this block was
        # rebuilt from; it is restored because the checks below expect the
        # calibrations to have been written. Confirm against upstream.
        self._writeCuratedCalibrations()

        # Make a new butler instance to make sure we don't have any stale
        # caches (e.g. of DatasetTypes). Note that we didn't give
        # _writeCuratedCalibrations the butler instance we had, because it's
        # trying to test the CLI interface anyway.
        butler = Butler(self.root, writeable=False)

        for datasetTypeName in self.curatedCalibrationDatasetTypes:
            with self.subTest(dtype=datasetTypeName):
                found = list(
                    butler.registry.queryDatasetAssociations(
                        datasetTypeName,
                        collections=collection,
                    )
                )
                self.assertGreater(len(found), 0, f"Checking {datasetTypeName}")

        # Load camera should return the versioned camera from the repo.
        camera, isVersioned = lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
        self.assertTrue(isVersioned)
        self.assertIsInstance(camera, lsst.afw.cameraGeom.Camera)

    def testDefineVisits(self):
        """Test that defining visits yields the visits in ``self.visits``,
        each associated with the expected exposures and carrying a region
        that contains all of its detector regions.
        """
        if self.visits is None:
            self.skipTest("Expected visits were not defined.")
        self._ingestRaws(transfer="link")

        # Calling defineVisits tests the implementation of the butler command
        # line interface "define-visits" subcommand. Functions in the script
        # folder are generally considered protected and should not be used
        # as public api.
        script.defineVisits(self.root, config_file=None, collections=self.outputRun,
                            instrument=self.instrumentName)

        # Test that we got the visits we expected.
        butler = Butler(self.root, run=self.outputRun)
        visits = butler.registry.queryDataIds(["visit"]).expanded().toSet()
        self.assertCountEqual(visits, self.visits.keys())
        instr = getInstrument(self.instrumentName, butler.registry)
        camera = instr.getCamera()
        for expectedVisit, expectedExposures in self.visits.items():
            # Match the found visit to the expected one by equality rather
            # than by position: the query result is only compared to the
            # expected keys without regard to order, so nothing guarantees
            # that it iterates in the same order as ``self.visits``.
            matches = [visit for visit in visits if visit == expectedVisit]
            self.assertEqual(len(matches), 1, f"Looking for visit {expectedVisit}")
            foundVisit = matches[0]
            # Test that this visit is associated with the expected exposures.
            foundExposures = butler.registry.queryDataIds(["exposure"], dataId=expectedVisit
                                                          ).expanded().toSet()
            self.assertCountEqual(foundExposures, expectedExposures)
            # Test that we have a visit region, and that it contains all of the
            # detector+visit regions.
            self.assertIsNotNone(foundVisit.region)
            detectorVisitDataIds = butler.registry.queryDataIds(["visit", "detector"], dataId=expectedVisit
                                                                ).expanded().toSet()
            self.assertEqual(len(detectorVisitDataIds), len(camera))
            for dataId in detectorVisitDataIds:
                self.assertTrue(foundVisit.region.contains(dataId.region))
lsst.obs.base.ingest_tests.IngestTestBase._ingestRaws
def _ingestRaws(self, transfer, file=None)
Definition: ingest_tests.py:194
lsst::afw::cameraGeom::Camera
An immutable representation of a camera.
Definition: Camera.h:43
lsst.obs.base.ingest_tests.IngestTestBase.outputRun
outputRun
Definition: ingest_tests.py:119
lsst.obs.base.ingest_tests.IngestTestBase.setUp
def setUp(self)
Definition: ingest_tests.py:117
lsst.obs.base.ingest_tests.IngestTestBase._createRepo
def _createRepo(cls)
Definition: ingest_tests.py:186
lsst.obs.base.utils.getInstrument
def getInstrument(instrumentName, registry=None)
Definition: utils.py:131
lsst.obs.base.ingest_tests.IngestTestBase.testHardLink
def testHardLink(self)
Definition: ingest_tests.py:246
astshim.keyMap.keyMapContinued.keys
def keys(self)
Definition: keyMapContinued.py:6
lsst.obs.base.ingest_tests.IngestTestBase.testInPlace
def testInPlace(self)
Definition: ingest_tests.py:254
lsst.obs.base.ingest_tests.IngestTestBase.testCopy
def testCopy(self)
Definition: ingest_tests.py:239
lsst.obs.base.ingest_tests.IngestTestBase.visits
dictionary visits
Definition: ingest_tests.py:72
lsst.obs.base.ingest_tests.IngestTestBase.ingestDir
string ingestDir
Definition: ingest_tests.py:48
lsst.obs.base.ingest_tests.IngestTestBase.instrumentName
def instrumentName(self)
Definition: ingest_tests.py:98
lsst.obs.base.ingest_tests.IngestTestBase.dataIds
list dataIds
Definition: ingest_tests.py:53
lsst.obs.base.ingest_tests.IngestTestBase.testSymLink
def testSymLink(self)
Definition: ingest_tests.py:235
lsst.obs.base.ingest_tests.IngestTestBase.testWriteCuratedCalibrations
def testWriteCuratedCalibrations(self)
Definition: ingest_tests.py:281
lsst.obs.base.ingest_tests.IngestTestBase.rawIngestTask
string rawIngestTask
Definition: ingest_tests.py:59
lsst.obs.base.ingest_tests.IngestTestBase.setUpClass
def setUpClass(cls)
Definition: ingest_tests.py:109
lsst.pex.config.config.doImport
doImport
Definition: config.py:48
lsst::utils
Definition: Backtrace.h:29
lsst.obs.base.ingest_tests.IngestTestBase.verifyIngest
def verifyIngest(self, files=None, cli=False, fullCheck=False)
Definition: ingest_tests.py:126
lsst.obs.base.defineVisits.DefineVisitsTask
Definition: defineVisits.py:281
lsst.obs.base.ingest_tests.IngestTestBase.instrumentClass
def instrumentClass(self)
Definition: ingest_tests.py:93
lsst.obs.base.ingest_tests.IngestTestBase.tearDownClass
def tearDownClass(cls)
Definition: ingest_tests.py:122
lsst::afw::cameraGeom
Definition: Amplifier.h:33
items
std::vector< SchemaItem< Flag > > * items
Definition: BaseColumnView.cc:142
list
daf::base::PropertyList * list
Definition: fits.cc:913
lsst.obs.base.ingest_tests.IngestTestBase.curatedCalibrationDatasetTypes
curatedCalibrationDatasetTypes
Definition: ingest_tests.py:62
lsst.obs.base.ingest_tests.IngestTestBase.root
root
Definition: ingest_tests.py:111
lsst.obs.base.ingest_tests.IngestTestBase._registerInstrument
def _registerInstrument(cls)
Definition: ingest_tests.py:216
lsst.obs.base.ingest_tests.IngestTestBase.testLink
def testLink(self)
Definition: ingest_tests.py:231
lsst.obs.base.ingest_tests.IngestTestBase.file
string file
Definition: ingest_tests.py:56
lsst.obs.base.ingest_tests.IngestTestBase.testDefineVisits
def testDefineVisits(self)
Definition: ingest_tests.py:328
lsst.obs.base.ingest_tests.IngestTestBase._writeCuratedCalibrations
def _writeCuratedCalibrations(self)
Definition: ingest_tests.py:224
lsst.obs.base.ingest_tests.IngestTestBase.checkRepo
def checkRepo(self, files=None)
Definition: ingest_tests.py:172
lsst.obs.base.ingest_tests.IngestTestBase.testFailOnConflict
def testFailOnConflict(self)
Definition: ingest_tests.py:274
lsst.obs.base.ingest_tests.IngestTestBase
Definition: ingest_tests.py:43
lsst.obs.base.ingest_tests.IngestTestBase.instrumentClassName
def instrumentClassName(self)
Definition: ingest_tests.py:82
lsst.obs.base
Definition: __init__.py:1