LSSTApplications  18.0.0+106,18.0.0+50,19.0.0,19.0.0+1,19.0.0+10,19.0.0+11,19.0.0+13,19.0.0+17,19.0.0+2,19.0.0-1-g20d9b18+6,19.0.0-1-g425ff20,19.0.0-1-g5549ca4,19.0.0-1-g580fafe+6,19.0.0-1-g6fe20d0+1,19.0.0-1-g7011481+9,19.0.0-1-g8c57eb9+6,19.0.0-1-gb5175dc+11,19.0.0-1-gdc0e4a7+9,19.0.0-1-ge272bc4+6,19.0.0-1-ge3aa853,19.0.0-10-g448f008b,19.0.0-12-g6990b2c,19.0.0-2-g0d9f9cd+11,19.0.0-2-g3d9e4fb2+11,19.0.0-2-g5037de4,19.0.0-2-gb96a1c4+3,19.0.0-2-gd955cfd+15,19.0.0-3-g2d13df8,19.0.0-3-g6f3c7dc,19.0.0-4-g725f80e+11,19.0.0-4-ga671dab3b+1,19.0.0-4-gad373c5+3,19.0.0-5-ga2acb9c+2,19.0.0-5-gfe96e6c+2,w.2020.01
LSSTDataManagementBasePackage
rootRepoConverter.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 from __future__ import annotations
22 
23 __all__ = ["RootRepoConverter"]
24 
25 import os
26 import re
27 import itertools
28 from typing import TYPE_CHECKING, Iterator, Tuple, List
29 
30 from lsst.daf.butler import DatasetType, DatasetRef, FileDataset
31 from .calibRepoConverter import CURATED_CALIBRATION_DATASET_TYPES
32 from .standardRepoConverter import StandardRepoConverter
33 
# Map each Gen2 coadd name to the dataset type name of its skymap dataset
# (e.g. "deep" -> "deepCoadd_skyMap").
SKYMAP_DATASET_TYPES = {
    coaddName: f"{coaddName}Coadd_skyMap" for coaddName in ("deep", "goodSeeing", "dcr")
}
37 
38 if TYPE_CHECKING:
39  from lsst.daf.butler import SkyPixDimension
40  from ..ingest import RawExposureData
41 
42 
44  """A specialization of `RepoConverter` for root data repositories.
45 
46  `RootRepoConverter` adds support for raw images (mostly delegated to the
47  parent task's `RawIngestTask` subtask) and reference catalogs.
48 
49  Parameters
50  ----------
51  kwds
52  Keyword arguments are forwarded to (and required by) `RepoConverter`.
53  """
54 
    def __init__(self, **kwds):
        super().__init__(**kwds)
        # Raw exposure metadata gathered by prep(); consumed later by
        # insertDimensionData() and ingest().
        self._exposureData: List[RawExposureData] = []
        # (Gen2 ref_cat name, skypix dimension) pairs discovered under
        # the root repo's ref_cats/ directory by prep().
        self._refCats: List[Tuple[str, SkyPixDimension]] = []
59 
60  def isDatasetTypeSpecial(self, datasetTypeName: str) -> bool:
61  # Docstring inherited from RepoConverter.
62  return (
63  super().isDatasetTypeSpecial(datasetTypeName) or
64  datasetTypeName in ("raw", "ref_cat", "ref_cat_config") or
65  # in Gen2, some of these are in the root repo, not a calib repo
66  datasetTypeName in CURATED_CALIBRATION_DATASET_TYPES
67  )
68 
69  def isDirectorySpecial(self, subdirectory: str) -> bool:
70  # Docstring inherited from RepoConverter.
71  return subdirectory == "ref_cats"
72 
    def prep(self):
        # Docstring inherited from RepoConverter.
        # Gather information about raws.
        if self.task.raws is not None:
            self.task.log.info(f"Preparing raws from root {self.root}.")
            if self.subset is not None:
                # Restrict raw preparation to the visits in the requested
                # subset, flattening the per-visit subsets into one stream.
                dataRefs = itertools.chain.from_iterable(
                    self.butler2.subset("raw", visit=visit) for visit in self.subset.visits
                )
            else:
                dataRefs = self.butler2.subset("raw")
            self._exposureData.extend(self.task.raws.prep(dataRef.getUri() for dataRef in dataRefs))
        # Gather information about reference catalogs.
        if self.task.isDatasetTypeIncluded("ref_cat"):
            # Deferred import: lsst.meas.algorithms is only needed when
            # reference catalogs are actually being converted.
            from lsst.meas.algorithms import DatasetConfig as RefCatDatasetConfig
            for refCat in os.listdir(os.path.join(self.root, "ref_cats")):
                self.task.log.info(f"Preparing ref_cat {refCat} from root {self.root}.")
                path = os.path.join(self.root, "ref_cats", refCat)
                configFile = os.path.join(path, "config.py")
                if not os.path.exists(configFile):
                    # No config.py: not a reference catalog directory.
                    continue
                if not self.task.isDatasetTypeIncluded(refCat):
                    # While the Gen2 dataset type for reference catalogs is
                    # just "ref_cat", in Gen3 we use the name of the reference
                    # catalog as its dataset type name.
                    continue
                onDiskConfig = RefCatDatasetConfig()
                onDiskConfig.load(configFile)
                # Only HTM pixelization can be mapped to a Gen3 skypix
                # dimension here.
                if onDiskConfig.indexer.name != "HTM":
                    raise ValueError(f"Reference catalog '{refCat}' uses unsupported "
                                     f"pixelization '{onDiskConfig.indexer.name}'.")
                level = onDiskConfig.indexer["HTM"].depth
                try:
                    dimension = self.task.universe[f"htm{level}"]
                except KeyError as err:
                    raise ValueError(f"Reference catalog {refCat} uses HTM level {level}, but no htm{level} "
                                     f"skypix dimension is configured for this registry.") from err
                self.task.useSkyPix(dimension)
                self._refCats.append((refCat, dimension))
        super().prep()
113 
115  # Docstring inherited from RepoConverter.
116  self.task.log.info(f"Inserting observation dimension records from {self.root}.")
117  records = {"visit": [], "exposure": [], "visit_detector_region": []}
118  for exposure in self._exposureData:
119  for dimension, recordsForDimension in exposure.records.items():
120  records[dimension].extend(recordsForDimension)
121  self.task.raws.insertDimensionData(records)
122 
123  def iterDatasets(self) -> Iterator[FileDataset]:
124  # Docstring inherited from RepoConverter.
125  # Iterate over reference catalog files.
126  for refCat, dimension in self._refCats:
127  datasetType = DatasetType(refCat, dimensions=[dimension], universe=self.task.universe,
128  storageClass="SimpleCatalog")
129  if self.subset is None:
130  regex = re.compile(r"(\d+)\.fits")
131  for fileName in os.listdir(os.path.join(self.root, "ref_cats", refCat)):
132  m = regex.match(fileName)
133  if m is not None:
134  htmId = int(m.group(1))
135  dataId = self.task.registry.expandDataId({dimension: htmId})
136  yield FileDataset(path=os.path.join(self.root, "ref_cats", refCat, fileName),
137  ref=DatasetRef(datasetType, dataId))
138  else:
139  for htmId in self.subset.skypix[dimension]:
140  dataId = self.task.registry.expandDataId({dimension: htmId})
141  yield FileDataset(path=os.path.join(self.root, "ref_cats", refCat, f"{htmId}.fits"),
142  ref=DatasetRef(datasetType, dataId))
143  yield from super().iterDatasets()
144 
    def ingest(self):
        # Docstring inherited from RepoConverter.
        if self.task.raws is not None:
            self.task.log.info(f"Ingesting raws from root {self.root}.")
            self.task.registry.registerDatasetType(self.task.raws.datasetType)
            # We need to delegate to RawIngestTask to actually ingest raws,
            # rather than just including those datasets in iterDatasets for
            # the base class to handle, because we don't want to assume we
            # can use the Datastore-configured Formatter for raw data.
            refs = []
            # NOTE(review): `butler` is unbound-unused below; presumably
            # getButler() is called for its collection lookup (and possibly
            # side effects) — confirm before simplifying.
            butler, collections = self.getButler("raw")
            for exposure in self._exposureData:
                refs.extend(self.task.raws.ingestExposureDatasets(exposure))
            # Associate every ingested raw with each output collection.
            for collection in collections:
                self.task.registry.associate(collection, refs)
        super().ingest()
std::shared_ptr< FrameSet > append(FrameSet const &first, FrameSet const &second)
Construct a FrameSet that performs two transformations in series.
Definition: functional.cc:33
Fit spatial kernel using approximate fluxes for candidates, and solving a linear system of equations...