LSST Applications g180d380827+0f66a164bb,g2079a07aa2+86d27d4dc4,g2305ad1205+7d304bc7a0,g29320951ab+500695df56,g2bbee38e9b+0e5473021a,g337abbeb29+0e5473021a,g33d1c0ed96+0e5473021a,g3a166c0a6a+0e5473021a,g3ddfee87b4+e42ea45bea,g48712c4677+36a86eeaa5,g487adcacf7+2dd8f347ac,g50ff169b8f+96c6868917,g52b1c1532d+585e252eca,g591dd9f2cf+c70619cc9d,g5a732f18d5+53520f316c,g5ea96fc03c+341ea1ce94,g64a986408d+f7cd9c7162,g858d7b2824+f7cd9c7162,g8a8a8dda67+585e252eca,g99cad8db69+469ab8c039,g9ddcbc5298+9a081db1e4,ga1e77700b3+15fc3df1f7,gb0e22166c9+60f28cb32d,gba4ed39666+c2a2e4ac27,gbb8dafda3b+c92fc63c7e,gbd866b1f37+f7cd9c7162,gc120e1dc64+02c66aa596,gc28159a63d+0e5473021a,gc3e9b769f7+b0068a2d9f,gcf0d15dbbd+e42ea45bea,gdaeeff99f8+f9a426f77a,ge6526c86ff+84383d05b3,ge79ae78c31+0e5473021a,gee10cc3b42+585e252eca,gff1a9f87cc+f7cd9c7162,w.2024.17
LSST Data Management Base Package
Loading...
Searching...
No Matches
apdb.py
Go to the documentation of this file.
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22from __future__ import annotations
23
24__all__ = ["ApdbConfig", "Apdb"]
25
26import os
27from abc import ABC, abstractmethod
28from collections.abc import Iterable, Mapping
29from typing import TYPE_CHECKING, cast
30
31import astropy.time
32import pandas
33from lsst.pex.config import Config, ConfigurableField, Field
34from lsst.resources import ResourcePath, ResourcePathExpression
35from lsst.sphgeom import Region
36
37from .apdbIndex import ApdbIndex
38from .apdbSchema import ApdbTables
39from .factory import make_apdb
40from .schema_model import Table
41
42if TYPE_CHECKING:
43 from .apdbMetadata import ApdbMetadata
44 from .versionTuple import VersionTuple
45
46
def _data_file_name(basename: str) -> str:
    """Return path name of a data file in sdm_schemas package.

    Parameters
    ----------
    basename : `str`
        Name of the file inside the ``yml`` sub-directory.

    Returns
    -------
    path : `str`
        Path built from the literal ``${SDM_SCHEMAS_DIR}`` placeholder, the
        ``yml`` directory and ``basename``. The environment variable is not
        expanded here; the placeholder is returned verbatim.
    """
    return os.path.join("${SDM_SCHEMAS_DIR}", "yml", basename)
50
51
class ApdbConfig(Config):
    """Part of Apdb configuration common to all implementations."""

    # How much DiaSource / DiaForcedSource history is read back.
    read_sources_months = Field[int](doc="Number of months of history to read from DiaSource", default=12)
    read_forced_sources_months = Field[int](
        doc="Number of months of history to read from DiaForcedSource", default=12
    )

    # Schema location and name.
    schema_file = Field[str](
        doc="Location of (YAML) configuration file with standard schema", default=_data_file_name("apdb.yaml")
    )
    schema_name = Field[str](doc="Name of the schema in YAML configuration file.", default="ApdbSchema")
    # Kept only for backward compatibility of existing configurations; the
    # field is marked deprecated and its value is not used.
    extra_schema_file = Field[str](
        doc="Location of (YAML) configuration file with extra schema, "
        "definitions in this file are merged with the definitions in "
        "'schema_file', extending or replacing parts of the schema.",
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used.",
    )

    # Replication-related settings.
    use_insert_id = Field[bool](
        doc=(
            "If True, make and fill additional tables used for replication. "
            "Databases created with earlier versions of APDB may not have these tables, "
            "and corresponding methods will not work for them."
        ),
        default=False,
    )
    replica_chunk_seconds = Field[int](
        default=600,
        doc="Time extent for replica chunks, new chunks are created every specified number of seconds.",
    )
83
84
class Apdb(ABC):
    """Abstract interface for APDB."""

    ConfigClass = ApdbConfig

    @classmethod
    def from_config(cls, config: ApdbConfig) -> Apdb:
        """Create Apdb instance from configuration object.

        Parameters
        ----------
        config : `ApdbConfig`
            Configuration object, type of this object determines type of the
            Apdb implementation.

        Returns
        -------
        apdb : `Apdb`
            Instance of `Apdb` class.
        """
        return make_apdb(config)

    @classmethod
    def from_uri(cls, uri: ResourcePathExpression) -> Apdb:
        """Make Apdb instance from a serialized configuration.

        Parameters
        ----------
        uri : `~lsst.resources.ResourcePathExpression`
            URI or local file path pointing to a file with serialized
            configuration, or a string with a "label:" prefix. In the latter
            case, the configuration will be looked up from an APDB index file
            using the label name that follows the prefix. The APDB index file's
            location is determined by the ``DAX_APDB_INDEX_URI`` environment
            variable.

        Returns
        -------
        apdb : `Apdb`
            Instance of `Apdb` class, the type of the returned instance is
            determined by configuration.
        """
        if isinstance(uri, str) and uri.startswith("label:"):
            # Everything after the first colon is the label name.
            label = uri.partition(":")[2]
            index = ApdbIndex()
            # Current format for config files is "pex_config"
            config_format = "pex_config"
            uri = index.get_apdb_uri(label, config_format)
        path = ResourcePath(uri)
        config_str = path.read().decode()
        # NOTE(review): ``Config._fromPython`` is a private pex_config API and
        # presumably evaluates the serialized configuration as Python code;
        # only trusted URIs should reach this point -- confirm.
        # Assume that this is ApdbConfig, make_apdb will raise if not.
        config = cast(ApdbConfig, Config._fromPython(config_str))
        return make_apdb(config)

    @classmethod
    @abstractmethod
    def apdbImplementationVersion(cls) -> VersionTuple:
        """Return version number for current APDB implementation.

        Returns
        -------
        version : `VersionTuple`
            Version of the code defined in implementation class.
        """
        raise NotImplementedError()

    @abstractmethod
    def apdbSchemaVersion(self) -> VersionTuple:
        """Return schema version number as defined in config file.

        Returns
        -------
        version : `VersionTuple`
            Version of the schema defined in schema config file.
        """
        raise NotImplementedError()

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Table | None:
        """Return table schema definition for a given table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `.schema_model.Table` or `None`
            Table schema description, `None` is returned if table is not
            defined by this implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        This method returns only the last version of each DiaObject. Some
        records in a returned catalog may be outside the specified region, it
        is up to a client to ignore those records or cleanup the catalog before
        further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIAObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: astropy.time.Time
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIASources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If `None` then returned sources are not constrained. If
            list is empty then empty catalog is returned with a correct
            schema.
        visit_time : `astropy.time.Time`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        This method returns DiaSource catalog for a region with additional
        filtering based on DiaObject IDs. Only a subset of DiaSource history
        is returned limited by ``read_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: astropy.time.Time
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If list is empty then empty catalog is returned with a
            correct schema. If `None` then returned sources are not
            constrained. Some implementations may not support latter case.
        visit_time : `astropy.time.Time`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        This method returns DiaForcedSource catalog for a region with
        additional filtering based on DiaObject IDs. Only a subset of
        DiaForcedSource history is returned limited by
        ``read_forced_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of
        the returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def containsVisitDetector(self, visit: int, detector: int) -> bool:
        """Test whether data for a given visit-detector is present in the APDB.

        Parameters
        ----------
        visit, detector : `int`
            The ID of the visit-detector to search for.

        Returns
        -------
        present : `bool`
            `True` if some DiaObject, DiaSource, or DiaForcedSource records
            exist for the specified observation, `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing SSObject records, all existing records are
            returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: astropy.time.Time,
        objects: pandas.DataFrame,
        sources: pandas.DataFrame | None = None,
        forced_sources: pandas.DataFrame | None = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `astropy.time.Time`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

        - column names must correspond to database table columns
        - types and units of the columns must match database definitions,
          no unit conversion is performed presently
        - columns that have default values in database schema can be
          omitted from catalog
        - this method knows how to fill interval-related columns of DiaObject
          (validityStart, validityEnd) they do not need to appear in a
          catalog
        - source catalogs have ``diaObjectId`` column associating sources
          with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        If SSObjects with matching IDs already exist in the database, their
        records will be updated with the information from provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily activities is determined by
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` for Apdb.
        """
        return ConfigurableField(doc=doc, target=cls)

    @property
    @abstractmethod
    def metadata(self) -> ApdbMetadata:
        """Object controlling access to APDB metadata (`ApdbMetadata`)."""
        raise NotImplementedError()
ApdbMetadata metadata(self)
Definition apdb.py:425
ConfigurableField makeField(cls, str doc)
Definition apdb.py:408
bool containsVisitDetector(self, int visit, int detector)
Definition apdb.py:280
pandas.DataFrame getSSObjects(self)
Definition apdb.py:297
pandas.DataFrame getDiaObjects(self, Region region)
Definition apdb.py:180
pandas.DataFrame|None getDiaSources(self, Region region, Iterable[int]|None object_ids, astropy.time.Time visit_time)
Definition apdb.py:204
Apdb from_uri(cls, ResourcePathExpression uri)
Definition apdb.py:108
None dailyJob(self)
Definition apdb.py:381
Apdb from_config(cls, ApdbConfig config)
Definition apdb.py:91
None store(self, astropy.time.Time visit_time, pandas.DataFrame objects, pandas.DataFrame|None sources=None, pandas.DataFrame|None forced_sources=None)
Definition apdb.py:315
VersionTuple apdbImplementationVersion(cls)
Definition apdb.py:141
VersionTuple apdbSchemaVersion(self)
Definition apdb.py:152
Table|None tableDef(self, ApdbTables table)
Definition apdb.py:163
None storeSSObjects(self, pandas.DataFrame objects)
Definition apdb.py:348
int countUnassociatedObjects(self)
Definition apdb.py:390
None reassignDiaSources(self, Mapping[int, int] idMap)
Definition apdb.py:364
pandas.DataFrame|None getDiaForcedSources(self, Region region, Iterable[int]|None object_ids, astropy.time.Time visit_time)
Definition apdb.py:240
str _data_file_name(str basename)
Definition apdb.py:47