LSST Applications g063fba187b+cac8b7c890,g0f08755f38+6aee506743,g1653933729+a8ce1bb630,g168dd56ebc+a8ce1bb630,g1a2382251a+b4475c5878,g1dcb35cd9c+8f9bc1652e,g20f6ffc8e0+6aee506743,g217e2c1bcf+73dee94bd0,g28da252d5a+1f19c529b9,g2bbee38e9b+3f2625acfc,g2bc492864f+3f2625acfc,g3156d2b45e+6e55a43351,g32e5bea42b+1bb94961c2,g347aa1857d+3f2625acfc,g35bb328faa+a8ce1bb630,g3a166c0a6a+3f2625acfc,g3e281a1b8c+c5dd892a6c,g3e8969e208+a8ce1bb630,g414038480c+5927e1bc1e,g41af890bb2+8a9e676b2a,g7af13505b9+809c143d88,g80478fca09+6ef8b1810f,g82479be7b0+f568feb641,g858d7b2824+6aee506743,g89c8672015+f4add4ffd5,g9125e01d80+a8ce1bb630,ga5288a1d22+2903d499ea,gb58c049af0+d64f4d3760,gc28159a63d+3f2625acfc,gcab2d0539d+b12535109e,gcf0d15dbbd+46a3f46ba9,gda6a2b7d83+46a3f46ba9,gdaeeff99f8+1711a396fd,ge79ae78c31+3f2625acfc,gef2f8181fd+0a71e47438,gf0baf85859+c1f95f4921,gfa517265be+6aee506743,gfa999e8aa5+17cd334064,w.2024.51
LSST Data Management Base Package
Loading...
Searching...
No Matches
apdb.py
Go to the documentation of this file.
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22from __future__ import annotations
23
24__all__ = ["ApdbConfig", "Apdb"]
25
26import os
27from abc import ABC, abstractmethod
28from collections.abc import Iterable, Mapping
29from typing import TYPE_CHECKING, cast
30
31import astropy.time
32import pandas
33from lsst.pex.config import Config, ConfigurableField, Field
34from lsst.resources import ResourcePath, ResourcePathExpression
35from lsst.sphgeom import Region
36
37from .apdbIndex import ApdbIndex
38from .apdbSchema import ApdbTables
39from .factory import make_apdb
40from .schema_model import Table
41
42if TYPE_CHECKING:
43 from .apdbMetadata import ApdbMetadata
44
45
def _data_file_name(basename: str) -> str:
    """Build the path of a data file shipped in the sdm_schemas package.

    The returned string joins the literal ``${SDM_SCHEMAS_DIR}`` placeholder,
    the ``yml`` sub-directory and ``basename``; the placeholder is returned
    as-is and is not expanded here.
    """
    path_parts = ("${SDM_SCHEMAS_DIR}", "yml", basename)
    return os.path.join(*path_parts)
49
50
# NOTE(review): the ``class`` statement was missing from the extracted
# listing; the ``Config`` base class is inferred from the module imports and
# from the ``Config._fromPython`` call in ``Apdb.from_uri`` -- confirm.
class ApdbConfig(Config):
    """Part of Apdb configuration common to all implementations."""

    read_sources_months = Field[int](doc="Number of months of history to read from DiaSource", default=12)
    read_forced_sources_months = Field[int](
        doc="Number of months of history to read from DiaForcedSource", default=12
    )
    schema_file = Field[str](
        doc="Location of (YAML) configuration file with standard schema", default=_data_file_name("apdb.yaml")
    )
    schema_name = Field[str](doc="Name of the schema in YAML configuration file.", default="ApdbSchema")
    # Deprecated: kept only so that existing configurations still load.
    extra_schema_file = Field[str](
        doc="Location of (YAML) configuration file with extra schema, "
        "definitions in this file are merged with the definitions in "
        "'schema_file', extending or replacing parts of the schema.",
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used.",
    )
    use_insert_id = Field[bool](
        doc=(
            "If True, make and fill additional tables used for replication. "
            "Databases created with earlier versions of APDB may not have these tables, "
            "and corresponding methods will not work for them."
        ),
        default=False,
    )
    replica_chunk_seconds = Field[int](
        default=600,
        doc="Time extent for replica chunks, new chunks are created every specified number of seconds.",
    )
82
83
class Apdb(ABC):
    """Abstract interface for APDB."""

    ConfigClass = ApdbConfig

    @classmethod
    def from_config(cls, config: ApdbConfig) -> Apdb:
        """Create Apdb instance from configuration object.

        Parameters
        ----------
        config : `ApdbConfig`
            Configuration object, type of this object determines type of the
            Apdb implementation.

        Returns
        -------
        apdb : `Apdb`
            Instance of `Apdb` class.
        """
        return make_apdb(config)

    @classmethod
    def from_uri(cls, uri: ResourcePathExpression) -> Apdb:
        """Make Apdb instance from a serialized configuration.

        Parameters
        ----------
        uri : `~lsst.resources.ResourcePathExpression`
            URI or local file path pointing to a file with serialized
            configuration, or a string with a "label:" prefix. In the latter
            case, the configuration will be looked up from an APDB index file
            using the label name that follows the prefix. The APDB index file's
            location is determined by the ``DAX_APDB_INDEX_URI`` environment
            variable.

        Returns
        -------
        apdb : `Apdb`
            Instance of `Apdb` class, the type of the returned instance is
            determined by configuration.
        """
        if isinstance(uri, str) and uri.startswith("label:"):
            # Everything after the first colon is the label to look up.
            _, _, label = uri.partition(":")
            index = ApdbIndex()
            # Current format for config files is "pex_config"
            config_format = "pex_config"
            uri = index.get_apdb_uri(label, config_format)
        path = ResourcePath(uri)
        config_str = path.read().decode()
        # Assume that this is ApdbConfig, make_apdb will raise if not.
        config = cast(ApdbConfig, Config._fromPython(config_str))
        return make_apdb(config)

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Table | None:
        """Return table schema definition for a given table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `.schema_model.Table` or `None`
            Table schema description, `None` is returned if table is not
            defined by this implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        This method returns only the last version of each DiaObject,
        and may return only the subset of the DiaObject columns needed
        for AP association. Some records in a returned catalog may be outside
        the specified region, it is up to a client to ignore those records or
        cleanup the catalog before further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIAObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: astropy.time.Time
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIASources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If `None` then returned sources are not constrained. If
            list is empty then empty catalog is returned with a correct
            schema.
        visit_time : `astropy.time.Time`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        This method returns DiaSource catalog for a region with additional
        filtering based on DiaObject IDs. Only a subset of DiaSource history
        is returned limited by ``read_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: astropy.time.Time
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If list is empty then empty catalog is returned with a
            correct schema. If `None` then returned sources are not
            constrained. Some implementations may not support latter case.
        visit_time : `astropy.time.Time`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        This method returns DiaForcedSource catalog for a region with
        additional filtering based on DiaObject IDs. Only a subset of
        DiaForcedSource history is returned limited by
        ``read_forced_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def containsVisitDetector(self, visit: int, detector: int) -> bool:
        """Test whether any sources for a given visit-detector are present in
        the APDB.

        Parameters
        ----------
        visit, detector : `int`
            The ID of the visit-detector to search for.

        Returns
        -------
        present : `bool`
            `True` if at least one DiaSource or DiaForcedSource record
            may exist for the specified observation, `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing SSObject records, all existing records are
            returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: astropy.time.Time,
        objects: pandas.DataFrame,
        sources: pandas.DataFrame | None = None,
        forced_sources: pandas.DataFrame | None = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `astropy.time.Time`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

        - column names must correspond to database table columns
        - types and units of the columns must match database definitions,
          no unit conversion is performed presently
        - columns that have default values in database schema can be
          omitted from catalog
        - this method knows how to fill interval-related columns of DiaObject
          (validityStart, validityEnd) they do not need to appear in a
          catalog
        - source catalogs have ``diaObjectId`` column associating sources
          with objects

        This operation need not be atomic, but DiaSources and DiaForcedSources
        will not be stored until all DiaObjects are stored.
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        If SSObjects with matching IDs already exist in the database, their
        records will be updated with the information from provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily activities is determined by
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` for Apdb.
        """
        return ConfigurableField(doc=doc, target=cls)

    @property
    @abstractmethod
    def metadata(self) -> ApdbMetadata:
        """Object controlling access to APDB metadata (`ApdbMetadata`)."""
        raise NotImplementedError()
ApdbMetadata metadata(self)
Definition apdb.py:407
ConfigurableField makeField(cls, str doc)
Definition apdb.py:390
bool containsVisitDetector(self, int visit, int detector)
Definition apdb.py:258
pandas.DataFrame getSSObjects(self)
Definition apdb.py:276
pandas.DataFrame getDiaObjects(self, Region region)
Definition apdb.py:156
pandas.DataFrame|None getDiaSources(self, Region region, Iterable[int]|None object_ids, astropy.time.Time visit_time)
Definition apdb.py:182
Apdb from_uri(cls, ResourcePathExpression uri)
Definition apdb.py:107
None dailyJob(self)
Definition apdb.py:363
Apdb from_config(cls, ApdbConfig config)
Definition apdb.py:90
None store(self, astropy.time.Time visit_time, pandas.DataFrame objects, pandas.DataFrame|None sources=None, pandas.DataFrame|None forced_sources=None)
Definition apdb.py:294
Table|None tableDef(self, ApdbTables table)
Definition apdb.py:139
None storeSSObjects(self, pandas.DataFrame objects)
Definition apdb.py:330
int countUnassociatedObjects(self)
Definition apdb.py:372
None reassignDiaSources(self, Mapping[int, int] idMap)
Definition apdb.py:346
pandas.DataFrame|None getDiaForcedSources(self, Region region, Iterable[int]|None object_ids, astropy.time.Time visit_time)
Definition apdb.py:218
str _data_file_name(str basename)
Definition apdb.py:46