LSST Applications 26.0.0,g0265f82a02+6660c170cc,g07994bdeae+30b05a742e,g0a0026dc87+17526d298f,g0a60f58ba1+17526d298f,g0e4bf8285c+96dd2c2ea9,g0ecae5effc+c266a536c8,g1e7d6db67d+6f7cb1f4bb,g26482f50c6+6346c0633c,g2bbee38e9b+6660c170cc,g2cc88a2952+0a4e78cd49,g3273194fdb+f6908454ef,g337abbeb29+6660c170cc,g337c41fc51+9a8f8f0815,g37c6e7c3d5+7bbafe9d37,g44018dc512+6660c170cc,g4a941329ef+4f7594a38e,g4c90b7bd52+5145c320d2,g58be5f913a+bea990ba40,g635b316a6c+8d6b3a3e56,g67924a670a+bfead8c487,g6ae5381d9b+81bc2a20b4,g93c4d6e787+26b17396bd,g98cecbdb62+ed2cb6d659,g98ffbb4407+81bc2a20b4,g9ddcbc5298+7f7571301f,ga1e77700b3+99e9273977,gae46bcf261+6660c170cc,gb2715bf1a1+17526d298f,gc86a011abf+17526d298f,gcf0d15dbbd+96dd2c2ea9,gdaeeff99f8+0d8dbea60f,gdb4ec4c597+6660c170cc,ge23793e450+96dd2c2ea9,gf041782ebf+171108ac67
LSST Data Management Base Package
Loading...
Searching...
No Matches
apdb.py
Go to the documentation of this file.
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22from __future__ import annotations
23
24__all__ = ["ApdbConfig", "Apdb", "ApdbInsertId", "ApdbTableData"]
25
26import os
27from abc import ABC, abstractmethod
28from collections.abc import Iterable, Mapping
29from dataclasses import dataclass
30from typing import Optional
31from uuid import UUID, uuid4
32
33import lsst.daf.base as dafBase
34import pandas
35from felis.simple import Table
36from lsst.pex.config import Config, ConfigurableField, Field
37from lsst.sphgeom import Region
38
39from .apdbSchema import ApdbTables
40
41
def _data_file_name(basename: str) -> str:
    """Return path name of a data file in sdm_schemas package.

    Parameters
    ----------
    basename : `str`
        Name of the file located in the ``yml`` sub-directory.

    Returns
    -------
    path : `str`
        Path made by joining ``${SDM_SCHEMAS_DIR}``, ``yml`` and ``basename``.
        The ``${SDM_SCHEMAS_DIR}`` reference is returned literally, it is not
        expanded here.
    """
    return os.path.join("${SDM_SCHEMAS_DIR}", "yml", basename)
45
46
class ApdbConfig(Config):
    """Part of Apdb configuration common to all implementations."""

    read_sources_months = Field[int](doc="Number of months of history to read from DiaSource", default=12)
    read_forced_sources_months = Field[int](
        doc="Number of months of history to read from DiaForcedSource", default=12
    )
    schema_file = Field[str](
        doc="Location of (YAML) configuration file with standard schema", default=_data_file_name("apdb.yaml")
    )
    schema_name = Field[str](doc="Name of the schema in YAML configuration file.", default="ApdbSchema")
    # Deprecated: kept so that existing configurations still load, the value
    # itself is ignored.
    extra_schema_file = Field[str](
        doc="Location of (YAML) configuration file with extra schema, "
        "definitions in this file are merged with the definitions in "
        "'schema_file', extending or replacing parts of the schema.",
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used.",
    )
    use_insert_id = Field[bool](
        doc=(
            "If True (default), make and fill additional tables used for getHistory methods. "
            "Databases created with earlier versions of APDB may not have these tables, "
            "and corresponding methods will not work for them."
        ),
        default=True,
    )
74
75
class ApdbTableData(ABC):
    """Abstract class for representing table data."""

    @abstractmethod
    def column_names(self) -> list[str]:
        """Return ordered sequence of column names in the table.

        Returns
        -------
        names : `list` [`str`]
            Column names.
        """
        raise NotImplementedError()

    @abstractmethod
    def rows(self) -> Iterable[tuple]:
        """Return table rows, each row is a tuple of values.

        Returns
        -------
        rows : `iterable` [`tuple`]
            Iterable of tuples.
        """
        raise NotImplementedError()
100
101
@dataclass(frozen=True)
class ApdbInsertId:
    """Class used to identify single insert operation.

    Instances of this class are used to identify the units of transfer from
    APDB to PPDB. Usually single `ApdbInsertId` corresponds to a single call to
    `store` method.
    """

    # Unique identifier of the insert operation.
    id: UUID

    @classmethod
    def new_insert_id(cls) -> ApdbInsertId:
        """Generate new unique insert identifier.

        Returns
        -------
        insert_id : `ApdbInsertId`
            New instance whose ``id`` is a freshly generated random UUID.
        """
        # Build through ``cls`` so that the alternate constructor also works
        # for subclasses.
        return cls(id=uuid4())
117
118
class Apdb(ABC):
    """Abstract interface for APDB."""

    ConfigClass = ApdbConfig

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Optional[Table]:
        """Return table schema definition for a given table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `felis.simple.Table` or `None`
            Table schema description, `None` is returned if table is not
            defined by this implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create or re-create whole database schema.

        Parameters
        ----------
        drop : `bool`
            If True then drop all tables before creating new ones.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        This method returns only the last version of each DiaObject. Some
        records in a returned catalog may be outside the specified region, it
        is up to a client to ignore those records or cleanup the catalog before
        further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self, region: Region, object_ids: Optional[Iterable[int]], visit_time: dafBase.DateTime
    ) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If `None` then returned sources are not constrained. If
            list is empty then empty catalog is returned with a correct
            schema.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        This method returns DiaSource catalog for a region with additional
        filtering based on DiaObject IDs. Only a subset of DiaSource history
        is returned limited by ``read_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self, region: Region, object_ids: Optional[Iterable[int]], visit_time: dafBase.DateTime
    ) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If list is empty then empty catalog is returned with a
            correct schema. If `None` then returned sources are not
            constrained. Some implementations may not support latter case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        This method returns DiaForcedSource catalog for a region with
        additional filtering based on DiaObject IDs. Only a subset of
        DiaForcedSource history is returned limited by
        ``read_forced_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getInsertIds(self) -> list[ApdbInsertId] | None:
        """Return collection of insert identifiers known to the database.

        Returns
        -------
        ids : `list` [`ApdbInsertId`] or `None`
            List of identifiers, they may be time-ordered if database supports
            ordering. `None` is returned if database is not configured to store
            insert identifiers.
        """
        raise NotImplementedError()

    @abstractmethod
    def deleteInsertIds(self, ids: Iterable[ApdbInsertId]) -> None:
        """Remove insert identifiers from the database.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Notes
        -----
        This method causes Apdb to forget about specified identifiers. If there
        are any auxiliary data associated with the identifiers, it is also
        removed from database (but data in regular tables is not removed).
        This method should be called after successful transfer of data from
        APDB to PPDB to free space used by history.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjectsHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaObject instances from a given time period
        including the history of each DiaObject.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaObject records. In addition to all regular
            columns it will contain ``insert_id`` column.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaSource instances from a given time period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaSource records. In addition to all regular
            columns it will contain ``insert_id`` column.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaForcedSource instances from a given time
        period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaForcedSource records. In addition to all
            regular columns it will contain ``insert_id`` column.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing SSObject records, all existing records are
            returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: dafBase.DateTime,
        objects: pandas.DataFrame,
        sources: Optional[pandas.DataFrame] = None,
        forced_sources: Optional[pandas.DataFrame] = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

          - column names must correspond to database table columns
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in database schema can be
            omitted from catalog
          - this method knows how to fill interval-related columns of DiaObject
            (validityStart, validityEnd) they do not need to appear in a
            catalog
          - source catalogs have ``diaObjectId`` column associating sources
            with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        If SSObjects with matching IDs already exist in the database, their
        records will be updated with the information from provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily activities is determined by
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            Field created with ``doc`` as its help text and the class this
            method is called on as its target.
        """
        return ConfigurableField(doc=doc, target=cls)
table::Key< int > from
table::Key< int > to
Class for handling dates/times, including MJD, UTC, and TAI.
Definition DateTime.h:64
ConfigurableField makeField(cls, str doc)
Definition apdb.py:463
None deleteInsertIds(self, Iterable[ApdbInsertId] ids)
Definition apdb.py:265
Optional[pandas.DataFrame] getDiaSources(self, Region region, Optional[Iterable[int]] object_ids, dafBase.DateTime visit_time)
Definition apdb.py:177
list[ApdbInsertId]|None getInsertIds(self)
Definition apdb.py:252
None store(self, dafBase.DateTime visit_time, pandas.DataFrame objects, Optional[pandas.DataFrame] sources=None, Optional[pandas.DataFrame] forced_sources=None)
Definition apdb.py:370
Optional[Table] tableDef(self, ApdbTables table)
Definition apdb.py:125
pandas.DataFrame getSSObjects(self)
Definition apdb.py:352
pandas.DataFrame getDiaObjects(self, Region region)
Definition apdb.py:153
ApdbTableData getDiaSourcesHistory(self, Iterable[ApdbInsertId] ids)
Definition apdb.py:307
Optional[pandas.DataFrame] getDiaForcedSources(self, Region region, Optional[Iterable[int]] object_ids, dafBase.DateTime visit_time)
Definition apdb.py:213
None dailyJob(self)
Definition apdb.py:436
ApdbTableData getDiaObjectsHistory(self, Iterable[ApdbInsertId] ids)
Definition apdb.py:284
None makeSchema(self, bool drop=False)
Definition apdb.py:142
None storeSSObjects(self, pandas.DataFrame objects)
Definition apdb.py:403
int countUnassociatedObjects(self)
Definition apdb.py:445
None reassignDiaSources(self, Mapping[int, int] idMap)
Definition apdb.py:419
ApdbTableData getDiaForcedSourcesHistory(self, Iterable[ApdbInsertId] ids)
Definition apdb.py:329
ApdbInsertId new_insert_id(cls)
Definition apdb.py:114
Iterable[tuple] rows(self)
Definition apdb.py:91
list[str] column_names(self)
Definition apdb.py:80
Region is a minimal interface for 2-dimensional regions on the unit sphere.
Definition Region.h:79
str _data_file_name(str basename)
Definition apdb.py:42