LSST Applications g0b6bd0c080+a72a5dd7e6,g1182afd7b4+2a019aa3bb,g17e5ecfddb+2b8207f7de,g1d67935e3f+06cf436103,g38293774b4+ac198e9f13,g396055baef+6a2097e274,g3b44f30a73+6611e0205b,g480783c3b1+98f8679e14,g48ccf36440+89c08d0516,g4b93dc025c+98f8679e14,g5c4744a4d9+a302e8c7f0,g613e996a0d+e1c447f2e0,g6c8d09e9e7+25247a063c,g7271f0639c+98f8679e14,g7a9cd813b8+124095ede6,g9d27549199+a302e8c7f0,ga1cf026fa3+ac198e9f13,ga32aa97882+7403ac30ac,ga786bb30fb+7a139211af,gaa63f70f4e+9994eb9896,gabf319e997+ade567573c,gba47b54d5d+94dc90c3ea,gbec6a3398f+06cf436103,gc6308e37c7+07dd123edb,gc655b1545f+ade567573c,gcc9029db3c+ab229f5caf,gd01420fc67+06cf436103,gd877ba84e5+06cf436103,gdb4cecd868+6f279b5b48,ge2d134c3d5+cc4dbb2e3f,ge448b5faa6+86d1ceac1d,gecc7e12556+98f8679e14,gf3ee170dca+25247a063c,gf4ac96e456+ade567573c,gf9f5ea5b4d+ac198e9f13,gff490e6085+8c2580be5c,w.2022.27
LSST Data Management Base Package
apdb.py
Go to the documentation of this file.
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22from __future__ import annotations
23
24__all__ = ["ApdbConfig", "Apdb"]
25
26from abc import ABC, abstractmethod
27import os
28import pandas
29from typing import Iterable, Mapping, Optional
30
31import lsst.daf.base as dafBase
32from lsst.pex.config import Config, ConfigurableField, Field
33from lsst.sphgeom import Region
34from .apdbSchema import ApdbTables, TableDef
35
36
def _data_file_name(basename: str) -> str:
    """Build the path of a YAML data file from the ``sdm_schemas`` package.

    Parameters
    ----------
    basename : `str`
        Name of the data file, without any directory part.

    Returns
    -------
    path : `str`
        ``basename`` joined onto the ``yml`` sub-directory of
        ``${SDM_SCHEMAS_DIR}``. The variable reference is kept verbatim in
        the returned string; it is not expanded by this function.
    """
    path_parts = ("${SDM_SCHEMAS_DIR}", "yml", basename)
    return os.path.join(*path_parts)
41
42
class ApdbConfig(Config):
    """Part of Apdb configuration common to all implementations.

    Notes
    -----
    The fields defined here cover two areas: how many months of source
    history are read back (``read_sources_months``,
    ``read_forced_sources_months``) and where the table schema is defined
    (``schema_file``, ``schema_name``). ``extra_schema_file`` is retained
    only as a deprecated field.
    """

    # History window, in months, for DiaSource reads.
    read_sources_months = Field(
        dtype=int,
        doc="Number of months of history to read from DiaSource",
        default=12
    )
    # History window, in months, for DiaForcedSource reads.
    read_forced_sources_months = Field(
        dtype=int,
        doc="Number of months of history to read from DiaForcedSource",
        default=12
    )
    # The default points into the sdm_schemas package; the
    # ``${SDM_SCHEMAS_DIR}`` reference is stored unexpanded.
    schema_file = Field(
        dtype=str,
        doc="Location of (YAML) configuration file with standard schema",
        default=_data_file_name("apdb.yaml")
    )
    schema_name = Field(
        dtype=str,
        doc="Name of the schema in YAML configuration file.",
        default="ApdbSchema"
    )
    # Deprecated: kept as a field, but its value is not used.
    extra_schema_file = Field(
        dtype=str,
        doc="Location of (YAML) configuration file with extra schema, "
            "definitions in this file are merged with the definitions in "
            "'schema_file', extending or replacing parts of the schema.",
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used."
    )
75
76
class Apdb(ABC):
    """Abstract interface for APDB.

    Notes
    -----
    Every method except `makeField` is abstract; concrete implementations
    must override all of them. ``ConfigClass`` names the configuration
    class associated with this interface.
    """

    ConfigClass = ApdbConfig

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Optional[TableDef]:
        """Return table schema definition for a given table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `TableDef` or `None`
            Table schema description, `None` is returned if table is not
            defined by this implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create or re-create whole database schema.

        Parameters
        ----------
        drop : `bool`
            If True then drop all tables before creating new ones.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        This method returns only the last version of each DiaObject. Some
        records in a returned catalog may be outside the specified region, it
        is up to a client to ignore those records or cleanup the catalog
        before further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(self, region: Region,
                      object_ids: Optional[Iterable[int]],
                      visit_time: dafBase.DateTime) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If `None` then returned sources are not constrained. If
            list is empty then empty catalog is returned with a correct
            schema.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        This method returns DiaSource catalog for a region with additional
        filtering based on DiaObject IDs. Only a subset of DiaSource history
        is returned limited by ``read_sources_months`` config parameter,
        w.r.t. ``visit_time``. If ``object_ids`` is empty then an empty
        catalog is always returned with the correct schema (columns/types).
        If ``object_ids`` is `None` then no filtering is performed and some
        of the returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(self, region: Region,
                            object_ids: Optional[Iterable[int]],
                            visit_time: dafBase.DateTime) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If list is empty then empty catalog is returned with a
            correct schema. If `None` then returned sources are not
            constrained. Some implementations may not support latter case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned
            if ``read_forced_sources_months`` configuration parameter is set
            to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is
            `None`.

        Notes
        -----
        This method returns DiaForcedSource catalog for a region with
        additional filtering based on DiaObject IDs. Only a subset of
        DiaForcedSource history is returned limited by
        ``read_forced_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of
        the returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjectsHistory(self,
                             start_time: dafBase.DateTime,
                             end_time: dafBase.DateTime,
                             region: Optional[Region] = None) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given time period
        including the history of each DiaObject.

        Parameters
        ----------
        start_time : `lsst.daf.base.DateTime`
            Starting time for DiaObject history search. DiaObject record is
            selected when its ``validityStart`` falls into an interval
            between ``start_time`` (inclusive) and ``end_time`` (exclusive).
        end_time : `lsst.daf.base.DateTime`
            Upper limit on time for DiaObject history search.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaObjects, if not specified then whole sky
            is searched. If region is specified then some returned records
            may fall outside of this region.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSourcesHistory(self,
                             start_time: dafBase.DateTime,
                             end_time: dafBase.DateTime,
                             region: Optional[Region] = None) -> pandas.DataFrame:
        """Return catalog of DiaSource instances from a given time period.

        Parameters
        ----------
        start_time : `lsst.daf.base.DateTime`
            Starting time for DiaSource history search. DiaSource record is
            selected when its ``midPointTai`` falls into an interval between
            ``start_time`` (inclusive) and ``end_time`` (exclusive).
        end_time : `lsst.daf.base.DateTime`
            Upper limit on time for DiaSource history search.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaSources, if not specified then whole sky
            is searched. If region is specified then some returned records
            may fall outside of this region.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaSource records.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSourcesHistory(self,
                                   start_time: dafBase.DateTime,
                                   end_time: dafBase.DateTime,
                                   region: Optional[Region] = None) -> pandas.DataFrame:
        """Return catalog of DiaForcedSource instances from a given time
        period.

        Parameters
        ----------
        start_time : `lsst.daf.base.DateTime`
            Starting time for DiaForcedSource history search.
            DiaForcedSource record is selected when its ``midPointTai``
            falls into an interval between ``start_time`` (inclusive) and
            ``end_time`` (exclusive).
        end_time : `lsst.daf.base.DateTime`
            Upper limit on time for DiaForcedSource history search.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaForcedSources, if not specified then
            whole sky is searched. If region is specified then some returned
            records may fall outside of this region.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaForcedSource records.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes. Some implementations may not support
        region filtering, they will return records from the whole sky.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing SSObject records, all existing records are
            returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(self,
              visit_time: dafBase.DateTime,
              objects: pandas.DataFrame,
              sources: Optional[pandas.DataFrame] = None,
              forced_sources: Optional[pandas.DataFrame] = None) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

          - column names must correspond to database table columns
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in database schema can be
            omitted from catalog
          - this method knows how to fill interval-related columns of
            DiaObject (validityStart, validityEnd) they do not need to
            appear in a catalog
          - source catalogs have ``diaObjectId`` column associating sources
            with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        If SSObjects with matching IDs already exist in the database, their
        records will be updated with the information from provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily activities is determined by
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            Field built with ``doc`` as its help text and the class this
            method is called on as its ``target``.
        """
        # ``target=cls`` so that calling this on a subclass yields a field
        # that targets that subclass rather than the abstract base.
        return ConfigurableField(doc=doc, target=cls)
Class for handling dates/times, including MJD, UTC, and TAI.
Definition: DateTime.h:64
ConfigurableField makeField(cls, str doc)
Definition: apdb.py:419
Optional[pandas.DataFrame] getDiaSources(self, Region region, Optional[Iterable[int]] object_ids, dafBase.DateTime visit_time)
Definition: apdb.py:136
Optional[TableDef] tableDef(self, ApdbTables table)
Definition: apdb.py:84
pandas.DataFrame getDiaSourcesHistory(self, dafBase.DateTime start_time, dafBase.DateTime end_time, Optional[Region] region=None)
Definition: apdb.py:247
None store(self, dafBase.DateTime visit_time, pandas.DataFrame objects, Optional[pandas.DataFrame] sources=None, Optional[pandas.DataFrame] forced_sources=None)
Definition: apdb.py:326
pandas.DataFrame getSSObjects(self)
Definition: apdb.py:310
pandas.DataFrame getDiaObjects(self, Region region)
Definition: apdb.py:112
Optional[pandas.DataFrame] getDiaForcedSources(self, Region region, Optional[Iterable[int]] object_ids, dafBase.DateTime visit_time)
Definition: apdb.py:172
None dailyJob(self)
Definition: apdb.py:392
None makeSchema(self, bool drop=False)
Definition: apdb.py:101
pandas.DataFrame getDiaForcedSourcesHistory(self, dafBase.DateTime start_time, dafBase.DateTime end_time, Optional[Region] region=None)
Definition: apdb.py:279
pandas.DataFrame getDiaObjectsHistory(self, dafBase.DateTime start_time, dafBase.DateTime end_time, Optional[Region] region=None)
Definition: apdb.py:214
None storeSSObjects(self, pandas.DataFrame objects)
Definition: apdb.py:359
int countUnassociatedObjects(self)
Definition: apdb.py:401
None reassignDiaSources(self, Mapping[int, int] idMap)
Definition: apdb.py:375
Region is a minimal interface for 2-dimensional regions on the unit sphere.
Definition: Region.h:79