LSST Applications  21.0.0-172-gfb10e10a+18fedfabac,22.0.0+297cba6710,22.0.0+80564b0ff1,22.0.0+8d77f4f51a,22.0.0+a28f4c53b1,22.0.0+dcf3732eb2,22.0.1-1-g7d6de66+2a20fdde0d,22.0.1-1-g8e32f31+297cba6710,22.0.1-1-geca5380+7fa3b7d9b6,22.0.1-12-g44dc1dc+2a20fdde0d,22.0.1-15-g6a90155+515f58c32b,22.0.1-16-g9282f48+790f5f2caa,22.0.1-2-g92698f7+dcf3732eb2,22.0.1-2-ga9b0f51+7fa3b7d9b6,22.0.1-2-gd1925c9+bf4f0e694f,22.0.1-24-g1ad7a390+a9625a72a8,22.0.1-25-g5bf6245+3ad8ecd50b,22.0.1-25-gb120d7b+8b5510f75f,22.0.1-27-g97737f7+2a20fdde0d,22.0.1-32-gf62ce7b1+aa4237961e,22.0.1-4-g0b3f228+2a20fdde0d,22.0.1-4-g243d05b+871c1b8305,22.0.1-4-g3a563be+32dcf1063f,22.0.1-4-g44f2e3d+9e4ab0f4fa,22.0.1-42-gca6935d93+ba5e5ca3eb,22.0.1-5-g15c806e+85460ae5f3,22.0.1-5-g58711c4+611d128589,22.0.1-5-g75bb458+99c117b92f,22.0.1-6-g1c63a23+7fa3b7d9b6,22.0.1-6-g50866e6+84ff5a128b,22.0.1-6-g8d3140d+720564cf76,22.0.1-6-gd805d02+cc5644f571,22.0.1-8-ge5750ce+85460ae5f3,master-g6e05de7fdc+babf819c66,master-g99da0e417a+8d77f4f51a,w.2021.48
LSST Data Management Base Package
apdb.py
Go to the documentation of this file.
1 # This file is part of dax_apdb.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (http://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 
22 from __future__ import annotations
23 
24 __all__ = ["ApdbConfig", "Apdb"]
25 
26 from abc import ABC, abstractmethod
27 import os
28 import pandas
29 from typing import Iterable, Optional
30 
31 import lsst.daf.base as dafBase
32 from lsst.pex.config import Config, ConfigurableField, Field
33 from lsst.sphgeom import Region
34 from .apdbSchema import ApdbTables, TableDef
35 
36 
37 def _data_file_name(basename: str) -> str:
38  """Return path name of a data file in dax_apdb package.
39  """
40  return os.path.join("${DAX_APDB_DIR}", "data", basename)
41 
42 
43 class ApdbConfig(Config):
44  """Part of Apdb configuration common to all implementations.
45  """
46  read_sources_months = Field(
47  dtype=int,
48  doc="Number of months of history to read from DiaSource",
49  default=12
50  )
51  read_forced_sources_months = Field(
52  dtype=int,
53  doc="Number of months of history to read from DiaForcedSource",
54  default=12
55  )
56  schema_file = Field(
57  dtype=str,
58  doc="Location of (YAML) configuration file with standard schema",
59  default=_data_file_name("apdb-schema.yaml")
60  )
61  extra_schema_file = Field(
62  dtype=str,
63  doc="Location of (YAML) configuration file with extra schema, "
64  "definitions in this file are merged with the definitions in "
65  "'schema_file', extending or replacing parts of the schema.",
66  default=_data_file_name("apdb-schema-extra.yaml")
67  )
68 
69 
70 class Apdb(ABC):
71  """Abstract interface for APDB.
72  """
73 
74  ConfigClass = ApdbConfig
75 
76  @abstractmethod
77  def tableDef(self, table: ApdbTables) -> Optional[TableDef]:
78  """Return table schema definition for a given table.
79 
80  Parameters
81  ----------
82  table : `ApdbTables`
83  One of the known APDB tables.
84 
85  Returns
86  -------
87  tableSchema : `TableDef` or `None`
88  Table schema description, `None` is returned if table is not
89  defined by this implementation.
90  """
91  raise NotImplementedError()
92 
93  @abstractmethod
94  def makeSchema(self, drop: bool = False) -> None:
95  """Create or re-create whole database schema.
96 
97  Parameters
98  ----------
99  drop : `bool`
100  If True then drop all tables before creating new ones.
101  """
102  raise NotImplementedError()
103 
104  @abstractmethod
105  def getDiaObjects(self, region: Region) -> pandas.DataFrame:
106  """Return catalog of DiaObject instances from a given region.
107 
108  This method returns only the last version of each DiaObject. Some
109  records in a returned catalog may be outside the specified region; it
110  is up to a client to ignore those records or clean up the catalog before
111  further use.
112 
113  Parameters
114  ----------
115  region : `lsst.sphgeom.Region`
116  Region to search for DIAObjects.
117 
118  Returns
119  -------
120  catalog : `pandas.DataFrame`
121  Catalog containing DiaObject records for a region that may be a
122  superset of the specified region.
123  """
124  raise NotImplementedError()
125 
126  @abstractmethod
127  def getDiaSources(self, region: Region,
128  object_ids: Optional[Iterable[int]],
129  visit_time: dafBase.DateTime) -> Optional[pandas.DataFrame]:
130  """Return catalog of DiaSource instances from a given region.
131 
132  Parameters
133  ----------
134  region : `lsst.sphgeom.Region`
135  Region to search for DIASources.
136  object_ids : iterable [ `int` ], optional
137  List of DiaObject IDs to further constrain the set of returned
138  sources. If `None` then returned sources are not constrained. If
139  list is empty then empty catalog is returned with a correct
140  schema.
141  visit_time : `lsst.daf.base.DateTime`
142  Time of the current visit.
143 
144  Returns
145  -------
146  catalog : `pandas.DataFrame`, or `None`
147  Catalog containing DiaSource records. `None` is returned if
148  ``read_sources_months`` configuration parameter is set to 0.
149 
150  Notes
151  -----
152  This method returns DiaSource catalog for a region with additional
153  filtering based on DiaObject IDs. Only a subset of DiaSource history
154  is returned limited by ``read_sources_months`` config parameter, w.r.t.
155  ``visit_time``. If ``object_ids`` is empty then an empty catalog is
156  always returned with the correct schema (columns/types). If
157  ``object_ids`` is `None` then no filtering is performed and some of the
158  returned records may be outside the specified region.
159  """
160  raise NotImplementedError()
161 
162  @abstractmethod
163  def getDiaForcedSources(self, region: Region,
164  object_ids: Optional[Iterable[int]],
165  visit_time: dafBase.DateTime) -> Optional[pandas.DataFrame]:
166  """Return catalog of DiaForcedSource instances from a given region.
167 
168  Parameters
169  ----------
170  region : `lsst.sphgeom.Region`
171  Region to search for DIAForcedSources.
172  object_ids : iterable [ `int` ], optional
173  List of DiaObject IDs to further constrain the set of returned
174  sources. If list is empty then empty catalog is returned with a
175  correct schema. If `None` then returned sources are not
176  constrained. Some implementations may not support the latter case.
177  visit_time : `lsst.daf.base.DateTime`
178  Time of the current visit.
179 
180  Returns
181  -------
182  catalog : `pandas.DataFrame`, or `None`
183  Catalog containing DiaForcedSource records. `None` is returned if
184  ``read_forced_sources_months`` configuration parameter is set to 0.
185 
186  Raises
187  ------
188  NotImplementedError
189  May be raised by some implementations if ``object_ids`` is `None`.
190 
191  Notes
192  -----
193  This method returns DiaForcedSource catalog for a region with additional
194  filtering based on DiaObject IDs. Only a subset of DiaForcedSource history
195  is returned limited by ``read_forced_sources_months`` config parameter,
196  w.r.t. ``visit_time``. If ``object_ids`` is empty then an empty catalog
197  is always returned with the correct schema (columns/types). If
198  ``object_ids`` is `None` then no filtering is performed and some of the
199  returned records may be outside the specified region.
200  """
201  raise NotImplementedError()
202 
203  @abstractmethod
204  def store(self,
205  visit_time: dafBase.DateTime,
206  objects: pandas.DataFrame,
207  sources: Optional[pandas.DataFrame] = None,
208  forced_sources: Optional[pandas.DataFrame] = None) -> None:
209  """Store all three types of catalogs in the database.
210 
211  Parameters
212  ----------
213  visit_time : `lsst.daf.base.DateTime`
214  Time of the visit.
215  objects : `pandas.DataFrame`
216  Catalog with DiaObject records.
217  sources : `pandas.DataFrame`, optional
218  Catalog with DiaSource records.
219  forced_sources : `pandas.DataFrame`, optional
220  Catalog with DiaForcedSource records.
221 
222  Notes
223  -----
224  This method takes DataFrame catalogs; their schema must be
225  compatible with the schema of the APDB tables:
226 
227  - column names must correspond to database table columns
228  - types and units of the columns must match database definitions,
229  no unit conversion is performed presently
230  - columns that have default values in database schema can be
231  omitted from catalog
232  - this method knows how to fill interval-related columns of DiaObject
233  (validityStart, validityEnd), so they do not need to appear in a
234  catalog
235  - source catalogs have ``diaObjectId`` column associating sources
236  with objects
237  """
238  raise NotImplementedError()
239 
240  @abstractmethod
241  def dailyJob(self) -> None:
242  """Implement daily activities like cleanup/vacuum.
243 
244  What should be done during daily activities is determined by
245  specific implementation.
246  """
247  raise NotImplementedError()
248 
249  @abstractmethod
250  def countUnassociatedObjects(self) -> int:
251  """Return the number of DiaObjects that have only one DiaSource
252  associated with them.
253 
254  Used as part of ap_verify metrics.
255 
256  Returns
257  -------
258  count : `int`
259  Number of DiaObjects with exactly one associated DiaSource.
260 
261  Notes
262  -----
263  This method can be very inefficient or slow in some implementations.
264  """
265  raise NotImplementedError()
266 
267  @classmethod
268  def makeField(cls, doc: str) -> ConfigurableField:
269  """Make a `~lsst.pex.config.ConfigurableField` for Apdb.
270 
271  Parameters
272  ----------
273  doc : `str`
274  Help text for the field.
275 
276  Returns
277  -------
278  configurableField : `lsst.pex.config.ConfigurableField`
279  A `~lsst.pex.config.ConfigurableField` for Apdb.
280  """
281  return ConfigurableField(doc=doc, target=cls)
Class for handling dates/times, including MJD, UTC, and TAI.
Definition: DateTime.h:64
ConfigurableField makeField(cls, str doc)
Definition: apdb.py:268
Optional[pandas.DataFrame] getDiaSources(self, Region region, Optional[Iterable[int]] object_ids, dafBase.DateTime visit_time)
Definition: apdb.py:129
Optional[TableDef] tableDef(self, ApdbTables table)
Definition: apdb.py:77
None store(self, dafBase.DateTime visit_time, pandas.DataFrame objects, Optional[pandas.DataFrame] sources=None, Optional[pandas.DataFrame] forced_sources=None)
Definition: apdb.py:208
pandas.DataFrame getDiaObjects(self, Region region)
Definition: apdb.py:105
Optional[pandas.DataFrame] getDiaForcedSources(self, Region region, Optional[Iterable[int]] object_ids, dafBase.DateTime visit_time)
Definition: apdb.py:165
None dailyJob(self)
Definition: apdb.py:241
None makeSchema(self, bool drop=False)
Definition: apdb.py:94
int countUnassociatedObjects(self)
Definition: apdb.py:250