LSST Applications g0f08755f38+82efc23009,g12f32b3c4e+e7bdf1200e,g1653933729+a8ce1bb630,g1a0ca8cf93+50eff2b06f,g28da252d5a+52db39f6a5,g2bbee38e9b+37c5a29d61,g2bc492864f+37c5a29d61,g2cdde0e794+c05ff076ad,g3156d2b45e+41e33cbcdc,g347aa1857d+37c5a29d61,g35bb328faa+a8ce1bb630,g3a166c0a6a+37c5a29d61,g3e281a1b8c+fb992f5633,g414038480c+7f03dfc1b0,g41af890bb2+11b950c980,g5fbc88fb19+17cd334064,g6b1c1869cb+12dd639c9a,g781aacb6e4+a8ce1bb630,g80478fca09+72e9651da0,g82479be7b0+04c31367b4,g858d7b2824+82efc23009,g9125e01d80+a8ce1bb630,g9726552aa6+8047e3811d,ga5288a1d22+e532dc0a0b,gae0086650b+a8ce1bb630,gb58c049af0+d64f4d3760,gc28159a63d+37c5a29d61,gcf0d15dbbd+2acd6d4d48,gd7358e8bfb+778a810b6e,gda3e153d99+82efc23009,gda6a2b7d83+2acd6d4d48,gdaeeff99f8+1711a396fd,ge2409df99d+6b12de1076,ge79ae78c31+37c5a29d61,gf0baf85859+d0a5978c5a,gf3967379c6+4954f8c433,gfb92a5be7c+82efc23009,gfec2e1e490+2aaed99252,w.2024.46
LSST Data Management Base Package
Loading...
Searching...
No Matches
apdbSchema.py
Go to the documentation of this file.
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22"""Module containing methods and classes for generic APDB schema operations.
23
24The code in this module is independent of the specific technology used to
25implement APDB.
26"""
27
28from __future__ import annotations
29
30__all__ = ["ApdbTables", "ApdbSchema"]
31
32import enum
33import logging
34import os
35from collections.abc import Mapping, MutableMapping
36
37import felis.datamodel
38import numpy
39import yaml
40from felis.datamodel import Schema as FelisSchema
41
42from .schema_model import ExtraDataTypes, Schema, Table
43from .versionTuple import VersionTuple
44
45_LOG = logging.getLogger(__name__)
46
# In most cases column types are determined by the Cassandra driver, but in
# some cases we need to create a Pandas DataFrame ourselves, and we use this
# map to infer column types from their YAML (felis) schema. Note that
# Cassandra saves timestamps with millisecond precision, but pandas maps the
# datetime type to "datetime64[ns]".
#
# Values are either a Python/numpy type or a dtype string; all textual and
# binary felis types are stored as generic ``object`` columns.
_dtype_map: Mapping[felis.datamodel.DataType | ExtraDataTypes, type | str] = {
    felis.datamodel.DataType.double: numpy.float64,
    felis.datamodel.DataType.float: numpy.float32,
    felis.datamodel.DataType.timestamp: "datetime64[ns]",
    felis.datamodel.DataType.long: numpy.int64,
    felis.datamodel.DataType.int: numpy.int32,
    felis.datamodel.DataType.short: numpy.int16,
    felis.datamodel.DataType.byte: numpy.int8,
    felis.datamodel.DataType.binary: object,
    felis.datamodel.DataType.char: object,
    felis.datamodel.DataType.text: object,
    felis.datamodel.DataType.string: object,
    felis.datamodel.DataType.unicode: object,
    felis.datamodel.DataType.boolean: bool,
}
67
68
@enum.unique
class ApdbTables(enum.Enum):
    """Names of the tables in APDB schema.

    The value of each member is the table name as a string; `enum.unique`
    guarantees that no two members share the same name.
    """

    DiaObject = "DiaObject"
    """Name of the table for DIAObject records."""

    DiaSource = "DiaSource"
    """Name of the table for DIASource records."""

    DiaForcedSource = "DiaForcedSource"
    """Name of the table for DIAForcedSource records."""

    DiaObjectLast = "DiaObjectLast"
    """Name of the table for the last version of DIAObject records.

    This table may be optional for some implementations.
    """

    SSObject = "SSObject"
    """Name of the table for SSObject records."""

    DiaObject_To_Object_Match = "DiaObject_To_Object_Match"
    """Name of the table for DiaObject_To_Object_Match records."""

    metadata = "metadata"
    """Name of the metadata table, this table may not always exist."""

    def table_name(self, prefix: str = "") -> str:
        """Return full table name.

        Parameters
        ----------
        prefix : `str`, optional
            String prepended to the table name, empty by default.

        Returns
        -------
        name : `str`
            ``prefix`` immediately followed by the value of this enum member.
        """
        return prefix + self.value
100
101
class ApdbSchema:
    """Class for management of APDB schema.

    Attributes
    ----------
    tableSchemas : `dict`
        Maps `ApdbTables` enum value to `.schema_model.Table` instance.

    Parameters
    ----------
    schema_file : `str`
        Name of the YAML schema file.
    schema_name : `str`, optional
        Name of the schema in YAML files.
    """

    def __init__(
        self,
        schema_file: str,
        schema_name: str = "ApdbSchema",
    ) -> None:
        # Build complete table schema; the version is `None` when the YAML
        # file does not define one (see `schemaVersion`).
        self.tableSchemas, self._schemaVersion = self._buildSchemas(schema_file, schema_name)

    def column_dtype(self, felis_type: felis.datamodel.DataType | ExtraDataTypes) -> type | str:
        """Return Pandas data type for a given Felis column type.

        Parameters
        ----------
        felis_type : `felis.datamodel.DataType`
            Felis type, one of the enums defined in `felis.datamodel` module.

        Returns
        -------
        column_dtype : `type` or `str`
            Type that can be used for columns in Pandas.

        Raises
        ------
        TypeError
            Raised if the type cannot be handled.
        """
        try:
            return _dtype_map[felis_type]
        except KeyError:
            # The KeyError is an implementation detail of the lookup table,
            # do not expose it as exception context.
            raise TypeError(f"Unexpected Felis type: {felis_type}") from None

    def schemaVersion(self) -> VersionTuple:
        """Return schema version as defined in YAML schema file.

        Returns
        -------
        version : `VersionTuple`
            Version number read from YAML file, if YAML file does not define
            schema version then "0.1.0" is returned.
        """
        if self._schemaVersion is None:
            return VersionTuple(0, 1, 0)
        return self._schemaVersion

    @classmethod
    def _buildSchemas(
        cls, schema_file: str, schema_name: str = "ApdbSchema"
    ) -> tuple[Mapping[ApdbTables, Table], VersionTuple | None]:
        """Create schema definitions for all tables.

        Reads YAML schema and builds a dictionary containing
        `.schema_model.Table` instances for each table.

        Parameters
        ----------
        schema_file : `str`
            Name of YAML file with ``felis`` schema. Environment variables
            in the name are expanded.
        schema_name : `str`, optional
            Name of the schema in YAML files.

        Returns
        -------
        tables : `dict`
            Mapping of `ApdbTables` enum values to `.schema_model.Table`
            instances.
        version : `VersionTuple` or `None`
            Schema version defined in schema file, `None` if version is not
            defined.

        Raises
        ------
        ValueError
            Raised if the file does not define a schema with the given name,
            or defines more than one.
        """
        schema_file = os.path.expandvars(schema_file)
        with open(schema_file, encoding="utf-8") as yaml_stream:
            # The file may contain several YAML documents. `load_all` is
            # lazy, so it has to be consumed while the file is still open.
            # Empty or non-mapping documents cannot define a schema and are
            # skipped instead of failing on ``.get``.
            schemas_list = [
                document
                for document in yaml.load_all(yaml_stream, Loader=yaml.SafeLoader)
                if isinstance(document, Mapping) and document.get("name") == schema_name
            ]
        if not schemas_list:
            raise ValueError(f"Schema file {schema_file!r} does not define schema {schema_name!r}")
        elif len(schemas_list) > 1:
            raise ValueError(f"Schema file {schema_file!r} defines multiple schemas {schema_name!r}")
        felis_schema: FelisSchema = FelisSchema.model_validate(schemas_list[0])
        schema = Schema.from_felis(felis_schema)

        # Keep only the tables that are known to APDB, keyed by enum value.
        tables: MutableMapping[ApdbTables, Table] = {}
        for table in schema.tables:
            try:
                table_enum = ApdbTables(table.name)
            except ValueError:
                # There may be other tables in the schema that do not belong
                # to APDB.
                continue
            else:
                tables[table_enum] = table

        version: VersionTuple | None = None
        if schema.version is not None:
            version = VersionTuple.fromString(schema.version.current)

        return tables, version
type|str column_dtype(self, felis.datamodel.DataType|ExtraDataTypes felis_type)
__init__(self, str schema_file, str schema_name="ApdbSchema")
tuple[Mapping[ApdbTables, Table], VersionTuple|None] _buildSchemas(cls, str schema_file, str schema_name="ApdbSchema")
str table_name(self, str prefix="")
Definition apdbSchema.py:97