LSST Applications g06d8191974+b5247657d3,g180d380827+b23588344e,g2079a07aa2+86d27d4dc4,g2305ad1205+0130fb9023,g29320951ab+7714a6b20a,g2bbee38e9b+0e5473021a,g337abbeb29+0e5473021a,g33d1c0ed96+0e5473021a,g3a166c0a6a+0e5473021a,g3ddfee87b4+8783ab7716,g48712c4677+72a8b1060b,g487adcacf7+bbaada240a,g50ff169b8f+96c6868917,g52b1c1532d+585e252eca,g591dd9f2cf+ecccb6240b,g5a732f18d5+53520f316c,g5ea96fc03c+33ab2bc355,g64a986408d+b5247657d3,g858d7b2824+b5247657d3,g8a8a8dda67+585e252eca,g99cad8db69+1453026da9,g9ddcbc5298+9a081db1e4,ga1e77700b3+15fc3df1f7,gb0e22166c9+60f28cb32d,gba4ed39666+c2a2e4ac27,gbb8dafda3b+3751ca9c65,gc120e1dc64+c91d1388df,gc28159a63d+0e5473021a,gc3e9b769f7+241adb7c58,gcf0d15dbbd+8783ab7716,gdaeeff99f8+f9a426f77a,ge6526c86ff+acdbe9a537,ge79ae78c31+0e5473021a,gee10cc3b42+585e252eca,gff1a9f87cc+b5247657d3,w.2024.17
LSST Data Management Base Package
Loading...
Searching...
No Matches
apdbSchema.py
Go to the documentation of this file.
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22"""Module containing methods and classes for generic APDB schema operations.
23
24The code in this module is independent of the specific technology used to
25implement APDB.
26"""
27
28from __future__ import annotations
29
30__all__ = ["ApdbTables", "ApdbSchema"]
31
32import enum
33import logging
34import os
35from collections.abc import Mapping, MutableMapping
36
37import felis.datamodel
38import numpy
39import yaml
40
41from .schema_model import ExtraDataTypes, Schema, Table
42from .versionTuple import VersionTuple
43
44_LOG = logging.getLogger(__name__)
45
# Column types are normally determined by the Cassandra driver. In the few
# cases where we have to build a Pandas DataFrame ourselves, this map is used
# to infer the column dtypes from the types declared in the YAML schema.
_dtype_map: Mapping[felis.datamodel.DataType | ExtraDataTypes, type | str] = {
    # Numeric and timestamp types map onto fixed-width numpy dtypes.
    felis.datamodel.DataType.double: numpy.float64,
    felis.datamodel.DataType.float: numpy.float32,
    felis.datamodel.DataType.timestamp: "datetime64[ms]",
    felis.datamodel.DataType.long: numpy.int64,
    felis.datamodel.DataType.int: numpy.int32,
    felis.datamodel.DataType.short: numpy.int16,
    felis.datamodel.DataType.byte: numpy.int8,
    # Binary and all string-like types are represented as generic objects.
    **dict.fromkeys(
        (
            felis.datamodel.DataType.binary,
            felis.datamodel.DataType.char,
            felis.datamodel.DataType.text,
            felis.datamodel.DataType.string,
            felis.datamodel.DataType.unicode,
        ),
        object,
    ),
    felis.datamodel.DataType.boolean: bool,
}
64
65
@enum.unique
class ApdbTables(enum.Enum):
    """Enumeration of the table names used in APDB schema."""

    DiaObject = "DiaObject"
    """Table holding DIAObject records."""

    DiaSource = "DiaSource"
    """Table holding DIASource records."""

    DiaForcedSource = "DiaForcedSource"
    """Table holding DIAForcedSource records."""

    DiaObjectLast = "DiaObjectLast"
    """Table holding the last version of DIAObject records.

    This table may be optional for some implementations.
    """

    SSObject = "SSObject"
    """Table holding SSObject records."""

    DiaObject_To_Object_Match = "DiaObject_To_Object_Match"
    """Table holding DiaObject_To_Object_Match records."""

    metadata = "metadata"
    """Metadata table, this table may not always exist."""

    def table_name(self, prefix: str = "") -> str:
        """Return full table name.

        Parameters
        ----------
        prefix : `str`, optional
            String placed in front of the table name, empty by default.

        Returns
        -------
        name : `str`
            The prefix followed by the value of this enum member.
        """
        full_name = prefix + self.value
        return full_name
97
98
class ApdbSchema:
    """Class for management of APDB schema.

    Attributes
    ----------
    tableSchemas : `dict`
        Maps `ApdbTables` enum values to `.schema_model.Table` instances.

    Parameters
    ----------
    schema_file : `str`
        Name of the YAML schema file.
    schema_name : `str`, optional
        Name of the schema in YAML files.
    """

    def __init__(
        self,
        schema_file: str,
        schema_name: str = "ApdbSchema",
    ):
        # build complete table schema
        self.tableSchemas, self._schemaVersion = self._buildSchemas(schema_file, schema_name)

    def column_dtype(self, felis_type: felis.datamodel.DataType | ExtraDataTypes) -> type | str:
        """Return Pandas data type for a given Felis column type.

        Parameters
        ----------
        felis_type : `felis.datamodel.DataType`
            Felis type, one of the enums defined in `felis.datamodel` module.

        Returns
        -------
        column_dtype : `type` or `str`
            Type that can be used for columns in Pandas.

        Raises
        ------
        TypeError
            Raised if type cannot be handled.
        """
        try:
            return _dtype_map[felis_type]
        except KeyError:
            # The KeyError itself carries no extra information for callers,
            # so it is deliberately dropped from the exception context.
            raise TypeError(f"Unexpected Felis type: {felis_type}") from None

    def schemaVersion(self) -> VersionTuple:
        """Return schema version as defined in YAML schema file.

        Returns
        -------
        version : `VersionTuple`
            Version number read from YAML file, if YAML file does not define
            schema version then "0.1.0" is returned.
        """
        if self._schemaVersion is None:
            return VersionTuple(0, 1, 0)
        return self._schemaVersion

    @staticmethod
    def _selectSchemaDocument(
        documents: list[object], schema_file: str, schema_name: str
    ) -> Mapping[str, object]:
        """Pick the single YAML document that defines the named schema.

        Parameters
        ----------
        documents : `list`
            Documents loaded from the YAML schema file.
        schema_file : `str`
            Name of the schema file, only used in error messages.
        schema_name : `str`
            Name of the schema to look for.

        Returns
        -------
        document : `~collections.abc.Mapping`
            The only document whose ``name`` item is equal to ``schema_name``.

        Raises
        ------
        ValueError
            Raised if there is no matching document or more than one.
        """
        # Empty documents are loaded as `None` and other documents may be
        # scalars or lists; only mappings can define a schema, so everything
        # else is ignored instead of failing on a missing ``get`` method.
        matches = [
            document
            for document in documents
            if isinstance(document, Mapping) and document.get("name") == schema_name
        ]
        if not matches:
            raise ValueError(f"Schema file {schema_file!r} does not define schema {schema_name!r}")
        if len(matches) > 1:
            raise ValueError(f"Schema file {schema_file!r} defines multiple schemas {schema_name!r}")
        return matches[0]

    @classmethod
    def _buildSchemas(
        cls, schema_file: str, schema_name: str = "ApdbSchema"
    ) -> tuple[Mapping[ApdbTables, Table], VersionTuple | None]:
        """Create schema definitions for all tables.

        Reads YAML schema and builds a dictionary containing
        `.schema_model.Table` instances for each table.

        Parameters
        ----------
        schema_file : `str`
            Name of YAML file with ``felis`` schema, environment variables in
            the name are expanded.
        schema_name : `str`, optional
            Name of the schema in YAML files.

        Returns
        -------
        tables : `dict`
            Mapping of `ApdbTables` enum values to `.schema_model.Table`
            instances.
        version : `VersionTuple` or `None`
            Schema version defined in schema file, `None` if version is not
            defined.

        Raises
        ------
        ValueError
            Raised if the file does not define the requested schema or
            defines it more than once.
        """
        schema_file = os.path.expandvars(schema_file)
        with open(schema_file) as yaml_stream:
            # load_all() produces documents lazily, they have to be
            # materialized while the file is still open.
            documents = list(yaml.load_all(yaml_stream, Loader=yaml.SafeLoader))
        schema_dict = cls._selectSchemaDocument(documents, schema_file, schema_name)
        felis_schema = felis.datamodel.Schema.model_validate(schema_dict)
        schema = Schema.from_felis(felis_schema)

        # keep only the tables that are known to APDB
        tables: MutableMapping[ApdbTables, Table] = {}
        for table in schema.tables:
            try:
                table_enum = ApdbTables(table.name)
            except ValueError:
                # There may be other tables in the schema that do not belong
                # to APDB.
                continue
            tables[table_enum] = table

        version: VersionTuple | None = None
        if schema.version is not None:
            version = VersionTuple.fromString(schema.version.current)

        return tables, version
type|str column_dtype(self, felis.datamodel.DataType|ExtraDataTypes felis_type)
__init__(self, str schema_file, str schema_name="ApdbSchema")
tuple[Mapping[ApdbTables, Table], VersionTuple|None] _buildSchemas(cls, str schema_file, str schema_name="ApdbSchema")
str table_name(self, str prefix="")
Definition apdbSchema.py:94