Loading [MathJax]/extensions/tex2jax.js
LSST Applications g04a91732dc+cc8870d3f5,g07dc498a13+5aa0b8792f,g0fba68d861+80045be308,g1409bbee79+5aa0b8792f,g1a7e361dbc+5aa0b8792f,g1fd858c14a+f64bc332a9,g208c678f98+1ae86710ed,g35bb328faa+fcb1d3bbc8,g4d2262a081+47ad8a29a8,g4d39ba7253+9633a327c1,g4e0f332c67+5d362be553,g53246c7159+fcb1d3bbc8,g60b5630c4e+9633a327c1,g668ecb457e+25d63fd678,g78460c75b0+2f9a1b4bcd,g786e29fd12+cf7ec2a62a,g7b71ed6315+fcb1d3bbc8,g8852436030+8b64ca622a,g89139ef638+5aa0b8792f,g89e1512fd8+04325574d3,g8d6b6b353c+9633a327c1,g9125e01d80+fcb1d3bbc8,g989de1cb63+5aa0b8792f,g9f33ca652e+b196626af7,ga9baa6287d+9633a327c1,gaaedd4e678+5aa0b8792f,gabe3b4be73+1e0a283bba,gb1101e3267+71e32094df,gb58c049af0+f03b321e39,gb90eeb9370+2807b1ad02,gcf25f946ba+8b64ca622a,gd315a588df+a39986a76f,gd6cbbdb0b4+c8606af20c,gd9a9a58781+fcb1d3bbc8,gde0f65d7ad+4e42d81ab7,ge278dab8ac+932305ba37,ge82c20c137+76d20ab76d,gfe73954cf8+a1301e4c20,w.2025.11
LSST Data Management Base Package
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
apdbCassandraReplica.py
Go to the documentation of this file.
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22from __future__ import annotations
23
24__all__ = ["ApdbCassandraReplica"]
25
26import logging
27from collections.abc import Iterable, Mapping
28from typing import TYPE_CHECKING, Any, cast
29
30import astropy.time
31from lsst.utils.iteration import chunk_iterable
32
33from ..apdbReplica import ApdbReplica, ApdbTableData, ReplicaChunk
34from ..monitor import MonAgent
35from ..timer import Timer
36from ..versionTuple import VersionTuple
37from .apdbCassandraSchema import ApdbCassandraSchema, ExtraTables
38from .cassandra_utils import ApdbCassandraTableData, PreparedStatementCache
39
40if TYPE_CHECKING:
41 from .apdbCassandra import ApdbCassandra
42
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)

# Monitoring agent handed to every `Timer` created by this module.
_MON = MonAgent(__name__)

VERSION = VersionTuple(1, 0, 0)
"""Version for the code controlling replication tables. This needs to be
updated following compatibility rules when schema produced by this code
changes.
"""
52
53
class ApdbCassandraReplica(ApdbReplica):
    """Implementation of `ApdbReplica` for Cassandra backend.

    Parameters
    ----------
    apdb : `ApdbCassandra`
        Instance of ApdbCassandra for database.
    schema : `ApdbCassandraSchema`
        Instance of ApdbCassandraSchema for database.
    session
        Instance of cassandra session type.
    """

    def __init__(self, apdb: ApdbCassandra, schema: ApdbCassandraSchema, session: Any):
        # Note that ApdbCassandra instance must stay alive while this object
        # exists, so we keep reference to it.
        self._apdb = apdb
        self._schema = schema
        self._session = session
        self._config = apdb.config

        # Cache for prepared statements; every query in this class goes
        # through ``self._preparer.prepare()``.
        # NOTE(review): constructor argument is assumed to be the session --
        # confirm against `PreparedStatementCache`.
        self._preparer = PreparedStatementCache(self._session)

    def _timer(self, name: str, *, tags: Mapping[str, str | int] | None = None) -> Timer:
        """Create `Timer` instance given its name.

        Parameters
        ----------
        name : `str`
            Name passed to the `Timer`.
        tags : `~collections.abc.Mapping` [`str`, `str` or `int`], optional
            Tags forwarded to the `Timer`.

        Returns
        -------
        timer : `Timer`
            Timer bound to the module-level monitoring agent.
        """
        return Timer(name, _MON, tags=tags)

    @classmethod
    def apdbReplicaImplementationVersion(cls) -> VersionTuple:
        # Docstring inherited from base class.
        return VERSION

    def getReplicaChunks(self) -> list[ReplicaChunk] | None:
        # docstring is inherited from a base class
        if not self._schema.has_replica_chunks:
            return None

        # everything goes into a single partition
        partition = 0

        table_name = self._schema.tableName(ExtraTables.ApdbReplicaChunks)
        # We want to avoid timezone mess so return timestamps as milliseconds.
        query = (
            "SELECT toUnixTimestamp(last_update_time), apdb_replica_chunk, unique_id "
            f'FROM "{self._config.keyspace}"."{table_name}" WHERE partition = ?'
        )

        with self._timer("chunks_select_time") as timer:
            result = self._session.execute(
                self._preparer.prepare(query),
                (partition,),
                timeout=self._config.connection_config.read_timeout,
                execution_profile="read_tuples",
            )
            # order by last_update_time; it is the first selected column, so
            # plain ordering of the returned rows sorts on it first.
            rows = sorted(result)
            timer.add_values(row_count=len(rows))

        return [
            ReplicaChunk(
                id=row[1],
                # Timestamp was selected in milliseconds, convert to seconds.
                last_update_time=astropy.time.Time(row[0] / 1000, format="unix_tai"),
                unique_id=row[2],
            )
            for row in rows
        ]

    def deleteReplicaChunks(self, chunks: Iterable[int]) -> None:
        # docstring is inherited from a base class
        if not self._schema.has_replica_chunks:
            raise ValueError("APDB is not configured for replication")

        # There is 64k limit on number of markers in Cassandra CQL
        for chunk_ids in chunk_iterable(chunks, 20_000):
            chunk_list = list(chunk_ids)
            # One placeholder per bound chunk ID; sized from the same list
            # that is passed as query parameters below.
            params = ",".join("?" * len(chunk_list))

            # everything goes into a single partition
            partition = 0

            table_name = self._schema.tableName(ExtraTables.ApdbReplicaChunks)
            query = (
                f'DELETE FROM "{self._config.keyspace}"."{table_name}" '
                f"WHERE partition = ? AND apdb_replica_chunk IN ({params})"
            )

            with self._timer("chunks_delete_time") as timer:
                self._session.execute(
                    self._preparer.prepare(query),
                    [partition] + chunk_list,
                    timeout=self._config.connection_config.remove_timeout,
                )
                timer.add_values(row_count=len(chunk_list))

            # Also remove those chunk_ids from Dia*Chunks tables.
            for table in (
                ExtraTables.DiaObjectChunks,
                ExtraTables.DiaSourceChunks,
                ExtraTables.DiaForcedSourceChunks,
            ):
                table_name = self._schema.tableName(table)
                query = (
                    f'DELETE FROM "{self._config.keyspace}"."{table_name}"'
                    f" WHERE apdb_replica_chunk IN ({params})"
                )
                # NOTE(review): the timer name below is misspelled ("detele");
                # it is left untouched because it is emitted at runtime and
                # monitoring consumers may rely on it.
                with self._timer("table_chunk_detele_time", tags={"table": table_name}) as timer:
                    self._session.execute(
                        self._preparer.prepare(query),
                        chunk_list,
                        timeout=self._config.connection_config.remove_timeout,
                    )
                    timer.add_values(row_count=len(chunk_list))

    def getDiaObjectsChunks(self, chunks: Iterable[int]) -> ApdbTableData:
        # docstring is inherited from a base class
        return self._get_chunks(ExtraTables.DiaObjectChunks, chunks)

    def getDiaSourcesChunks(self, chunks: Iterable[int]) -> ApdbTableData:
        # docstring is inherited from a base class
        return self._get_chunks(ExtraTables.DiaSourceChunks, chunks)

    def getDiaForcedSourcesChunks(self, chunks: Iterable[int]) -> ApdbTableData:
        # docstring is inherited from a base class
        return self._get_chunks(ExtraTables.DiaForcedSourceChunks, chunks)

    def _get_chunks(self, table: ExtraTables, chunks: Iterable[int]) -> ApdbTableData:
        """Return records from a particular table given set of insert IDs.

        Parameters
        ----------
        table : `ExtraTables`
            Chunk table to read from.
        chunks : `~collections.abc.Iterable` [`int`]
            Replica chunk IDs to select.

        Returns
        -------
        data : `ApdbTableData`
            Rows of ``table`` whose ``apdb_replica_chunk`` is in ``chunks``.

        Raises
        ------
        ValueError
            Raised if the schema reports that replica chunks are not enabled.
        """
        if not self._schema.has_replica_chunks:
            raise ValueError("APDB is not configured for replication")

        # We do not expect too many chunks in this query.
        chunks = list(chunks)
        params = ",".join("?" * len(chunks))

        table_name = self._schema.tableName(table)
        # I know that chunk table schema has only regular APDB columns plus
        # apdb_replica_chunk column, and this is exactly what we need to return
        # from this method, so selecting a star is fine here.
        query = (
            f'SELECT * FROM "{self._config.keyspace}"."{table_name}" WHERE apdb_replica_chunk IN ({params})'
        )
        statement = self._preparer.prepare(query)

        with self._timer("table_chunk_select_time", tags={"table": table_name}) as timer:
            result = self._session.execute(statement, chunks, execution_profile="read_raw")
            # NOTE(review): presumably the "read_raw" profile makes the driver
            # store an `ApdbCassandraTableData` in ``_current_rows`` -- confirm
            # against the profile definition.
            table_data = cast(ApdbCassandraTableData, result._current_rows)
            timer.add_values(row_count=len(table_data.rows()))
        return table_data
__init__(self, ApdbCassandra apdb, ApdbCassandraSchema schema, Any session)
Timer _timer(self, str name, *, Mapping[str, str|int]|None tags=None)
ApdbTableData _get_chunks(self, ExtraTables table, Iterable[int] chunks)