LSST Applications 27.0.0,g0265f82a02+469cd937ee,g02d81e74bb+21ad69e7e1,g1470d8bcf6+cbe83ee85a,g2079a07aa2+e67c6346a6,g212a7c68fe+04a9158687,g2305ad1205+94392ce272,g295015adf3+81dd352a9d,g2bbee38e9b+469cd937ee,g337abbeb29+469cd937ee,g3939d97d7f+72a9f7b576,g487adcacf7+71499e7cba,g50ff169b8f+5929b3527e,g52b1c1532d+a6fc98d2e7,g591dd9f2cf+df404f777f,g5a732f18d5+be83d3ecdb,g64a986408d+21ad69e7e1,g858d7b2824+21ad69e7e1,g8a8a8dda67+a6fc98d2e7,g99cad8db69+f62e5b0af5,g9ddcbc5298+d4bad12328,ga1e77700b3+9c366c4306,ga8c6da7877+71e4819109,gb0e22166c9+25ba2f69a1,gb6a65358fc+469cd937ee,gbb8dafda3b+69d3c0e320,gc07e1c2157+a98bf949bb,gc120e1dc64+615ec43309,gc28159a63d+469cd937ee,gcf0d15dbbd+72a9f7b576,gdaeeff99f8+a38ce5ea23,ge6526c86ff+3a7c1ac5f1,ge79ae78c31+469cd937ee,gee10cc3b42+a6fc98d2e7,gf1cff7945b+21ad69e7e1,gfbcc870c63+9a11dc8c8f
LSST Data Management Base Package
Loading...
Searching...
No Matches
apdbCassandraReplica.py
Go to the documentation of this file.
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22from __future__ import annotations
23
24__all__ = ["ApdbCassandraReplica"]
25
26import logging
27from collections.abc import Iterable
28from typing import TYPE_CHECKING, Any, cast
29
30import astropy.time
31from lsst.utils.iteration import chunk_iterable
32
33from ..apdbReplica import ApdbReplica, ApdbTableData, ReplicaChunk
34from ..timer import Timer
35from ..versionTuple import VersionTuple
36from .apdbCassandraSchema import ApdbCassandraSchema, ExtraTables
37from .cassandra_utils import ApdbCassandraTableData, PreparedStatementCache
38
39if TYPE_CHECKING:
40 from .apdbCassandra import ApdbCassandra
41
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)

# Version reported by ``apdbReplicaImplementationVersion``.
VERSION = VersionTuple(1, 0, 0)
"""Version for the code controlling replication tables. This needs to be
updated following compatibility rules when schema produced by this code
changes.
"""
49
50
52 """Implementation of `ApdbReplica` for Cassandra backend.
53
54 Parameters
55 ----------
56 apdb : `ApdbCassandra`
57 Instance of ApdbCassandra for database.
58 schema : `ApdbCassandraSchema`
59 Instance of ApdbCassandraSchema for database.
60 session
61 Instance of cassandra session type.
62 """
63
def __init__(self, apdb: ApdbCassandra, schema: ApdbCassandraSchema, session: Any):
    """Keep references to the objects used by the query methods.

    Parameters
    ----------
    apdb : `ApdbCassandra`
        Owning APDB instance; its ``config`` attribute is read here.
    schema : `ApdbCassandraSchema`
        Schema helper used to resolve table names.
    session
        Cassandra session used to execute statements.
    """
    # Note that ApdbCassandra instance must stay alive while this object
    # exists, so we keep reference to it.
    self._apdb = apdb
    self._schema = schema
    self._session = session
    self._config = apdb.config

    # Cache for prepared statements; the query methods obtain their
    # statements through ``self._preparer.prepare(query)``.
    # NOTE(review): this assignment was absent after the comment above
    # even though ``self._preparer`` is used by the other methods. The
    # cache is assumed to be built from the session -- confirm against
    # ``cassandra_utils.PreparedStatementCache``.
    self._preparer = PreparedStatementCache(self._session)
74
@classmethod
def apdbReplicaImplementationVersion(cls) -> VersionTuple:
    # Docstring inherited from base class.
    # The value is the module-level ``VERSION`` constant, so it is the
    # same for the class and all of its instances.
    return VERSION
79
def getReplicaChunks(self) -> list[ReplicaChunk] | None:
    # docstring is inherited from a base class
    # When the schema has no replica chunk tables, None is returned
    # rather than an empty list.
    if not self._schema.has_replica_chunks:
        return None

    # everything goes into a single partition
    partition = 0

    table_name = self._schema.tableName(ExtraTables.ApdbReplicaChunks)
    # We want to avoid timezone mess so return timestamps as milliseconds.
    query = (
        "SELECT toUnixTimestamp(last_update_time), apdb_replica_chunk, unique_id "
        f'FROM "{self._config.keyspace}"."{table_name}" WHERE partition = ?'
    )

    result = self._session.execute(
        self._preparer.prepare(query),
        (partition,),
        timeout=self._config.read_timeout,
        execution_profile="read_tuples",
    )
    # order by last_update_time: each row is indexed positionally in the
    # SELECT order (timestamp, chunk id, unique id), so plain sorting
    # compares the timestamp first.
    rows = sorted(result)
    return [
        ReplicaChunk(
            id=row[1],
            # Milliseconds from the query are converted to seconds.
            last_update_time=astropy.time.Time(row[0] / 1000, format="unix_tai"),
            unique_id=row[2],
        )
        for row in rows
    ]
111
def deleteReplicaChunks(self, chunks: Iterable[int]) -> None:
    # docstring is inherited from a base class
    if not self._schema.has_replica_chunks:
        raise ValueError("APDB is not configured for replication")

    # Nothing below depends on the batch being processed, so it is
    # resolved once instead of once per batch.
    keyspace = self._config.keyspace
    timeout = self._config.remove_timeout
    # everything goes into a single partition
    partition = 0
    registry_table = self._schema.tableName(ExtraTables.ApdbReplicaChunks)
    data_tables = [
        self._schema.tableName(table)
        for table in (
            ExtraTables.DiaObjectChunks,
            ExtraTables.DiaSourceChunks,
            ExtraTables.DiaForcedSourceChunks,
        )
    ]

    # There is 64k limit on number of markers in Cassandra CQL
    for chunk_ids in chunk_iterable(chunks, 20_000):
        # One "?" marker per chunk ID in this batch.
        params = ",".join("?" * len(chunk_ids))

        query = (
            f'DELETE FROM "{keyspace}"."{registry_table}" '
            f"WHERE partition = ? AND apdb_replica_chunk IN ({params})"
        )
        self._session.execute(
            self._preparer.prepare(query),
            [partition] + list(chunk_ids),
            timeout=timeout,
        )

        # Also remove those chunk_ids from Dia*Chunks tables.
        for table_name in data_tables:
            query = (
                f'DELETE FROM "{keyspace}"."{table_name}"'
                f" WHERE apdb_replica_chunk IN ({params})"
            )
            self._session.execute(
                self._preparer.prepare(query),
                chunk_ids,
                timeout=timeout,
            )
152
def getDiaObjectsChunks(self, chunks: Iterable[int]) -> ApdbTableData:
    # docstring is inherited from a base class
    # Delegates to the shared reader, selecting the DiaObject chunk table.
    return self._get_chunks(table=ExtraTables.DiaObjectChunks, chunks=chunks)
156
def getDiaSourcesChunks(self, chunks: Iterable[int]) -> ApdbTableData:
    # docstring is inherited from a base class
    # Delegates to the shared reader, selecting the DiaSource chunk table.
    return self._get_chunks(table=ExtraTables.DiaSourceChunks, chunks=chunks)
160
def getDiaForcedSourcesChunks(self, chunks: Iterable[int]) -> ApdbTableData:
    # docstring is inherited from a base class
    # Delegates to the shared reader, selecting the DiaForcedSource chunk
    # table.
    return self._get_chunks(table=ExtraTables.DiaForcedSourceChunks, chunks=chunks)
164
165 def _get_chunks(self, table: ExtraTables, chunks: Iterable[int]) -> ApdbTableData:
166 """Return records from a particular table given set of insert IDs."""
167 if not self._schema.has_replica_chunks:
168 raise ValueError("APDB is not configured for replication")
169
170 # We do not expect too may chunks in this query.
171 chunks = list(chunks)
172 params = ",".join("?" * len(chunks))
173
174 table_name = self._schema.tableName(table)
175 # I know that chunk table schema has only regular APDB columns plus
176 # apdb_replica_chunk column, and this is exactly what we need to return
177 # from this method, so selecting a star is fine here.
178 query = (
179 f'SELECT * FROM "{self._config.keyspace}"."{table_name}" WHERE apdb_replica_chunk IN ({params})'
180 )
181 statement = self._preparer.prepare(query)
182
183 with Timer(f"{table_name} select chunk", self._config.timer):
184 result = self._session.execute(statement, chunks, execution_profile="read_raw")
185 table_data = cast(ApdbCassandraTableData, result._current_rows)
186 return table_data
__init__(self, ApdbCassandra apdb, ApdbCassandraSchema schema, Any session)
ApdbTableData _get_chunks(self, ExtraTables table, Iterable[int] chunks)