LSST Applications 28.0.0,g1653933729+a8ce1bb630,g1a997c3884+a8ce1bb630,g28da252d5a+5bd70b7e6d,g2bbee38e9b+638fca75ac,g2bc492864f+638fca75ac,g3156d2b45e+07302053f8,g347aa1857d+638fca75ac,g35bb328faa+a8ce1bb630,g3a166c0a6a+638fca75ac,g3e281a1b8c+7bbb0b2507,g4005a62e65+17cd334064,g414038480c+5b5cd4fff3,g41af890bb2+4ffae9de63,g4e1a3235cc+0f1912dca3,g6249c6f860+3c3976f90c,g80478fca09+46aba80bd6,g82479be7b0+77990446f6,g858d7b2824+78ba4d1ce1,g89c8672015+f667a5183b,g9125e01d80+a8ce1bb630,ga5288a1d22+2a6264e9ca,gae0086650b+a8ce1bb630,gb58c049af0+d64f4d3760,gc22bb204ba+78ba4d1ce1,gc28159a63d+638fca75ac,gcf0d15dbbd+32ddb6096f,gd6b7c0dfd1+3e339405e9,gda3e153d99+78ba4d1ce1,gda6a2b7d83+32ddb6096f,gdaeeff99f8+1711a396fd,gdd5a9049c5+b18c39e5e3,ge2409df99d+a5e4577cdc,ge33fd446bb+78ba4d1ce1,ge79ae78c31+638fca75ac,gf0baf85859+64e8883e75,gf5289d68f6+e1b046a8d7,gfa443fc69c+91d9ed1ecf,gfda6b12a05+8419469a56
LSST Data Management Base Package
Loading...
Searching...
No Matches
apdbCassandraReplica.py
Go to the documentation of this file.
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22from __future__ import annotations
23
24__all__ = ["ApdbCassandraReplica"]
25
26import logging
27from collections.abc import Iterable, Mapping
28from typing import TYPE_CHECKING, Any, cast
29
30import astropy.time
31from lsst.utils.iteration import chunk_iterable
32
33from ..apdbReplica import ApdbReplica, ApdbTableData, ReplicaChunk
34from ..monitor import MonAgent
35from ..timer import Timer
36from ..versionTuple import VersionTuple
37from .apdbCassandraSchema import ApdbCassandraSchema, ExtraTables
38from .cassandra_utils import ApdbCassandraTableData, PreparedStatementCache
39
40if TYPE_CHECKING:
41 from .apdbCassandra import ApdbCassandra
42
# Module-level logger; it is additionally handed to timers when the
# ``timer`` configuration option is set.
_LOG = logging.getLogger(__name__)

# Monitoring agent named after this module; every timer created by the
# replica class reports to it.
_MON = MonAgent(__name__)

VERSION = VersionTuple(1, 0, 0)
"""Version for the code controlling replication tables. This needs to be
updated following compatibility rules when schema produced by this code
changes.
"""
52
53
class ApdbCassandraReplica(ApdbReplica):
    """Implementation of `ApdbReplica` for Cassandra backend.

    Parameters
    ----------
    apdb : `ApdbCassandra`
        Instance of ApdbCassandra for database.
    schema : `ApdbCassandraSchema`
        Instance of ApdbCassandraSchema for database.
    session
        Instance of cassandra session type.
    """

    def __init__(self, apdb: ApdbCassandra, schema: ApdbCassandraSchema, session: Any):
        # Note that ApdbCassandra instance must stay alive while this object
        # exists, so we keep reference to it.
        self._apdb = apdb
        self._schema = schema
        self._session = session
        self._config = apdb.config

        # Cache for prepared statements; every query method below prepares
        # its CQL through this object.
        self._preparer = PreparedStatementCache(self._session)

        # Positional arguments forwarded to every `Timer`: the monitoring
        # agent always, plus the module logger when timing is enabled in
        # the configuration.
        self._timer_args: list[MonAgent | logging.Logger] = [_MON]
        if self._config.timer:
            self._timer_args.append(_LOG)

    def _timer(self, name: str, *, tags: Mapping[str, str | int] | None = None) -> Timer:
        """Create `Timer` instance given its name.

        Parameters
        ----------
        name : `str`
            Name passed to the timer.
        tags : `~collections.abc.Mapping` [`str`, `str` or `int`], optional
            Tags passed through to the timer unchanged.

        Returns
        -------
        timer : `Timer`
            Timer constructed with the agents configured in ``__init__``.
        """
        return Timer(name, *self._timer_args, tags=tags)

    @classmethod
    def apdbReplicaImplementationVersion(cls) -> VersionTuple:
        # Docstring inherited from base class.
        return VERSION

    def getReplicaChunks(self) -> list[ReplicaChunk] | None:
        # docstring is inherited from a base class
        if not self._schema.has_replica_chunks:
            return None

        # everything goes into a single partition
        partition = 0

        table_name = self._schema.tableName(ExtraTables.ApdbReplicaChunks)
        # We want to avoid timezone mess so return timestamps as milliseconds.
        query = (
            "SELECT toUnixTimestamp(last_update_time), apdb_replica_chunk, unique_id "
            f'FROM "{self._config.keyspace}"."{table_name}" WHERE partition = ?'
        )

        with self._timer("chunks_select_time") as timer:
            result = self._session.execute(
                self._preparer.prepare(query),
                (partition,),
                timeout=self._config.read_timeout,
                execution_profile="read_tuples",
            )
            # order by last_update_time: it is the first selected column, so
            # plain sorting of the rows orders on it first (assumes rows are
            # tuple-like, as the positional indexing below also requires).
            rows = sorted(result)
            timer.add_values(row_count=len(rows))
            return [
                ReplicaChunk(
                    id=row[1],
                    # Milliseconds from the query are converted to seconds.
                    last_update_time=astropy.time.Time(row[0] / 1000, format="unix_tai"),
                    unique_id=row[2],
                )
                for row in rows
            ]

    def deleteReplicaChunks(self, chunks: Iterable[int]) -> None:
        # docstring is inherited from a base class
        if not self._schema.has_replica_chunks:
            raise ValueError("APDB is not configured for replication")

        keyspace = self._config.keyspace

        # everything goes into a single partition
        partition = 0

        # Table names do not depend on the batch, resolve them once.
        replica_chunks_table = self._schema.tableName(ExtraTables.ApdbReplicaChunks)
        data_chunk_tables = [
            self._schema.tableName(table)
            for table in (
                ExtraTables.DiaObjectChunks,
                ExtraTables.DiaSourceChunks,
                ExtraTables.DiaForcedSourceChunks,
            )
        ]

        # There is 64k limit on number of markers in Cassandra CQL
        for chunk_ids in chunk_iterable(chunks, 20_000):
            # Materialize the batch once and use it both for the number of
            # placeholders and for the bound values, so they cannot disagree.
            chunk_list = list(chunk_ids)
            params = ",".join("?" * len(chunk_list))

            query = (
                f'DELETE FROM "{keyspace}"."{replica_chunks_table}" '
                f"WHERE partition = ? AND apdb_replica_chunk IN ({params})"
            )

            with self._timer("chunks_delete_time") as timer:
                self._session.execute(
                    self._preparer.prepare(query),
                    [partition] + chunk_list,
                    timeout=self._config.remove_timeout,
                )
                timer.add_values(row_count=len(chunk_list))

            # Also remove those chunk_ids from Dia*Chunks tables.
            for table_name in data_chunk_tables:
                query = (
                    f'DELETE FROM "{keyspace}"."{table_name}"'
                    f" WHERE apdb_replica_chunk IN ({params})"
                )
                # NOTE(review): the timer name is misspelled ("detele"); it
                # is kept unchanged because it is emitted at runtime and
                # existing consumers may depend on it.
                with self._timer("table_chunk_detele_time", tags={"table": table_name}) as timer:
                    self._session.execute(
                        self._preparer.prepare(query),
                        chunk_list,
                        timeout=self._config.remove_timeout,
                    )
                    timer.add_values(row_count=len(chunk_list))

    def getDiaObjectsChunks(self, chunks: Iterable[int]) -> ApdbTableData:
        # docstring is inherited from a base class
        return self._get_chunks(ExtraTables.DiaObjectChunks, chunks)

    def getDiaSourcesChunks(self, chunks: Iterable[int]) -> ApdbTableData:
        # docstring is inherited from a base class
        return self._get_chunks(ExtraTables.DiaSourceChunks, chunks)

    def getDiaForcedSourcesChunks(self, chunks: Iterable[int]) -> ApdbTableData:
        # docstring is inherited from a base class
        return self._get_chunks(ExtraTables.DiaForcedSourceChunks, chunks)

    def _get_chunks(self, table: ExtraTables, chunks: Iterable[int]) -> ApdbTableData:
        """Return records from a particular table given set of replica
        chunk IDs.

        Parameters
        ----------
        table : `ExtraTables`
            Chunk table to read from.
        chunks : `~collections.abc.Iterable` [`int`]
            Values matched against the ``apdb_replica_chunk`` column.

        Returns
        -------
        data : `ApdbTableData`
            Rows of the table that belong to the given chunks.

        Raises
        ------
        ValueError
            Raised if the schema has no replica chunk tables.
        """
        if not self._schema.has_replica_chunks:
            raise ValueError("APDB is not configured for replication")

        # We do not expect too many chunks in this query.
        chunk_list = list(chunks)
        params = ",".join("?" * len(chunk_list))

        table_name = self._schema.tableName(table)
        # I know that chunk table schema has only regular APDB columns plus
        # apdb_replica_chunk column, and this is exactly what we need to return
        # from this method, so selecting a star is fine here.
        query = (
            f'SELECT * FROM "{self._config.keyspace}"."{table_name}" WHERE apdb_replica_chunk IN ({params})'
        )
        statement = self._preparer.prepare(query)

        with self._timer("table_chunk_select_time", tags={"table": table_name}) as timer:
            result = self._session.execute(statement, chunk_list, execution_profile="read_raw")
            # The result payload is taken directly from the result set and
            # treated as table data (presumably produced by the row factory
            # of the "read_raw" profile -- confirm against the session setup).
            table_data = cast(ApdbCassandraTableData, result._current_rows)
            timer.add_values(row_count=len(table_data.rows()))
        return table_data
__init__(self, ApdbCassandra apdb, ApdbCassandraSchema schema, Any session)
Timer _timer(self, str name, *, Mapping[str, str|int]|None tags=None)
ApdbTableData _get_chunks(self, ExtraTables table, Iterable[int] chunks)