LSST Applications g013ef56533+63812263fb,g083dd6704c+a047e97985,g199a45376c+0ba108daf9,g1fd858c14a+fde7a7a78c,g210f2d0738+db0c280453,g262e1987ae+abed931625,g29ae962dfc+058d1915d8,g2cef7863aa+aef1011c0b,g35bb328faa+8c5ae1fdc5,g3fd5ace14f+64337f1634,g47891489e3+f459a6810c,g53246c7159+8c5ae1fdc5,g54cd7ddccb+890c8e1e5d,g5a60e81ecd+d9e514a434,g64539dfbff+db0c280453,g67b6fd64d1+f459a6810c,g6ebf1fc0d4+8c5ae1fdc5,g7382096ae9+36d16ea71a,g74acd417e5+c70e70fbf6,g786e29fd12+668abc6043,g87389fa792+8856018cbb,g89139ef638+f459a6810c,g8d7436a09f+1b779678e3,g8ea07a8fe4+81eaaadc04,g90f42f885a+34c0557caf,g97be763408+9583a964dd,g98a1a72a9c+028271c396,g98df359435+530b675b85,gb8cb2b794d+4e54f68785,gbf99507273+8c5ae1fdc5,gc2a301910b+db0c280453,gca7fc764a6+f459a6810c,gd7ef33dd92+f459a6810c,gdab6d2f7ff+c70e70fbf6,ge410e46f29+f459a6810c,ge41e95a9f2+db0c280453,geaed405ab2+e3b4b2a692,gf9a733ac38+8c5ae1fdc5,w.2025.43
LSST Data Management Base Package
Loading...
Searching...
No Matches
apdbCassandraReplica.py
Go to the documentation of this file.
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21
22from __future__ import annotations
23
24__all__ = ["ApdbCassandraReplica"]
25
26import logging
27from collections.abc import Iterable, Mapping, Sequence
28from typing import TYPE_CHECKING, cast
29
30import astropy.time
31import felis.datamodel
32
33from ..apdbReplica import ApdbReplica, ApdbTableData, ReplicaChunk
34from ..apdbSchema import ApdbTables
35from ..apdbUpdateRecord import ApdbUpdateRecord
36from ..monitor import MonAgent
37from ..schema_model import ExtraDataTypes
38from ..timer import Timer
39from ..versionTuple import VersionTuple
40from .apdbCassandraSchema import ExtraTables
41from .cassandra_utils import (
42 ApdbCassandraTableData,
43 execute_concurrent,
44 select_concurrent,
45)
46
47if TYPE_CHECKING:
48 from .apdbCassandra import ApdbCassandra
49
50_LOG = logging.getLogger(__name__)
51
52_MON = MonAgent(__name__)
53
54VERSION = VersionTuple(1, 1, 1)
55"""Version for the code controlling replication tables. This needs to be
56updated following compatibility rules when schema produced by this code
57changes.
58"""
59
60
62 """Implementation of `ApdbReplica` for Cassandra backend.
63
64 Parameters
65 ----------
66 apdb : `ApdbCassandra`
 67    Instance of ApdbCassandra for database.
68 """
69
70 def __init__(self, apdb: ApdbCassandra):
71 # Note that ApdbCassandra instance must stay alive while this object
72 # exists, so we keep reference to it.
73 self._apdb = apdb
74
75 def _timer(self, name: str, *, tags: Mapping[str, str | int] | None = None) -> Timer:
76 """Create `Timer` instance given its name."""
77 return Timer(name, _MON, tags=tags)
78
79 def schemaVersion(self) -> VersionTuple:
80 # Docstring inherited from base class.
81 context = self._apdb._context
82 return context.db_versions.schema_version
83
    @classmethod
    def apdbReplicaImplementationVersion(cls) -> VersionTuple:
        # Docstring inherited from base class.
        # Returns the module-level VERSION constant, which tracks the
        # replication table schema produced by this code.
        return VERSION
88
89 @classmethod
90 def hasChunkSubPartitions(cls, version: VersionTuple) -> bool:
91 """Return True if replica chunk tables have sub-partitions."""
92 return version >= VersionTuple(1, 1, 0)
93
94 @classmethod
95 def hasUpdateRecordChunks(cls, version: VersionTuple) -> bool:
96 """Return True if ApdbUpdateRecordChunks should exists."""
97 return version >= VersionTuple(1, 1, 1)
98
99 def getReplicaChunks(self) -> list[ReplicaChunk] | None:
100 # docstring is inherited from a base class
101 context = self._apdb._context
102 config = context.config
103
104 if not context.schema.replication_enabled:
105 return None
106
107 # everything goes into a single partition
108 partition = 0
109
110 table_name = context.schema.tableName(ExtraTables.ApdbReplicaChunks)
111 # We want to avoid timezone mess so return timestamps as milliseconds.
112 query = (
113 "SELECT toUnixTimestamp(last_update_time), apdb_replica_chunk, unique_id "
114 f'FROM "{config.keyspace}"."{table_name}" WHERE partition = %s'
115 )
116
117 with self._timer("chunks_select_time") as timer:
118 result = context.session.execute(
119 query,
120 (partition,),
121 timeout=config.connection_config.read_timeout,
122 execution_profile="read_tuples",
123 )
124 # order by last_update_time
125 rows = sorted(result)
126 timer.add_values(row_count=len(rows))
127 return [
129 id=row[1],
130 last_update_time=astropy.time.Time(row[0] / 1000, format="unix_tai"),
131 unique_id=row[2],
132 )
133 for row in rows
134 ]
135
    def deleteReplicaChunks(self, chunks: Iterable[int]) -> None:
        # docstring is inherited from a base class
        context = self._apdb._context
        config = context.config

        if not context.schema.replication_enabled:
            raise ValueError("APDB is not configured for replication")

        # Everything goes into a single partition.
        partition = 0

        # Iterable can be single pass, make everything that we need from it
        # in a single loop.
        repl_table_params = []
        chunk_table_params: list[tuple] = []
        for chunk in chunks:
            repl_table_params.append((partition, chunk))
            if context.has_chunk_sub_partitions:
                # Sub-partitioned chunk tables need one delete per
                # (chunk, subchunk) pair.
                for subchunk in range(config.replica_sub_chunk_count):
                    chunk_table_params.append((chunk, subchunk))
            else:
                chunk_table_params.append((chunk,))
        # Anything to do at all?
        if not repl_table_params:
            return

        # First delete the chunk records themselves from ApdbReplicaChunks.
        table_name = context.schema.tableName(ExtraTables.ApdbReplicaChunks)
        query = (
            f'DELETE FROM "{config.keyspace}"."{table_name}" WHERE partition = ? AND apdb_replica_chunk = ?'
        )
        statement = context.preparer.prepare(query)

        queries = [(statement, param) for param in repl_table_params]
        with self._timer("chunks_delete_time") as timer:
            execute_concurrent(context.session, queries)
            timer.add_values(row_count=len(queries))

        # Also remove those chunk_ids from Dia*Chunks tables.
        tables = list(ExtraTables.replica_chunk_tables(context.has_chunk_sub_partitions).values())
        if context.has_update_record_chunks_table:
            tables.append(ExtraTables.ApdbUpdateRecordChunks)
        for table in tables:
            table_name = context.schema.tableName(table)
            query = f'DELETE FROM "{config.keyspace}"."{table_name}" WHERE apdb_replica_chunk = ?'
            if context.has_chunk_sub_partitions:
                query += " AND apdb_replica_subchunk = ?"
            statement = context.preparer.prepare(query)

            queries = [(statement, param) for param in chunk_table_params]
            # NOTE(review): timer name "table_chunk_detele_time" is
            # misspelled ("detele" vs "delete"); kept as-is because renaming
            # would change emitted metric names — confirm with monitoring
            # consumers before fixing.
            with self._timer("table_chunk_detele_time", tags={"table": table_name}) as timer:
                execute_concurrent(context.session, queries)
                timer.add_values(row_count=len(queries))
188
    def getTableDataChunks(self, table: ApdbTables, chunks: Iterable[int]) -> ApdbTableData:
        # docstring is inherited from a base class
        context = self._apdb._context
        config = context.config

        if not context.schema.replication_enabled:
            raise ValueError("APDB is not configured for replication")
        if table not in ExtraTables.replica_chunk_tables(False):
            raise ValueError(f"Table {table} does not support replica chunks.")

        # We need to iterate few times.
        chunks = list(chunks)

        # If schema was migrated then a chunk can appear in either old or new
        # chunk table (e.g. DiaObjectChunks or DiaObjectChunks2). Chunk table
        # has a column which will be set to true for new table.
        has_chunk_sub_partitions: dict[int, bool] = {}
        if context.has_chunk_sub_partitions:
            # Ask ApdbReplicaChunks which of the requested chunks were
            # written with sub-partitioning.
            table_name = context.schema.tableName(ExtraTables.ApdbReplicaChunks)
            chunks_str = ",".join(str(chunk_id) for chunk_id in chunks)
            query = (
                f'SELECT apdb_replica_chunk, has_subchunks FROM "{config.keyspace}"."{table_name}" '
                f"WHERE partition = %s and apdb_replica_chunk IN ({chunks_str})"
            )
            partition = 0
            result = context.session.execute(
                query,
                (partition,),
                timeout=config.connection_config.read_timeout,
                execution_profile="read_tuples",
            )
            # Rows are (apdb_replica_chunk, has_subchunks) pairs.
            has_chunk_sub_partitions = dict(result)
        else:
            has_chunk_sub_partitions = dict.fromkeys(chunks, False)

        # Check what kind of tables we want to query, if chunk list is empty
        # then use tables which should exist in the schema.
        if has_chunk_sub_partitions:
            have_subchunks = any(has_chunk_sub_partitions.values())
            have_non_subchunks = not all(has_chunk_sub_partitions.values())
        else:
            have_subchunks = context.has_chunk_sub_partitions
            have_non_subchunks = not have_subchunks

        # NOTE: if an existing database is migrated and has both types of chunk
        # tables (e.g. DiaObjectChunks and DiaObjectChunks2) it is possible
        # that the same chunk can appear in both tables. In reality schema
        # migration should only happen during the downtime, so there will be
        # sufficient gap and a different chunk ID will be used for new chunks.

        # Results from the sub-partitioned and non-sub-partitioned tables are
        # collected separately and merged at the end.
        table_data: ApdbCassandraTableData | None = None
        table_data_subchunk: ApdbCassandraTableData | None = None

        table_name = context.schema.tableName(ExtraTables.replica_chunk_tables(False)[table])
        with self._timer("table_chunk_select_time", tags={"table": table_name}) as timer:
            if have_subchunks:
                replica_table = ExtraTables.replica_chunk_tables(True)[table]
                table_name = context.schema.tableName(replica_table)
                query = (
                    f'SELECT * FROM "{config.keyspace}"."{table_name}" '
                    "WHERE apdb_replica_chunk = ? AND apdb_replica_subchunk = ?"
                )
                statement = context.preparer.prepare(query)

                # One query per (chunk, subchunk) pair, executed concurrently.
                queries: list[tuple] = []
                for chunk in chunks:
                    if has_chunk_sub_partitions.get(chunk, False):
                        for subchunk in range(config.replica_sub_chunk_count):
                            queries.append((statement, (chunk, subchunk)))
                if not queries and not have_non_subchunks:
                    # Add a dummy query to return correct set of columns.
                    queries.append((statement, (-1, -1)))

                if queries:
                    table_data_subchunk = cast(
                        ApdbCassandraTableData,
                        select_concurrent(
                            context.session,
                            queries,
                            "read_raw_multi",
                            config.connection_config.read_concurrency,
                        ),
                    )

            if have_non_subchunks:
                replica_table = ExtraTables.replica_chunk_tables(False)[table]
                table_name = context.schema.tableName(replica_table)
                query = f'SELECT * FROM "{config.keyspace}"."{table_name}" WHERE apdb_replica_chunk = ?'
                statement = context.preparer.prepare(query)

                # One query per chunk that was not sub-partitioned. Chunks
                # missing from has_chunk_sub_partitions default to True here,
                # so they are intentionally skipped in this branch.
                queries = []
                for chunk in chunks:
                    if not has_chunk_sub_partitions.get(chunk, True):
                        queries.append((statement, (chunk,)))
                if not queries and not table_data_subchunk:
                    # Add a dummy query to return correct set of columns.
                    queries.append((statement, (-1,)))

                if queries:
                    table_data = cast(
                        ApdbCassandraTableData,
                        select_concurrent(
                            context.session,
                            queries,
                            "read_raw_multi",
                            config.connection_config.read_concurrency,
                        ),
                    )

            # Merge if both are non-empty.
            if table_data and table_data_subchunk:
                # Drop the subchunk column so the two column sets match
                # before appending.
                table_data_subchunk.project(drop=["apdb_replica_subchunk"])
                table_data.append(table_data_subchunk)
            elif table_data_subchunk:
                table_data = table_data_subchunk
            elif not table_data:
                # The dummy-query logic above guarantees at least one result.
                raise AssertionError("above logic is incorrect")

            timer.add_values(row_count=len(table_data.rows()))

        table_schema = self._apdb._schema.tableSchemas[table]
        # Regular tables should never have columns of ExtraDataTypes, this
        # is just to make mypy happy.
        column_types = {
            column.name: column.datatype
            for column in table_schema.columns
            if not isinstance(column.datatype, ExtraDataTypes)
        }
        column_types["apdb_replica_chunk"] = felis.datamodel.DataType.long
        # It may also have subchunk column, we do not always drop it, and
        # clients should not need it, but we need to provide type for it.
        column_types["apdb_replica_subchunk"] = felis.datamodel.DataType.int
        table_data.set_column_types(column_types)

        return table_data
324
    def getUpdateRecordChunks(self, chunks: Iterable[int]) -> Sequence[ApdbUpdateRecord]:
        # docstring is inherited from a base class
        context = self._apdb._context
        config = context.config

        if not context.schema.replication_enabled:
            raise ValueError("APDB is not configured for replication")

        table_name = context.schema.tableName(ExtraTables.ApdbUpdateRecordChunks)

        records = []
        if context.has_chunk_sub_partitions:
            # Sub-partitioned table: query all subchunks of each chunk with
            # an IN list, one execute per chunk.
            subchunks = ",".join(str(val) for val in range(config.replica_sub_chunk_count))
            query = (
                f'SELECT * FROM "{config.keyspace}"."{table_name}" '
                f"WHERE apdb_replica_chunk = %s AND apdb_replica_subchunk IN ({subchunks})"
            )

            with self._timer("select_update_record_time", tags={"table": table_name}) as timer:
                for chunk in chunks:
                    result = context.session.execute(query, [chunk])
                    for row in result:
                        records.append(
                            ApdbUpdateRecord.from_json(
                                row.update_time_ns, row.update_order, row.update_payload
                            )
                        )
                timer.add_values(row_count=len(records))

        else:
            # Non-sub-partitioned table: a single query with all chunk IDs
            # in an IN list.
            chunks_str = ",".join(str(val) for val in chunks)
            query = (
                f'SELECT * FROM "{config.keyspace}"."{table_name}" WHERE apdb_replica_chunk IN ({chunks_str})'
            )

            with self._timer("select_update_record_time", tags={"table": table_name}) as timer:
                result = context.session.execute(query)
                for row in result:
                    records.append(
                        ApdbUpdateRecord.from_json(row.update_time_ns, row.update_order, row.update_payload)
                    )
                timer.add_values(row_count=len(records))

        # Sort before returning — presumably ApdbUpdateRecord orders by
        # (update_time_ns, update_order); confirm against its definition.
        records.sort()
        return records
ApdbTableData getTableDataChunks(self, ApdbTables table, Iterable[int] chunks)
Timer _timer(self, str name, *, Mapping[str, str|int]|None tags=None)
Sequence[ApdbUpdateRecord] getUpdateRecordChunks(self, Iterable[int] chunks)