Classes
class	ApdbCassandraTableData

class	PreparedStatementCache

class	SessionWrapper

Functions
pandas.DataFrame	pandas_dataframe_factory (list[str] colnames, list[tuple] rows)

ApdbCassandraTableData	raw_data_factory (list[str] colnames, list[tuple] rows)

pandas.DataFrame | ApdbCassandraTableData | list	select_concurrent (Session session, list[tuple] statements, str execution_profile, int concurrency)

Any	literal (Any v)

str	quote_id (str columnName)

Variables
bool	CASSANDRA_IMPORTED = True

	_LOG = logging.getLogger(__name__)

Function Documentation

◆ literal()

Any lsst.dax.apdb.cassandra_utils.literal ( Any v )

Transform object into a value for the query.

Definition at line 276 of file cassandra_utils.py.

def literal(v: Any) -> Any:
    """Convert a Python object into a value that can be used in a query.

    Parameters
    ----------
    v : `Any`
        Object to convert.

    Returns
    -------
    value : `Any`
        `None` if ``v`` is `None` or is reported as non-finite by
        `numpy.isfinite`; an integer number of milliseconds since
        1970-01-01 if ``v`` is a `datetime`; otherwise ``v`` itself.

    Notes
    -----
    The `datetime` conversion truncates to whole seconds before scaling to
    milliseconds, so any sub-second part is dropped. The epoch is the naive
    ``datetime(1970, 1, 1)``.
    """
    if v is None:
        return None
    if isinstance(v, datetime):
        # Whole seconds since the epoch first, then scale to milliseconds.
        whole_seconds = int((v - datetime(1970, 1, 1)) / timedelta(seconds=1))
        return whole_seconds * 1000
    if isinstance(v, (bytes, str, UUID, int)):
        # These types are passed through untouched.
        return v
    try:
        if np.isfinite(v):
            return v
        # NaN and infinities are replaced with None.
        return None
    except TypeError:
        # numpy cannot test this type for finiteness; return it unchanged.
        return v
 
 

◆ pandas_dataframe_factory()

pandas.DataFrame lsst.dax.apdb.cassandra_utils.pandas_dataframe_factory	(	list[str]	colnames,
		list[tuple]	rows )

Create pandas DataFrame from Cassandra result set.

Parameters
----------
colnames : `list` [ `str` ]
    Names of the columns.
rows : `list` of `tuple`
    Result rows.

Returns
-------
catalog : `pandas.DataFrame`
    DataFrame with the result set.

Notes
-----
When using this method as row factory for Cassandra, the resulting
DataFrame should be accessed in a non-standard way using
`ResultSet._current_rows` attribute.

Definition at line 134 of file cassandra_utils.py.

def pandas_dataframe_factory(colnames: list[str], rows: list[tuple]) -> pandas.DataFrame:
    """Build a pandas DataFrame from a Cassandra result set.

    Parameters
    ----------
    colnames : `list` [ `str` ]
        Column names, used as the DataFrame columns.
    rows : `list` of `tuple`
        Result rows, one tuple per record.

    Returns
    -------
    catalog : `pandas.DataFrame`
        DataFrame holding the result set.

    Notes
    -----
    When this function is used as a Cassandra row factory, the resulting
    DataFrame has to be retrieved in a non-standard way, through the
    `ResultSet._current_rows` attribute.
    """
    catalog = pandas.DataFrame.from_records(rows, columns=colnames)
    return catalog
 
 

◆ quote_id()

str lsst.dax.apdb.cassandra_utils.quote_id ( str columnName )

Smart quoting for column names. Lower-case names are not quoted.

Definition at line 293 of file cassandra_utils.py.

def quote_id(columnName: str) -> str:
    """Quote a column name unless it is lower-case.

    Parameters
    ----------
    columnName : `str`
        Column name to quote.

    Returns
    -------
    quoted : `str`
        ``columnName`` unchanged when ``columnName.islower()`` is true,
        otherwise ``columnName`` wrapped in double quotes. Names without any
        cased characters (including the empty string) are quoted as well,
        because `str.islower` returns `False` for them.
    """
    return columnName if columnName.islower() else '"' + columnName + '"'

◆ raw_data_factory()

ApdbCassandraTableData lsst.dax.apdb.cassandra_utils.raw_data_factory	(	list[str]	colnames,
		list[tuple]	rows )

Wrap unmodified data, a list of column names and a list of rows, into
an `ApdbCassandraTableData` object.

Parameters
----------
colnames : `list` [ `str` ]
    Names of the columns.
rows : `list` of `tuple`
    Result rows.

Returns
-------
data : `ApdbCassandraTableData`
    Input data wrapped into ApdbCassandraTableData.

Notes
-----
When using this method as row factory for Cassandra, the resulting
object should be accessed in a non-standard way using
`ResultSet._current_rows` attribute.

Definition at line 158 of file cassandra_utils.py.

def raw_data_factory(colnames: list[str], rows: list[tuple]) -> ApdbCassandraTableData:
    """Wrap unmodified column names and rows into `ApdbCassandraTableData`.

    Parameters
    ----------
    colnames : `list` [ `str` ]
        Column names.
    rows : `list` of `tuple`
        Result rows.

    Returns
    -------
    data : `ApdbCassandraTableData`
        The input column names and rows wrapped into
        `ApdbCassandraTableData`.

    Notes
    -----
    When this function is used as a Cassandra row factory, the resulting
    object has to be retrieved in a non-standard way, through the
    `ResultSet._current_rows` attribute.
    """
    table_data = ApdbCassandraTableData(colnames, rows)
    return table_data
 
 

◆ select_concurrent()

pandas.DataFrame | ApdbCassandraTableData | list lsst.dax.apdb.cassandra_utils.select_concurrent	(	Session	session,
		list[tuple]	statements,
		str	execution_profile,
		int	concurrency )

Execute a bunch of queries concurrently and merge their results into
a single result.

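Parameters
----------
session : `Session`
    Cassandra session used to execute the statements.
statements : `list` [ `tuple` ]
    List of statements and their parameters, passed directly to
    ``execute_concurrent()``.
execution_profile : `str`
    Execution profile name.
concurrency : `int`
    Value passed to ``execute_concurrent()`` as its ``concurrency``
    argument.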

Returns
-------
result
    Combined result of multiple statements, type of the result depends on
    specific row factory defined in execution profile. If row factory is
    `pandas_dataframe_factory` then pandas DataFrame is created from a
    combined result. If row factory is `raw_data_factory` then
    `ApdbCassandraTableData` is built from all records. Otherwise a list of
    rows is returned, type of each row is determined by the row factory.

Notes
-----
This method can raise any exception that is raised by one of the provided
statements.

Definition at line 183 of file cassandra_utils.py.

) -> pandas.DataFrame | ApdbCassandraTableData | list:
    """Execute a bunch of queries concurrently and merge their results into
    a single result.

    Parameters
    ----------
    session : `Session`
        Session used to execute the statements. It is wrapped in a
        `SessionWrapper` together with ``execution_profile`` before being
        handed to ``execute_concurrent()``, and is also used to look up the
        execution profile.
    statements : `list` [ `tuple` ]
        List of statements and their parameters, passed directly to
        ``execute_concurrent()``.
    execution_profile : `str`
        Execution profile name.
    concurrency : `int`
        Value passed to ``execute_concurrent()`` as its ``concurrency``
        argument.

    Returns
    -------
    result
        Combined result of multiple statements, type of the result depends on
        specific row factory defined in execution profile. If row factory is
        `pandas_dataframe_factory` then pandas DataFrame is created from a
        combined result. If row factory is `raw_data_factory` then
        `ApdbCassandraTableData` is built from all records. Otherwise a list of
        rows is returned, type of each row is determined by the row factory.

    Notes
    -----
    This method can raise any exception that is raised by one of the provided
    statements.
    """
    session_wrap = SessionWrapper(session, execution_profile)
    # Errors are not raised by execute_concurrent() itself
    # (raise_on_first_error=False); each (success, result) pair is checked in
    # the loops below, and a failed result is logged and raised from there.
    results = execute_concurrent(
        session_wrap,
        statements,
        results_generator=True,
        raise_on_first_error=False,
        concurrency=concurrency,
    )
 
    # The row factory configured in the profile decides how results merge.
    ep = session.get_execution_profile(execution_profile)
    if ep.row_factory is raw_data_factory:
        # Merge the per-statement ApdbCassandraTableData objects into one by
        # appending every later result to the first. The debug message below
        # mentions pandas, but this branch returns ApdbCassandraTableData.
        _LOG.debug("making pandas data frame out of rows/columns")
        table_data: ApdbCassandraTableData | None = None
        for success, result in results:
            if success:
                data = result._current_rows
                assert isinstance(data, ApdbCassandraTableData)
                if table_data is None:
                    table_data = data
                else:
                    # The first result's data object is extended in place.
                    table_data.append(data)
            else:
                _LOG.error("error returned by query: %s", result)
                raise result
        if table_data is None:
            # No results at all: return an empty table.
            table_data = ApdbCassandraTableData([], [])
        return table_data
 
    elif ep.row_factory is pandas_dataframe_factory:
        # Merge multiple DataFrames into one
        _LOG.debug("making pandas data frame out of set of data frames")
        dataframes = []
        for success, result in results:
            if success:
                dataframes.append(result._current_rows)
            else:
                _LOG.error("error returned by query: %s", result)
                raise result
        # Concatenate all frames, but skip empty ones.
        non_empty = [df for df in dataframes if not df.empty]
        if not non_empty:
            # If all frames are empty, return the first one.
            # NOTE(review): if ``results`` yields nothing (e.g. ``statements``
            # is empty), ``dataframes`` is empty too and this indexing raises
            # IndexError.
            catalog = dataframes[0]
        elif len(non_empty) == 1:
            catalog = non_empty[0]
        else:
            catalog = pandas.concat(non_empty)
        _LOG.debug("pandas catalog shape: %s", catalog.shape)
        return catalog
 
    else:
        # Just concatenate all rows into a single collection.
        rows = []
        for success, result in results:
            if success:
                # Iterating over the result yields rows as produced by the
                # row factory.
                rows.extend(result)
            else:
                _LOG.error("error returned by query: %s", result)
                raise result
        _LOG.debug("number of rows: %s", len(rows))
        return rows
 
 

Variable Documentation

◆ _LOG

lsst.dax.apdb.cassandra_utils._LOG = logging.getLogger(__name__)

protected

Definition at line 56 of file cassandra_utils.py.

◆ CASSANDRA_IMPORTED

bool lsst.dax.apdb.cassandra_utils.CASSANDRA_IMPORTED = True

Definition at line 50 of file cassandra_utils.py.

Classes

Functions

Variables