LSSTApplications  19.0.0,19.0.0+1,19.0.0+10,19.0.0+13,19.0.0+3,19.0.0+5,19.0.0+9,tickets.DM-22703-ga158cbef15,w.2019.51
LSSTDataManagementBasePackage
Public Member Functions | Public Attributes | List of all members
lsst.dax.ppdb.ppdb.Ppdb Class Reference
Inheritance diagram for lsst.dax.ppdb.ppdb.Ppdb:

Public Member Functions

def __init__ (self, config, afw_schemas=None)
 
def lastVisit (self)
 
def saveVisit (self, visitId, visitTime)
 
def tableRowCount (self)
 
def getDiaObjects (self, pixel_ranges, return_pandas=False)
 
def getDiaSourcesInRegion (self, pixel_ranges, dt, return_pandas=False)
 
def getDiaSources (self, object_ids, dt, return_pandas=False)
 
def getDiaForcedSources (self, object_ids, dt, return_pandas=False)
 
def storeDiaObjects (self, objs, dt)
 
def storeDiaSources (self, sources)
 
def storeDiaForcedSources (self, sources)
 
def countUnassociatedObjects (self)
 
def isVisitProcessed (self, visitInfo)
 
def dailyJob (self)
 
def makeSchema (self, drop=False, mysql_engine='InnoDB', oracle_tablespace=None, oracle_iot=False)
 

Public Attributes

 config
 

Detailed Description

Interface to L1 database, hides all database access details.

The implementation is configured via standard ``pex_config`` mechanism
using `PpdbConfig` configuration class. For an example of different
configurations check config/ folder.

Parameters
----------
config : `PpdbConfig`
afw_schemas : `dict`, optional
    Dictionary with table name for a key and `afw.table.Schema`
    for a value. Columns in schema will be added to standard
    PPDB schema.

Definition at line 210 of file ppdb.py.

Constructor & Destructor Documentation

◆ __init__()

def lsst.dax.ppdb.ppdb.Ppdb.__init__ (   self,
  config,
  afw_schemas = None 
)

Definition at line 226 of file ppdb.py.

226  def __init__(self, config, afw_schemas=None):
227 
228  self.config = config
229 
230  # logging.getLogger('sqlalchemy').setLevel(logging.INFO)
231  _LOG.debug("PPDB Configuration:")
232  _LOG.debug(" dia_object_index: %s", self.config.dia_object_index)
233  _LOG.debug(" dia_object_nightly: %s", self.config.dia_object_nightly)
234  _LOG.debug(" read_sources_months: %s", self.config.read_sources_months)
235  _LOG.debug(" read_forced_sources_months: %s", self.config.read_forced_sources_months)
236  _LOG.debug(" dia_object_columns: %s", self.config.dia_object_columns)
237  _LOG.debug(" object_last_replace: %s", self.config.object_last_replace)
238  _LOG.debug(" schema_file: %s", self.config.schema_file)
239  _LOG.debug(" extra_schema_file: %s", self.config.extra_schema_file)
240  _LOG.debug(" column_map: %s", self.config.column_map)
241  _LOG.debug(" schema prefix: %s", self.config.prefix)
242 
243  # engine is reused between multiple processes, make sure that we don't
244  # share connections by disabling pool (by using NullPool class)
245  kw = dict(echo=self.config.sql_echo)
246  conn_args = dict()
247  if not self.config.connection_pool:
248  kw.update(poolclass=NullPool)
249  if self.config.isolation_level is not None:
250  kw.update(isolation_level=self.config.isolation_level)
251  if self.config.connection_timeout is not None:
252  if self.config.db_url.startswith("sqlite"):
253  conn_args.update(timeout=self.config.connection_timeout)
254  elif self.config.db_url.startswith(("postgresql", "mysql")):
255  conn_args.update(connect_timeout=self.config.connection_timeout)
256  kw.update(connect_args=conn_args)
257  self._engine = sqlalchemy.create_engine(self.config.db_url, **kw)
258 
259  self._schema = ppdbSchema.PpdbSchema(engine=self._engine,
260  dia_object_index=self.config.dia_object_index,
261  dia_object_nightly=self.config.dia_object_nightly,
262  schema_file=self.config.schema_file,
263  extra_schema_file=self.config.extra_schema_file,
264  column_map=self.config.column_map,
265  afw_schemas=afw_schemas,
266  prefix=self.config.prefix)
267 
def __init__(self, minimum, dataRange, Q)

Member Function Documentation

◆ countUnassociatedObjects()

def lsst.dax.ppdb.ppdb.Ppdb.countUnassociatedObjects (   self)
Return the number of DiaObjects that have only one DiaSource associated
with them.

Used as part of ap_verify metrics.

Returns
-------
count : `int`
    Number of DiaObjects with exactly one associated DiaSource.

Definition at line 779 of file ppdb.py.

779  def countUnassociatedObjects(self):
780  """Return the number of DiaObjects that have only one DiaSource associated
781  with them.
782 
783  Used as part of ap_verify metrics.
784 
785  Returns
786  -------
787  count : `int`
788  Number of DiaObjects with exactly one associated DiaSource.
789  """
790  # Retrieve the DiaObject table.
791  table = self._schema.objects
792 
793  # Construct the sql statement.
794  stmt = sql.select([func.count()]).select_from(table).where(table.c.nDiaSources == 1)
795  stmt = stmt.where(table.c.validityEnd == None) # noqa: E711
796 
797  # Return the count.
798  count = self._engine.scalar(stmt)
799 
800  return count
801 

◆ dailyJob()

def lsst.dax.ppdb.ppdb.Ppdb.dailyJob (   self)
Implement daily activities like cleanup/vacuum.

What should be done during daily cleanup is determined by
configuration/schema.

Definition at line 827 of file ppdb.py.

827  def dailyJob(self):
828  """Implement daily activities like cleanup/vacuum.
829 
830  What should be done during daily cleanup is determined by
831  configuration/schema.
832  """
833 
834  # move data from DiaObjectNightly into DiaObject
835  if self.config.dia_object_nightly:
836  with _ansi_session(self._engine) as conn:
837  query = 'INSERT INTO "' + self._schema.objects.name + '" '
838  query += 'SELECT * FROM "' + self._schema.objects_nightly.name + '"'
839  with Timer('DiaObjectNightly copy', self.config.timer):
840  conn.execute(sql.text(query))
841 
842  query = 'DELETE FROM "' + self._schema.objects_nightly.name + '"'
843  with Timer('DiaObjectNightly delete', self.config.timer):
844  conn.execute(sql.text(query))
845 
846  if self._engine.name == 'postgresql':
847 
848  # do VACUUM on all tables
849  _LOG.info("Running VACUUM on all tables")
850  connection = self._engine.raw_connection()
851  ISOLATION_LEVEL_AUTOCOMMIT = 0
852  connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
853  cursor = connection.cursor()
854  cursor.execute("VACUUM ANALYSE")
855 

◆ getDiaForcedSources()

def lsst.dax.ppdb.ppdb.Ppdb.getDiaForcedSources (   self,
  object_ids,
  dt,
  return_pandas = False 
)
Returns catalog of DiaForcedSource instances matching given
DiaObjects.

This method returns :doc:`/modules/lsst.afw.table/index` catalog with schema determined by
the schema of L1 database table. Re-mapping of the column names may
be done for some columns (based on column map passed to constructor)
but types or units are not changed.

Parameters
----------
object_ids :
    Collection of DiaObject IDs
dt : `datetime.datetime`
    Time of the current visit
return_pandas : `bool`
    Return a `pandas.DataFrame` instead of
    `lsst.afw.table.SourceCatalog`.

Returns
-------
catalog : `lsst.afw.table.SourceCatalog` or `None`
    Catalog containing DiaForcedSource records. `None` is returned if
    ``read_sources_months`` configuration parameter is set to 0 or
    when ``object_ids`` is empty.

Definition at line 545 of file ppdb.py.

545  def getDiaForcedSources(self, object_ids, dt, return_pandas=False):
546  """Returns catalog of DiaForcedSource instances matching given
547  DiaObjects.
548 
549  This method returns :doc:`/modules/lsst.afw.table/index` catalog with schema determined by
550  the schema of L1 database table. Re-mapping of the column names may
551  be done for some columns (based on column map passed to constructor)
552  but types or units are not changed.
553 
554  Parameters
555  ----------
556  object_ids :
557  Collection of DiaObject IDs
558  dt : `datetime.datetime`
559  Time of the current visit
560  return_pandas : `bool`
561  Return a `pandas.DataFrame` instead of
562  `lsst.afw.table.SourceCatalog`.
563 
564  Returns
565  -------
566  catalog : `lsst.afw.table.SourceCatalog` or `None`
 567  Catalog containing DiaForcedSource records. `None` is returned if
568  ``read_sources_months`` configuration parameter is set to 0 or
569  when ``object_ids`` is empty.
570  """
571 
572  if self.config.read_forced_sources_months == 0:
573  _LOG.info("Skip DiaForceSources fetching")
574  return None
575 
576  if not object_ids:
577  _LOG.info("Skip DiaForceSources fetching - no Objects")
578  # this should create a catalog, but the list of columns may be empty
579  return None
580 
581  table = self._schema.forcedSources
582  sources = None
583 
584  with Timer('DiaForcedSource select', self.config.timer):
585  with _ansi_session(self._engine) as conn:
586  for ids in _split(sorted(object_ids), 1000):
587 
588  query = 'SELECT * FROM "' + table.name + '" WHERE '
589 
590  # select by object id
591  ids = ",".join(str(id) for id in ids)
592  query += '"diaObjectId" IN (' + ids + ') '
593 
594  # execute select
595  if return_pandas:
596  df = pandas.read_sql_query(sql.text(query), conn)
597  if sources is None:
598  sources = df
599  else:
600  sources = sources.append(df)
601  else:
602  res = conn.execute(sql.text(query))
603  sources = self._convertResult(res, "DiaForcedSource", sources)
604 
605  _LOG.debug("found %s DiaForcedSources", len(sources))
606  return sources
607 

◆ getDiaObjects()

def lsst.dax.ppdb.ppdb.Ppdb.getDiaObjects (   self,
  pixel_ranges,
  return_pandas = False 
)
Returns catalog of DiaObject instances from given region.

Objects are searched based on pixelization index and region is
determined by the set of indices. There is no assumption on a
particular type of index, client is responsible for consistency
when calculating pixelization indices.

This method returns :doc:`/modules/lsst.afw.table/index` catalog with schema determined by
the schema of PPDB table. Re-mapping of the column names is done for
some columns (based on column map passed to constructor) but types
or units are not changed.

Returns only the last version of each DiaObject.

Parameters
----------
pixel_ranges : `list` of `tuple`
    Sequence of ranges, range is a tuple (minPixelID, maxPixelID).
    This defines set of pixel indices to be included in result.
return_pandas : `bool`
    Return a `pandas.DataFrame` instead of
    `lsst.afw.table.SourceCatalog`.

Returns
-------
catalog : `lsst.afw.table.SourceCatalog` or `pandas.DataFrame`
    Catalog containing DiaObject records.

Definition at line 343 of file ppdb.py.

343  def getDiaObjects(self, pixel_ranges, return_pandas=False):
344  """Returns catalog of DiaObject instances from given region.
345 
346  Objects are searched based on pixelization index and region is
347  determined by the set of indices. There is no assumption on a
348  particular type of index, client is responsible for consistency
349  when calculating pixelization indices.
350 
351  This method returns :doc:`/modules/lsst.afw.table/index` catalog with schema determined by
352  the schema of PPDB table. Re-mapping of the column names is done for
353  some columns (based on column map passed to constructor) but types
354  or units are not changed.
355 
356  Returns only the last version of each DiaObject.
357 
358  Parameters
359  ----------
360  pixel_ranges : `list` of `tuple`
361  Sequence of ranges, range is a tuple (minPixelID, maxPixelID).
362  This defines set of pixel indices to be included in result.
363  return_pandas : `bool`
364  Return a `pandas.DataFrame` instead of
365  `lsst.afw.table.SourceCatalog`.
366 
367  Returns
368  -------
369  catalog : `lsst.afw.table.SourceCatalog` or `pandas.DataFrame`
370  Catalog containing DiaObject records.
371  """
372 
373  # decide what columns we need
374  if self.config.dia_object_index == 'last_object_table':
375  table = self._schema.objects_last
376  else:
377  table = self._schema.objects
378  if not self.config.dia_object_columns:
379  query = table.select()
380  else:
381  columns = [table.c[col] for col in self.config.dia_object_columns]
382  query = sql.select(columns)
383 
384  if self.config.diaobject_index_hint:
385  val = self.config.diaobject_index_hint
386  query = query.with_hint(table, 'index_rs_asc(%(name)s "{}")'.format(val))
387  if self.config.dynamic_sampling_hint > 0:
388  val = self.config.dynamic_sampling_hint
389  query = query.with_hint(table, 'dynamic_sampling(%(name)s {})'.format(val))
390  if self.config.cardinality_hint > 0:
391  val = self.config.cardinality_hint
392  query = query.with_hint(table, 'FIRST_ROWS_1 cardinality(%(name)s {})'.format(val))
393 
394  # build selection
395  exprlist = []
396  for low, upper in pixel_ranges:
397  upper -= 1
398  if low == upper:
399  exprlist.append(table.c.pixelId == low)
400  else:
401  exprlist.append(sql.expression.between(table.c.pixelId, low, upper))
402  query = query.where(sql.expression.or_(*exprlist))
403 
404  # select latest version of objects
405  if self.config.dia_object_index != 'last_object_table':
406  query = query.where(table.c.validityEnd == None) # noqa: E711
407 
408  _LOG.debug("query: %s", query)
409 
410  if self.config.explain:
411  # run the same query with explain
412  self._explain(query, self._engine)
413 
414  # execute select
415  with Timer('DiaObject select', self.config.timer):
416  with self._engine.begin() as conn:
417  if return_pandas:
418  objects = pandas.read_sql_query(query, conn)
419  else:
420  res = conn.execute(query)
421  objects = self._convertResult(res, "DiaObject")
422  _LOG.debug("found %s DiaObjects", len(objects))
423  return objects
424 
def format(config, name=None, writeSourceLine=True, prefix="", verbose=False)
Definition: history.py:174

◆ getDiaSources()

def lsst.dax.ppdb.ppdb.Ppdb.getDiaSources (   self,
  object_ids,
  dt,
  return_pandas = False 
)
Returns catalog of DiaSource instances given set of DiaObject IDs.

This method returns :doc:`/modules/lsst.afw.table/index` catalog with schema determined by
the schema of PPDB table. Re-mapping of the column names is done for
some columns (based on column map passed to constructor) but types or
units are not changed.

Parameters
----------
object_ids :
    Collection of DiaObject IDs
dt : `datetime.datetime`
    Time of the current visit
return_pandas : `bool`
    Return a `pandas.DataFrame` instead of
    `lsst.afw.table.SourceCatalog`.


Returns
-------
catalog : `lsst.afw.table.SourceCatalog`, `pandas.DataFrame`, or `None`
    Catalog containing DiaSource records. `None` is returned if
    ``read_sources_months`` configuration parameter is set to 0 or
    when ``object_ids`` is empty.

Definition at line 484 of file ppdb.py.

484  def getDiaSources(self, object_ids, dt, return_pandas=False):
485  """Returns catalog of DiaSource instances given set of DiaObject IDs.
486 
487  This method returns :doc:`/modules/lsst.afw.table/index` catalog with schema determined by
488  the schema of PPDB table. Re-mapping of the column names is done for
489  some columns (based on column map passed to constructor) but types or
490  units are not changed.
491 
492  Parameters
493  ----------
494  object_ids :
495  Collection of DiaObject IDs
496  dt : `datetime.datetime`
497  Time of the current visit
498  return_pandas : `bool`
499  Return a `pandas.DataFrame` instead of
500  `lsst.afw.table.SourceCatalog`.
501 
502 
503  Returns
504  -------
505  catalog : `lsst.afw.table.SourceCatalog`, `pandas.DataFrame`, or `None`
 506  Catalog containing DiaSource records. `None` is returned if
507  ``read_sources_months`` configuration parameter is set to 0 or
508  when ``object_ids`` is empty.
509  """
510 
511  if self.config.read_sources_months == 0:
512  _LOG.info("Skip DiaSources fetching")
513  return None
514 
515  if len(object_ids) <= 0:
516  _LOG.info("Skip DiaSources fetching - no Objects")
517  # this should create a catalog, but the list of columns may be empty
518  return None
519 
520  table = self._schema.sources
521  sources = None
522  with Timer('DiaSource select', self.config.timer):
523  with _ansi_session(self._engine) as conn:
524  for ids in _split(sorted(object_ids), 1000):
525  query = 'SELECT * FROM "' + table.name + '" WHERE '
526 
527  # select by object id
528  ids = ",".join(str(id) for id in ids)
529  query += '"diaObjectId" IN (' + ids + ') '
530 
531  # execute select
532  if return_pandas:
533  df = pandas.read_sql_query(sql.text(query), conn)
534  if sources is None:
535  sources = df
536  else:
537  sources = sources.append(df)
538  else:
539  res = conn.execute(sql.text(query))
540  sources = self._convertResult(res, "DiaSource", sources)
541 
542  _LOG.debug("found %s DiaSources", len(sources))
543  return sources
544 

◆ getDiaSourcesInRegion()

def lsst.dax.ppdb.ppdb.Ppdb.getDiaSourcesInRegion (   self,
  pixel_ranges,
  dt,
  return_pandas = False 
)
Returns catalog of DiaSource instances from given region.

Sources are searched based on pixelization index and region is
determined by the set of indices. There is no assumption on a
particular type of index, client is responsible for consistency
when calculating pixelization indices.

This method returns :doc:`/modules/lsst.afw.table/index` catalog with schema determined by
the schema of PPDB table. Re-mapping of the column names is done for
some columns (based on column map passed to constructor) but types or
units are not changed.

Parameters
----------
pixel_ranges : `list` of `tuple`
    Sequence of ranges, range is a tuple (minPixelID, maxPixelID).
    This defines set of pixel indices to be included in result.
dt : `datetime.datetime`
    Time of the current visit
return_pandas : `bool`
    Return a `pandas.DataFrame` instead of
    `lsst.afw.table.SourceCatalog`.

Returns
-------
catalog : `lsst.afw.table.SourceCatalog`, `pandas.DataFrame`, or `None`
    Catalog containing DiaSource records. `None` is returned if
    ``read_sources_months`` configuration parameter is set to 0.

Definition at line 425 of file ppdb.py.

425  def getDiaSourcesInRegion(self, pixel_ranges, dt, return_pandas=False):
426  """Returns catalog of DiaSource instances from given region.
427 
428  Sources are searched based on pixelization index and region is
429  determined by the set of indices. There is no assumption on a
430  particular type of index, client is responsible for consistency
431  when calculating pixelization indices.
432 
433  This method returns :doc:`/modules/lsst.afw.table/index` catalog with schema determined by
434  the schema of PPDB table. Re-mapping of the column names is done for
435  some columns (based on column map passed to constructor) but types or
436  units are not changed.
437 
438  Parameters
439  ----------
440  pixel_ranges : `list` of `tuple`
441  Sequence of ranges, range is a tuple (minPixelID, maxPixelID).
442  This defines set of pixel indices to be included in result.
443  dt : `datetime.datetime`
444  Time of the current visit
445  return_pandas : `bool`
446  Return a `pandas.DataFrame` instead of
447  `lsst.afw.table.SourceCatalog`.
448 
449  Returns
450  -------
451  catalog : `lsst.afw.table.SourceCatalog`, `pandas.DataFrame`, or `None`
452  Catalog containing DiaSource records. `None` is returned if
453  ``read_sources_months`` configuration parameter is set to 0.
454  """
455 
456  if self.config.read_sources_months == 0:
457  _LOG.info("Skip DiaSources fetching")
458  return None
459 
460  table = self._schema.sources
461  query = table.select()
462 
463  # build selection
464  exprlist = []
465  for low, upper in pixel_ranges:
466  upper -= 1
467  if low == upper:
468  exprlist.append(table.c.pixelId == low)
469  else:
470  exprlist.append(sql.expression.between(table.c.pixelId, low, upper))
471  query = query.where(sql.expression.or_(*exprlist))
472 
473  # execute select
474  with Timer('DiaSource select', self.config.timer):
475  with _ansi_session(self._engine) as conn:
476  if return_pandas:
477  sources = pandas.read_sql_query(query, conn)
478  else:
479  res = conn.execute(query)
480  sources = self._convertResult(res, "DiaSource")
481  _LOG.debug("found %s DiaSources", len(sources))
482  return sources
483 

◆ isVisitProcessed()

def lsst.dax.ppdb.ppdb.Ppdb.isVisitProcessed (   self,
  visitInfo 
)
Test whether data from an image has been loaded into the database.

Used as part of ap_verify metrics.

Parameters
----------
visitInfo : `lsst.afw.image.VisitInfo`
    The metadata for the image of interest.

Returns
-------
isProcessed : `bool`
    `True` if the data are present, `False` otherwise.

Definition at line 802 of file ppdb.py.

802  def isVisitProcessed(self, visitInfo):
803  """Test whether data from an image has been loaded into the database.
804 
805  Used as part of ap_verify metrics.
806 
807  Parameters
808  ----------
809  visitInfo : `lsst.afw.image.VisitInfo`
810  The metadata for the image of interest.
811 
812  Returns
813  -------
814  isProcessed : `bool`
815  `True` if the data are present, `False` otherwise.
816  """
817  id = visitInfo.getExposureId()
818  table = self._schema.sources
819  idField = table.c.ccdVisitId
820 
821  # Hopefully faster than SELECT DISTINCT
822  query = sql.select([idField]).select_from(table) \
823  .where(idField == id).limit(1)
824 
825  return self._engine.scalar(query) is not None
826 

◆ lastVisit()

def lsst.dax.ppdb.ppdb.Ppdb.lastVisit (   self)
Returns last visit information or `None` if visits table is empty.

Visits table is used by ap_proto to track visit information, it is
not a part of the regular PPDB schema.

Returns
-------
visit : `Visit` or `None`
    Last stored visit info or `None` if there was nothing stored yet.

Definition at line 268 of file ppdb.py.

268  def lastVisit(self):
269  """Returns last visit information or `None` if visits table is empty.
270 
271  Visits table is used by ap_proto to track visit information, it is
272  not a part of the regular PPDB schema.
273 
274  Returns
275  -------
276  visit : `Visit` or `None`
277  Last stored visit info or `None` if there was nothing stored yet.
278  """
279 
280  with self._engine.begin() as conn:
281 
282  stmnt = sql.select([sql.func.max(self._schema.visits.c.visitId),
283  sql.func.max(self._schema.visits.c.visitTime)])
284  res = conn.execute(stmnt)
285  row = res.fetchone()
286  if row[0] is None:
287  return None
288 
289  visitId = row[0]
290  visitTime = row[1]
291  _LOG.info("lastVisit: visitId: %s visitTime: %s (%s)", visitId,
292  visitTime, type(visitTime))
293 
294  # get max IDs from corresponding tables
295  stmnt = sql.select([sql.func.max(self._schema.objects.c.diaObjectId)])
296  lastObjectId = conn.scalar(stmnt)
297  stmnt = sql.select([sql.func.max(self._schema.sources.c.diaSourceId)])
298  lastSourceId = conn.scalar(stmnt)
299 
300  return Visit(visitId=visitId, visitTime=visitTime,
301  lastObjectId=lastObjectId, lastSourceId=lastSourceId)
302 
table::Key< int > type
Definition: Detector.cc:163

◆ makeSchema()

def lsst.dax.ppdb.ppdb.Ppdb.makeSchema (   self,
  drop = False,
  mysql_engine = 'InnoDB',
  oracle_tablespace = None,
  oracle_iot = False 
)
Create or re-create all tables.

Parameters
----------
drop : `bool`
    If True then drop tables before creating new ones.
mysql_engine : `str`, optional
    Name of the MySQL engine to use for new tables.
oracle_tablespace : `str`, optional
    Name of Oracle tablespace.
oracle_iot : `bool`, optional
    Make Index-organized DiaObjectLast table.

Definition at line 856 of file ppdb.py.

856  def makeSchema(self, drop=False, mysql_engine='InnoDB', oracle_tablespace=None, oracle_iot=False):
857  """Create or re-create all tables.
858 
859  Parameters
860  ----------
861  drop : `bool`
862  If True then drop tables before creating new ones.
863  mysql_engine : `str`, optional
864  Name of the MySQL engine to use for new tables.
865  oracle_tablespace : `str`, optional
866  Name of Oracle tablespace.
867  oracle_iot : `bool`, optional
868  Make Index-organized DiaObjectLast table.
869  """
870  self._schema.makeSchema(drop=drop, mysql_engine=mysql_engine,
871  oracle_tablespace=oracle_tablespace,
872  oracle_iot=oracle_iot)
873 

◆ saveVisit()

def lsst.dax.ppdb.ppdb.Ppdb.saveVisit (   self,
  visitId,
  visitTime 
)
Store visit information.

This method is only used by ``ap_proto`` script from ``l1dbproto``
and is not intended for production pipelines.

Parameters
----------
visitId : `int`
    Visit identifier
visitTime : `datetime.datetime`
    Visit timestamp.

Definition at line 303 of file ppdb.py.

303  def saveVisit(self, visitId, visitTime):
304  """Store visit information.
305 
306  This method is only used by ``ap_proto`` script from ``l1dbproto``
307  and is not intended for production pipelines.
308 
309  Parameters
310  ----------
311  visitId : `int`
312  Visit identifier
313  visitTime : `datetime.datetime`
314  Visit timestamp.
315  """
316 
317  ins = self._schema.visits.insert().values(visitId=visitId,
318  visitTime=visitTime)
319  self._engine.execute(ins)
320 

◆ storeDiaForcedSources()

def lsst.dax.ppdb.ppdb.Ppdb.storeDiaForcedSources (   self,
  sources 
)
Store a set of DIAForcedSources from current visit.

This method takes :doc:`/modules/lsst.afw.table/index` catalog, its schema must be
compatible with the schema of L1 database table:

  - column names must correspond to database table columns
  - some column names may be re-mapped based on column map passed to
    constructor
  - types and units of the columns must match database definitions,
    no unit conversion is performed presently
  - columns that have default values in database schema can be
    omitted from afw schema

Parameters
----------
sources : `lsst.afw.table.BaseCatalog` or `pandas.DataFrame`
    Catalog containing DiaForcedSource records

Definition at line 748 of file ppdb.py.

748  def storeDiaForcedSources(self, sources):
749  """Store a set of DIAForcedSources from current visit.
750 
 751  This method takes :doc:`/modules/lsst.afw.table/index` catalog, its schema must be
752  compatible with the schema of L1 database table:
753 
754  - column names must correspond to database table columns
 755  - some column names may be re-mapped based on column map passed to
756  constructor
757  - types and units of the columns must match database definitions,
758  no unit conversion is performed presently
759  - columns that have default values in database schema can be
760  omitted from afw schema
761 
762  Parameters
763  ----------
764  sources : `lsst.afw.table.BaseCatalog` or `pandas.DataFrame`
765  Catalog containing DiaForcedSource records
766  """
767 
768  # everything to be done in single transaction
769  with _ansi_session(self._engine) as conn:
770 
771  if isinstance(sources, pandas.DataFrame):
772  with Timer("DiaForcedSource insert", self.config.timer):
773  sources.to_sql("DiaForcedSource", conn, if_exists='append',
774  index=False)
775  else:
776  table = self._schema.forcedSources
777  self._storeObjectsAfw(sources, conn, table, "DiaForcedSource")
778 

◆ storeDiaObjects()

def lsst.dax.ppdb.ppdb.Ppdb.storeDiaObjects (   self,
  objs,
  dt 
)
Store catalog of DiaObjects from current visit.

This method takes :doc:`/modules/lsst.afw.table/index` catalog, its schema must be
compatible with the schema of PPDB table:

  - column names must correspond to database table columns
  - some column names are re-mapped based on column map passed to
    constructor
  - types and units of the columns must match database definitions,
    no unit conversion is performed presently
  - columns that have default values in database schema can be
    omitted from afw schema
  - this method knows how to fill interval-related columns
    (validityStart, validityEnd) they do not need to appear in
    afw schema

Parameters
----------
objs : `lsst.afw.table.BaseCatalog` or `pandas.DataFrame`
    Catalog with DiaObject records
dt : `datetime.datetime`
    Time of the visit

Definition at line 608 of file ppdb.py.

608  def storeDiaObjects(self, objs, dt):
609  """Store catalog of DiaObjects from current visit.
610 
 611  This method takes :doc:`/modules/lsst.afw.table/index` catalog, its schema must be
612  compatible with the schema of PPDB table:
613 
614  - column names must correspond to database table columns
 615  - some column names are re-mapped based on column map passed to
616  constructor
617  - types and units of the columns must match database definitions,
618  no unit conversion is performed presently
619  - columns that have default values in database schema can be
620  omitted from afw schema
621  - this method knows how to fill interval-related columns
622  (validityStart, validityEnd) they do not need to appear in
623  afw schema
624 
625  Parameters
626  ----------
627  objs : `lsst.afw.table.BaseCatalog` or `pandas.DataFrame`
628  Catalog with DiaObject records
629  dt : `datetime.datetime`
630  Time of the visit
631  """
632 
633  if isinstance(objs, pandas.DataFrame):
634  ids = sorted(objs['diaObjectId'])
635  else:
636  ids = sorted([obj['id'] for obj in objs])
637  _LOG.debug("first object ID: %d", ids[0])
638 
639  # NOTE: workaround for sqlite, need this here to avoid
640  # "database is locked" error.
641  table = self._schema.objects
642 
643  # everything to be done in single transaction
644  with _ansi_session(self._engine) as conn:
645 
646  ids = ",".join(str(id) for id in ids)
647 
648  if self.config.dia_object_index == 'last_object_table':
649 
650  # insert and replace all records in LAST table, mysql and postgres have
651  # non-standard features (handled in _storeObjectsAfw)
652  table = self._schema.objects_last
653  do_replace = self.config.object_last_replace
654  # If the input data is of type Pandas, we drop the previous
655  # objects regardless of the do_replace setting due to how
656  # Pandas inserts objects.
657  if not do_replace or isinstance(objs, pandas.DataFrame):
658  query = 'DELETE FROM "' + table.name + '" '
659  query += 'WHERE "diaObjectId" IN (' + ids + ') '
660 
661  if self.config.explain:
662  # run the same query with explain
663  self._explain(query, conn)
664 
665  with Timer(table.name + ' delete', self.config.timer):
666  res = conn.execute(sql.text(query))
667  _LOG.debug("deleted %s objects", res.rowcount)
668 
669  extra_columns = dict(lastNonForcedSource=dt)
670  if isinstance(objs, pandas.DataFrame):
671  with Timer("DiaObjectLast insert", self.config.timer):
672  for col, data in extra_columns.items():
673  objs[col] = data
674  objs.to_sql("DiaObjectLast", conn, if_exists='append',
675  index=False)
676  else:
677  self._storeObjectsAfw(objs, conn, table, "DiaObjectLast",
678  replace=do_replace,
679  extra_columns=extra_columns)
680 
681  else:
682 
683  # truncate existing validity intervals
684  table = self._schema.objects
685  query = 'UPDATE "' + table.name + '" '
686  query += "SET \"validityEnd\" = '" + str(dt) + "' "
687  query += 'WHERE "diaObjectId" IN (' + ids + ') '
688  query += 'AND "validityEnd" IS NULL'
689 
690  # _LOG.debug("query: %s", query)
691 
692  if self.config.explain:
693  # run the same query with explain
694  self._explain(query, conn)
695 
696  with Timer(table.name + ' truncate', self.config.timer):
697  res = conn.execute(sql.text(query))
698  _LOG.debug("truncated %s intervals", res.rowcount)
699 
700  # insert new versions
701  if self.config.dia_object_nightly:
702  table = self._schema.objects_nightly
703  else:
704  table = self._schema.objects
705  extra_columns = dict(lastNonForcedSource=dt, validityStart=dt,
706  validityEnd=None)
707  if isinstance(objs, pandas.DataFrame):
708  with Timer("DiaObject insert", self.config.timer):
709  for col, data in extra_columns.items():
710  objs[col] = data
711  objs.to_sql("DiaObject", conn, if_exists='append',
712  index=False)
713  else:
714  self._storeObjectsAfw(objs, conn, table, "DiaObject",
715  extra_columns=extra_columns)
716 

◆ storeDiaSources()

def lsst.dax.ppdb.ppdb.Ppdb.storeDiaSources (   self,
  sources 
)
Store catalog of DIASources from current visit.

This method takes :doc:`/modules/lsst.afw.table/index` catalog, its schema must be
compatible with the schema of L1 database table:

  - column names must correspond to database table columns
  - some column names may be re-mapped based on column map passed to
    constructor
  - types and units of the columns must match database definitions,
    no unit conversion is performed presently
  - columns that have default values in database schema can be
    omitted from afw schema

Parameters
----------
sources : `lsst.afw.table.BaseCatalog` or `pandas.DataFrame`
    Catalog containing DiaSource records

Definition at line 717 of file ppdb.py.

717  def storeDiaSources(self, sources):
718  """Store catalog of DIASources from current visit.
719 
 720  This method takes :doc:`/modules/lsst.afw.table/index` catalog, its schema must be
721  compatible with the schema of L1 database table:
722 
723  - column names must correspond to database table columns
 724  - some column names may be re-mapped based on column map passed to
725  constructor
726  - types and units of the columns must match database definitions,
727  no unit conversion is performed presently
728  - columns that have default values in database schema can be
729  omitted from afw schema
730 
731  Parameters
732  ----------
733  sources : `lsst.afw.table.BaseCatalog` or `pandas.DataFrame`
734  Catalog containing DiaSource records
735  """
736 
737  # everything to be done in single transaction
738  with _ansi_session(self._engine) as conn:
739 
740  if isinstance(sources, pandas.DataFrame):
741  with Timer("DiaSource insert", self.config.timer):
742  sources.to_sql("DiaSource", conn, if_exists='append',
743  index=False)
744  else:
745  table = self._schema.sources
746  self._storeObjectsAfw(sources, conn, table, "DiaSource")
747 

◆ tableRowCount()

def lsst.dax.ppdb.ppdb.Ppdb.tableRowCount (   self)
Returns dictionary with the table names and row counts.

Used by ``ap_proto`` to keep track of the size of the database tables.
Depending on database technology this could be expensive operation.

Returns
-------
row_counts : `dict`
    Dict where key is a table name and value is a row count.

Definition at line 321 of file ppdb.py.

321  def tableRowCount(self):
322  """Returns dictionary with the table names and row counts.
323 
324  Used by ``ap_proto`` to keep track of the size of the database tables.
325  Depending on database technology this could be expensive operation.
326 
327  Returns
328  -------
329  row_counts : `dict`
330  Dict where key is a table name and value is a row count.
331  """
332  res = {}
333  tables = [self._schema.objects, self._schema.sources, self._schema.forcedSources]
334  if self.config.dia_object_index == 'last_object_table':
335  tables.append(self._schema.objects_last)
336  for table in tables:
337  stmt = sql.select([func.count()]).select_from(table)
338  count = self._engine.scalar(stmt)
339  res[table.name] = count
340 
341  return res
342 

Member Data Documentation

◆ config

lsst.dax.ppdb.ppdb.Ppdb.config

Definition at line 228 of file ppdb.py.


The documentation for this class was generated from the following file: