# This file is part of dax_ppdb.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Module defining Ppdb class and related methods.
"""

__all__ = ["PpdbConfig", "Ppdb", "Visit"]

from collections import namedtuple
from contextlib import contextmanager
from datetime import datetime
import logging
import numpy as np
import os

import lsst.geom as geom
import lsst.afw.table as afwTable
import lsst.pex.config as pexConfig
from lsst.pex.config import Field, ChoiceField, ListField
from lsst.utils import getPackageDir
import sqlalchemy
from sqlalchemy import (func, sql)
from sqlalchemy.pool import NullPool
from . import timer, ppdbSchema


_LOG = logging.getLogger(__name__.partition(".")[2])  # strip leading "lsst."

class Timer(object):
    """Timer class defining context manager which tracks execution timing.

    Typical use:

        with Timer("timer_name"):
            do_something

    On exit from the block it will print elapsed time.

    See also :py:mod:`timer` module.
    """
    def __init__(self, name, do_logging=True, log_before_cursor_execute=False):
        self._log_before_cursor_execute = log_before_cursor_execute
        self._do_logging = do_logging
        self._timer1 = timer.Timer(name)
        self._timer2 = timer.Timer(name + " (before/after cursor)")

    def __enter__(self):
        """
        Enter context, start timer
        """
        # event.listen(engine.Engine, "before_cursor_execute", self._start_timer)
        # event.listen(engine.Engine, "after_cursor_execute", self._stop_timer)
        self._timer1.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Exit context, stop and dump timer
        """
        if exc_type is None:
            self._timer1.stop()
            if self._do_logging:
                self._timer1.dump()
        # event.remove(engine.Engine, "before_cursor_execute", self._start_timer)
        # event.remove(engine.Engine, "after_cursor_execute", self._stop_timer)
        return False

    def _start_timer(self, conn, cursor, statement, parameters, context, executemany):
        """Start counting"""
        if self._log_before_cursor_execute:
            _LOG.info("before_cursor_execute")
        self._timer2.start()

    def _stop_timer(self, conn, cursor, statement, parameters, context, executemany):
        """Stop counting"""
        self._timer2.stop()
        if self._do_logging:
            self._timer2.dump()


def _split(seq, nItems):
    """Split a sequence into smaller sequences"""
    seq = list(seq)
    while seq:
        yield seq[:nItems]
        del seq[:nItems]

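# Illustrative sketch (not executed on import): ``_split`` chunks a sequence
# so that the SQL ``IN (...)`` lists generated further below stay reasonably
# small. The values here are made up for the example.
#
#     for chunk in _split(range(7), 3):
#         print(chunk)    # [0, 1, 2], then [3, 4, 5], then [6]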

# Information about a single visit
Visit = namedtuple('Visit', 'visitId visitTime lastObjectId lastSourceId')


@contextmanager
def _ansi_session(engine):
    """Returns a connection, makes sure that ANSI mode is set for MySQL
    """
    with engine.begin() as conn:
        if engine.name == 'mysql':
            conn.execute(sql.text("SET SESSION SQL_MODE = 'ANSI'"))
        yield conn
    return

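# Illustrative sketch (not executed on import): everything inside the block
# runs in a single transaction, with ANSI quoting enabled on MySQL backends.
# ``engine`` stands for any SQLAlchemy engine, e.g. ``self._engine`` below.
#
#     with _ansi_session(engine) as conn:
#         conn.execute(sql.text('SELECT COUNT(*) FROM "DiaObject"'))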

def _data_file_name(basename):
    """Return path name of a data file.
    """
    return os.path.join(getPackageDir("dax_ppdb"), "data", basename)


class PpdbConfig(pexConfig.Config):

    db_url = Field(dtype=str, doc="SQLAlchemy database connection URI")
    isolation_level = ChoiceField(dtype=str,
                                  doc="Transaction isolation level",
                                  allowed={"READ_COMMITTED": "Read committed",
                                           "READ_UNCOMMITTED": "Read uncommitted",
                                           "REPEATABLE_READ": "Repeatable read",
                                           "SERIALIZABLE": "Serializable"},
                                  default="READ_COMMITTED")
    connection_pool = Field(dtype=bool,
                            doc=("If False then disable SQLAlchemy connection pool. "
                                 "Do not use connection pool when forking."),
                            default=True)
    connection_timeout = Field(dtype=float,
                               doc="Maximum time to wait for a database lock to be released "
                                   "before exiting. Defaults to SQLAlchemy defaults if not set.",
                               default=None)
    sql_echo = Field(dtype=bool,
                     doc="If True then pass SQLAlchemy echo option.",
                     default=False)
    dia_object_index = ChoiceField(dtype=str,
                                   doc="Indexing mode for DiaObject table",
                                   allowed={'baseline': "Index defined in baseline schema",
                                            'pix_id_iov': "(pixelId, objectId, iovStart) PK",
                                            'last_object_table': "Separate DiaObjectLast table"},
                                   default='baseline')
    dia_object_nightly = Field(dtype=bool,
                               doc="Use separate nightly table for DiaObject",
                               default=False)
    read_sources_months = Field(dtype=int,
                                doc="Number of months of history to read from DiaSource",
                                default=12)
    read_forced_sources_months = Field(dtype=int,
                                       doc="Number of months of history to read from DiaForcedSource",
                                       default=12)
    dia_object_columns = ListField(dtype=str,
                                   doc="List of columns to read from DiaObject, by default read all columns",
                                   default=[])
    object_last_replace = Field(dtype=bool,
                                doc="If True (default) then use \"upsert\" for DiaObjectsLast table",
                                default=True)
    schema_file = Field(dtype=str,
                        doc="Location of (YAML) configuration file with standard schema",
                        default=_data_file_name("ppdb-schema.yaml"))
    extra_schema_file = Field(dtype=str,
                              doc="Location of (YAML) configuration file with extra schema",
                              default=_data_file_name("ppdb-schema-extra.yaml"))
    column_map = Field(dtype=str,
                       doc="Location of (YAML) configuration file with column mapping",
                       default=_data_file_name("ppdb-afw-map.yaml"))
    prefix = Field(dtype=str,
                   doc="Prefix to add to table names and index names",
                   default="")
    explain = Field(dtype=bool,
                    doc="If True then run EXPLAIN SQL command on each executed query",
                    default=False)
    timer = Field(dtype=bool,
                  doc="If True then print/log timing information",
                  default=False)
    diaobject_index_hint = Field(dtype=str,
                                 doc="Name of the index to use with Oracle index hint",
                                 default=None)
    dynamic_sampling_hint = Field(dtype=int,
                                  doc="If non-zero then use dynamic_sampling hint",
                                  default=0)
    cardinality_hint = Field(dtype=int,
                             doc="If non-zero then use cardinality hint",
                             default=0)

    def validate(self):
        if self.isolation_level == "READ_COMMITTED" and self.db_url.startswith("sqlite"):
            raise ValueError("Attempting to run Ppdb with SQLITE and isolation level "
                             "'READ_COMMITTED'. Use 'READ_UNCOMMITTED' instead.")


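# A minimal configuration sketch (the database URL is hypothetical). Note
# that validate() above rejects sqlite together with the default
# "READ_COMMITTED" isolation level, hence "READ_UNCOMMITTED" here.
#
#     config = PpdbConfig()
#     config.db_url = "sqlite:///ppdb.sqlite3"      # made-up location
#     config.isolation_level = "READ_UNCOMMITTED"
#     config.connection_pool = False                # e.g. when forking
#     config.validate()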
class Ppdb(object):
    """Interface to L1 database, hides all database access details.

    The implementation is configured via the standard ``pex_config``
    mechanism using the `PpdbConfig` configuration class. For examples of
    different configurations check the ``config/`` folder.

    Parameters
    ----------
    config : `PpdbConfig`
    afw_schemas : `dict`, optional
        Dictionary with table name for a key and `afw.table.Schema`
        for a value. Columns in schema will be added to standard
        PPDB schema.
    """

    def __init__(self, config, afw_schemas=None):

        self.config = config

        # logging.getLogger('sqlalchemy').setLevel(logging.INFO)
        _LOG.info("PPDB Configuration:")
        _LOG.info("    dia_object_index: %s", self.config.dia_object_index)
        _LOG.info("    dia_object_nightly: %s", self.config.dia_object_nightly)
        _LOG.info("    read_sources_months: %s", self.config.read_sources_months)
        _LOG.info("    read_forced_sources_months: %s", self.config.read_forced_sources_months)
        _LOG.info("    dia_object_columns: %s", self.config.dia_object_columns)
        _LOG.info("    object_last_replace: %s", self.config.object_last_replace)
        _LOG.info("    schema_file: %s", self.config.schema_file)
        _LOG.info("    extra_schema_file: %s", self.config.extra_schema_file)
        _LOG.info("    column_map: %s", self.config.column_map)
        _LOG.info("    schema prefix: %s", self.config.prefix)

        # engine is reused between multiple processes, make sure that we don't
        # share connections by disabling pool (by using NullPool class)
        kw = dict(echo=self.config.sql_echo)
        conn_args = dict()
        if not self.config.connection_pool:
            kw.update(poolclass=NullPool)
        if self.config.isolation_level is not None:
            kw.update(isolation_level=self.config.isolation_level)
        if self.config.connection_timeout is not None:
            if self.config.db_url.startswith("sqlite"):
                conn_args.update(timeout=self.config.connection_timeout)
            elif self.config.db_url.startswith(("postgresql", "mysql")):
                conn_args.update(connect_timeout=self.config.connection_timeout)
        kw.update(connect_args=conn_args)
        self._engine = sqlalchemy.create_engine(self.config.db_url, **kw)

        self._schema = ppdbSchema.PpdbSchema(engine=self._engine,
                                             dia_object_index=self.config.dia_object_index,
                                             dia_object_nightly=self.config.dia_object_nightly,
                                             schema_file=self.config.schema_file,
                                             extra_schema_file=self.config.extra_schema_file,
                                             column_map=self.config.column_map,
                                             afw_schemas=afw_schemas,
                                             prefix=self.config.prefix)

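    # Typical construction, assuming ``config`` was built as sketched after
    # PpdbConfig above; makeSchema() (defined below) creates the tables:
    #
    #     ppdb = Ppdb(config)
    #     ppdb.makeSchema(drop=True)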
    def lastVisit(self):
        """Returns last visit information or `None` if visits table is empty.

        Visits table is used by ap_proto to track visit information; it is
        not a part of the regular PPDB schema.

        Returns
        -------
        visit : `Visit` or `None`
            Last stored visit info or `None` if there was nothing stored yet.
        """

        with self._engine.begin() as conn:

            stmnt = sql.select([sql.func.max(self._schema.visits.c.visitId),
                                sql.func.max(self._schema.visits.c.visitTime)])
            res = conn.execute(stmnt)
            row = res.fetchone()
            if row[0] is None:
                return None

            visitId = row[0]
            visitTime = row[1]
            _LOG.info("lastVisit: visitId: %s visitTime: %s (%s)", visitId,
                      visitTime, type(visitTime))

            # get max IDs from corresponding tables
            stmnt = sql.select([sql.func.max(self._schema.objects.c.diaObjectId)])
            lastObjectId = conn.scalar(stmnt)
            stmnt = sql.select([sql.func.max(self._schema.sources.c.diaSourceId)])
            lastSourceId = conn.scalar(stmnt)

            return Visit(visitId=visitId, visitTime=visitTime,
                         lastObjectId=lastObjectId, lastSourceId=lastSourceId)

    def saveVisit(self, visitId, visitTime):
        """Store visit information.

        This method is only used by ``ap_proto`` script from ``l1dbproto``
        and is not intended for production pipelines.

        Parameters
        ----------
        visitId : `int`
            Visit identifier
        visitTime : `datetime.datetime`
            Visit timestamp.
        """

        ins = self._schema.visits.insert().values(visitId=visitId,
                                                  visitTime=visitTime)
        self._engine.execute(ins)

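    # Round-trip sketch for the two visit-tracking methods (the ID and the
    # timestamp are invented for illustration):
    #
    #     ppdb.saveVisit(visitId=42, visitTime=datetime(2020, 1, 1, 12, 0))
    #     visit = ppdb.lastVisit()
    #     # visit.visitId == 42; lastObjectId/lastSourceId are the current
    #     # maxima of the DiaObject/DiaSource ID columns (possibly None)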
    def tableRowCount(self):
        """Returns dictionary with the table names and row counts.

        Used by ``ap_proto`` to keep track of the size of the database tables.
        Depending on database technology this could be an expensive operation.

        Returns
        -------
        row_counts : `dict`
            Dict where key is a table name and value is a row count.
        """
        res = {}
        tables = [self._schema.objects, self._schema.sources, self._schema.forcedSources]
        if self.config.dia_object_index == 'last_object_table':
            tables.append(self._schema.objects_last)
        for table in tables:
            stmt = sql.select([func.count()]).select_from(table)
            count = self._engine.scalar(stmt)
            res[table.name] = count

        return res

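    # Usage sketch: log the size of every PPDB table after a run.
    #
    #     for name, count in ppdb.tableRowCount().items():
    #         _LOG.info("%s: %s rows", name, count)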
    def getDiaObjects(self, pixel_ranges):
        """Returns catalog of DiaObject instances from given region.

        Objects are searched based on pixelization index and region is
        determined by the set of indices. There is no assumption on a
        particular type of index, the client is responsible for consistency
        when calculating pixelization indices.

        This method returns :doc:`/modules/lsst.afw.table/index` catalog
        with schema determined by the schema of the PPDB table. Re-mapping of
        the column names is done for some columns (based on column map passed
        to constructor) but types or units are not changed.

        Returns only the last version of each DiaObject.

        Parameters
        ----------
        pixel_ranges : `list` of `tuple`
            Sequence of ranges, range is a tuple (minPixelID, maxPixelID).
            This defines set of pixel indices to be included in result.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog`
            Catalog containing DiaObject records.
        """

        # decide what columns we need
        if self.config.dia_object_index == 'last_object_table':
            table = self._schema.objects_last
        else:
            table = self._schema.objects
        if not self.config.dia_object_columns:
            query = table.select()
        else:
            columns = [table.c[col] for col in self.config.dia_object_columns]
            query = sql.select(columns)

        if self.config.diaobject_index_hint:
            val = self.config.diaobject_index_hint
            query = query.with_hint(table, 'index_rs_asc(%(name)s "{}")'.format(val))
        if self.config.dynamic_sampling_hint > 0:
            val = self.config.dynamic_sampling_hint
            query = query.with_hint(table, 'dynamic_sampling(%(name)s {})'.format(val))
        if self.config.cardinality_hint > 0:
            val = self.config.cardinality_hint
            query = query.with_hint(table, 'FIRST_ROWS_1 cardinality(%(name)s {})'.format(val))

        # build selection
        exprlist = []
        for low, upper in pixel_ranges:
            upper -= 1
            if low == upper:
                exprlist.append(table.c.pixelId == low)
            else:
                exprlist.append(sql.expression.between(table.c.pixelId, low, upper))
        query = query.where(sql.expression.or_(*exprlist))

        # select latest version of objects
        if self.config.dia_object_index != 'last_object_table':
            query = query.where(table.c.validityEnd == None)  # noqa: E711

        _LOG.debug("query: %s", query)

        if self.config.explain:
            # run the same query with explain
            self._explain(query, self._engine)

        # execute select
        with Timer('DiaObject select', self.config.timer):
            with self._engine.begin() as conn:
                res = conn.execute(query)
                objects = self._convertResult(res, "DiaObject")
        _LOG.debug("found %s DiaObjects", len(objects))
        return objects

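    # Range semantics sketch: each (minPixelID, maxPixelID) tuple is treated
    # as half-open, hence the ``upper -= 1`` above. The index values here are
    # invented for illustration:
    #
    #     objects = ppdb.getDiaObjects([(1000, 1004), (2000, 2001)])
    #     # selects pixelId BETWEEN 1000 AND 1003, plus pixelId == 2000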
    def getDiaSourcesInRegion(self, pixel_ranges, dt):
        """Returns catalog of DiaSource instances from given region.

        Sources are searched based on pixelization index and region is
        determined by the set of indices. There is no assumption on a
        particular type of index, the client is responsible for consistency
        when calculating pixelization indices.

        This method returns :doc:`/modules/lsst.afw.table/index` catalog
        with schema determined by the schema of the PPDB table. Re-mapping of
        the column names is done for some columns (based on column map passed
        to constructor) but types or units are not changed.

        Parameters
        ----------
        pixel_ranges : `list` of `tuple`
            Sequence of ranges, range is a tuple (minPixelID, maxPixelID).
            This defines set of pixel indices to be included in result.
        dt : `datetime.datetime`
            Time of the current visit

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog` or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.
        """

        if self.config.read_sources_months == 0:
            _LOG.info("Skip DiaSources fetching")
            return None

        table = self._schema.sources
        query = table.select()

        # build selection
        exprlist = []
        for low, upper in pixel_ranges:
            upper -= 1
            if low == upper:
                exprlist.append(table.c.pixelId == low)
            else:
                exprlist.append(sql.expression.between(table.c.pixelId, low, upper))
        query = query.where(sql.expression.or_(*exprlist))

        # execute select
        with Timer('DiaSource select', self.config.timer):
            with _ansi_session(self._engine) as conn:
                res = conn.execute(query)
                sources = self._convertResult(res, "DiaSource")
        _LOG.debug("found %s DiaSources", len(sources))
        return sources

    def getDiaSources(self, object_ids, dt):
        """Returns catalog of DiaSource instances given set of DiaObject IDs.

        This method returns :doc:`/modules/lsst.afw.table/index` catalog
        with schema determined by the schema of the PPDB table. Re-mapping of
        the column names is done for some columns (based on column map passed
        to constructor) but types or units are not changed.

        Parameters
        ----------
        object_ids :
            Collection of DiaObject IDs
        dt : `datetime.datetime`
            Time of the current visit

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog` or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0 or
            when ``object_ids`` is empty.
        """

        if self.config.read_sources_months == 0:
            _LOG.info("Skip DiaSources fetching")
            return None

        if len(object_ids) <= 0:
            _LOG.info("Skip DiaSources fetching - no Objects")
            # this should create a catalog, but the list of columns may be empty
            return None

        table = self._schema.sources
        sources = None
        with Timer('DiaSource select', self.config.timer):
            with _ansi_session(self._engine) as conn:
                for ids in _split(sorted(object_ids), 1000):
                    query = 'SELECT * FROM "' + table.name + '" WHERE '

                    # select by object id
                    ids = ",".join(str(id) for id in ids)
                    query += '"diaObjectId" IN (' + ids + ') '

                    # execute select
                    res = conn.execute(sql.text(query))
                    sources = self._convertResult(res, "DiaSource", sources)

        _LOG.debug("found %s DiaSources", len(sources))
        return sources

    def getDiaForcedSources(self, object_ids, dt):
        """Returns catalog of DiaForcedSource instances matching given
        DiaObjects.

        This method returns :doc:`/modules/lsst.afw.table/index` catalog
        with schema determined by the schema of the L1 database table.
        Re-mapping of the column names may be done for some columns (based on
        column map passed to constructor) but types or units are not changed.

        Parameters
        ----------
        object_ids :
            Collection of DiaObject IDs
        dt : `datetime.datetime`
            Time of the current visit

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog` or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0 or
            when ``object_ids`` is empty.
        """

        if self.config.read_forced_sources_months == 0:
            _LOG.info("Skip DiaForcedSources fetching")
            return None

        if not object_ids:
            _LOG.info("Skip DiaForcedSources fetching - no Objects")
            # this should create a catalog, but the list of columns may be empty
            return None

        table = self._schema.forcedSources
        sources = None

        with Timer('DiaForcedSource select', self.config.timer):
            with _ansi_session(self._engine) as conn:
                for ids in _split(sorted(object_ids), 1000):

                    query = 'SELECT * FROM "' + table.name + '" WHERE '

                    # select by object id
                    ids = ",".join(str(id) for id in ids)
                    query += '"diaObjectId" IN (' + ids + ') '

                    # execute select
                    res = conn.execute(sql.text(query))
                    sources = self._convertResult(res, "DiaForcedSource", sources)

        _LOG.debug("found %s DiaForcedSources", len(sources))
        return sources

    def storeDiaObjects(self, objs, dt):
        """Store catalog of DiaObjects from current visit.

        This method takes :doc:`/modules/lsst.afw.table/index` catalog, its
        schema must be compatible with the schema of the PPDB table:

          - column names must correspond to database table columns
          - some column names are re-mapped based on column map passed to
            constructor
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in database schema can be
            omitted from afw schema
          - this method knows how to fill interval-related columns
            (validityStart, validityEnd); they do not need to appear in
            the afw schema

        Parameters
        ----------
        objs : `lsst.afw.table.BaseCatalog`
            Catalog with DiaObject records
        dt : `datetime.datetime`
            Time of the visit
        """

        ids = sorted([obj['id'] for obj in objs])
        _LOG.debug("first object ID: %d", ids[0])

        # NOTE: workaround for sqlite, need this here to avoid
        # "database is locked" error.
        table = self._schema.objects

        # everything to be done in single transaction
        with _ansi_session(self._engine) as conn:

            ids = ",".join(str(id) for id in ids)

            if self.config.dia_object_index == 'last_object_table':

                # insert and replace all records in LAST table, mysql and postgres have
                # non-standard features (handled in _storeObjectsAfw)
                table = self._schema.objects_last
                do_replace = self.config.object_last_replace
                if not do_replace:
                    query = 'DELETE FROM "' + table.name + '" '
                    query += 'WHERE "diaObjectId" IN (' + ids + ') '

                    if self.config.explain:
                        # run the same query with explain
                        self._explain(query, conn)

                    with Timer(table.name + ' delete', self.config.timer):
                        res = conn.execute(sql.text(query))
                    _LOG.debug("deleted %s objects", res.rowcount)

                extra_columns = dict(lastNonForcedSource=dt)
                self._storeObjectsAfw(objs, conn, table, "DiaObjectLast",
                                      replace=do_replace,
                                      extra_columns=extra_columns)

            else:

                # truncate existing validity intervals
                table = self._schema.objects
                query = 'UPDATE "' + table.name + '" '
                query += "SET \"validityEnd\" = '" + str(dt) + "' "
                query += 'WHERE "diaObjectId" IN (' + ids + ') '
                query += 'AND "validityEnd" IS NULL'

                # _LOG.debug("query: %s", query)

                if self.config.explain:
                    # run the same query with explain
                    self._explain(query, conn)

                with Timer(table.name + ' truncate', self.config.timer):
                    res = conn.execute(sql.text(query))
                _LOG.debug("truncated %s intervals", res.rowcount)

            # insert new versions
            if self.config.dia_object_nightly:
                table = self._schema.objects_nightly
            else:
                table = self._schema.objects
            extra_columns = dict(lastNonForcedSource=dt, validityStart=dt,
                                 validityEnd=None)
            self._storeObjectsAfw(objs, conn, table, "DiaObject",
                                  extra_columns=extra_columns)

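    # Store-flow sketch for the default (non-"last_object_table") mode: the
    # UPDATE above closes open validity intervals for the incoming IDs, then
    # new versions are inserted with validityStart == dt. ``catalog`` is a
    # hypothetical afw catalog matching the DiaObject schema:
    #
    #     ppdb.storeDiaObjects(catalog, datetime.utcnow())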
    def storeDiaSources(self, sources):
        """Store catalog of DIASources from current visit.

        This method takes :doc:`/modules/lsst.afw.table/index` catalog, its
        schema must be compatible with the schema of the L1 database table:

          - column names must correspond to database table columns
          - some column names may be re-mapped based on column map passed to
            constructor
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in database schema can be
            omitted from afw schema

        Parameters
        ----------
        sources : `lsst.afw.table.BaseCatalog`
            Catalog containing DiaSource records
        """

        # everything to be done in single transaction
        with _ansi_session(self._engine) as conn:

            table = self._schema.sources
            self._storeObjectsAfw(sources, conn, table, "DiaSource")

    def storeDiaForcedSources(self, sources):
        """Store a set of DIAForcedSources from current visit.

        This method takes :doc:`/modules/lsst.afw.table/index` catalog, its
        schema must be compatible with the schema of the L1 database table:

          - column names must correspond to database table columns
          - some column names may be re-mapped based on column map passed to
            constructor
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in database schema can be
            omitted from afw schema

        Parameters
        ----------
        sources : `lsst.afw.table.BaseCatalog`
            Catalog containing DiaForcedSource records
        """

        # everything to be done in single transaction
        with _ansi_session(self._engine) as conn:

            table = self._schema.forcedSources
            self._storeObjectsAfw(sources, conn, table, "DiaForcedSource")

    def dailyJob(self):
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily cleanup is determined by
        configuration/schema.
        """

        # move data from DiaObjectNightly into DiaObject
        if self.config.dia_object_nightly:
            with _ansi_session(self._engine) as conn:
                query = 'INSERT INTO "' + self._schema.objects.name + '" '
                query += 'SELECT * FROM "' + self._schema.objects_nightly.name + '"'
                with Timer('DiaObjectNightly copy', self.config.timer):
                    conn.execute(sql.text(query))

                query = 'DELETE FROM "' + self._schema.objects_nightly.name + '"'
                with Timer('DiaObjectNightly delete', self.config.timer):
                    conn.execute(sql.text(query))

        if self._engine.name == 'postgresql':

            # do VACUUM on all tables
            _LOG.info("Running VACUUM on all tables")
            connection = self._engine.raw_connection()
            # same value as psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT;
            # VACUUM cannot run inside a transaction block
            ISOLATION_LEVEL_AUTOCOMMIT = 0
            connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
            cursor = connection.cursor()
            cursor.execute("VACUUM ANALYSE")

    def makeSchema(self, drop=False, mysql_engine='InnoDB', oracle_tablespace=None, oracle_iot=False):
        """Create or re-create all tables.

        Parameters
        ----------
        drop : `bool`
            If True then drop tables before creating new ones.
        mysql_engine : `str`, optional
            Name of the MySQL engine to use for new tables.
        oracle_tablespace : `str`, optional
            Name of Oracle tablespace.
        oracle_iot : `bool`, optional
            Make Index-organized DiaObjectLast table.
        """
        self._schema.makeSchema(drop=drop, mysql_engine=mysql_engine,
                                oracle_tablespace=oracle_tablespace,
                                oracle_iot=oracle_iot)

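    # Usage sketch; options other than ``drop`` only matter for the
    # corresponding database backends:
    #
    #     ppdb.makeSchema(drop=True)                  # start from empty tables
    #     ppdb.makeSchema(mysql_engine='MyISAM')      # hypothetical MySQL tweak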
    def _explain(self, query, conn):
        """Run the query with explain
        """

        _LOG.info("explain for query: %s...", query[:64])

        if conn.engine.name == 'mysql':
            query = "EXPLAIN EXTENDED " + query
        else:
            query = "EXPLAIN " + query

        res = conn.execute(sql.text(query))
        if res.returns_rows:
            _LOG.info("explain: %s", res.keys())
            for row in res:
                _LOG.info("explain: %s", row)
        else:
            _LOG.info("EXPLAIN returned nothing")

    def _storeObjectsAfw(self, objects, conn, table, schema_table_name,
                         replace=False, extra_columns=None):
        """Generic store method.

        Takes catalog of records and stores a bunch of objects in a table.

        Parameters
        ----------
        objects : `lsst.afw.table.BaseCatalog`
            Catalog containing object records
        conn :
            Database connection
        table : `sqlalchemy.Table`
            Database table
        schema_table_name : `str`
            Name of the table to be used for finding table schema.
        replace : `boolean`
            If `True` then use replace instead of INSERT (should be more efficient)
        extra_columns : `dict`, optional
            Mapping (column_name, column_value) which gives column values to add
            to every row, only if column is missing in catalog records.
        """

        def quoteValue(v):
            """Quote and escape values"""
            if v is None:
                v = "NULL"
            elif isinstance(v, datetime):
                v = "'" + str(v) + "'"
            elif isinstance(v, str):
                # we don't expect nasty stuff in strings
                v = "'" + v + "'"
            elif isinstance(v, geom.Angle):
                v = v.asDegrees()
                if np.isfinite(v):
                    v = str(v)
                else:
                    v = "NULL"
            else:
                if np.isfinite(v):
                    v = str(v)
                else:
                    v = "NULL"
            return v

        def quoteId(columnName):
            """Smart quoting for column names.
            Lower-case names are not quoted.
            """
            if not columnName.islower():
                columnName = '"' + columnName + '"'
            return columnName

        if conn.engine.name == "oracle":
            return self._storeObjectsAfwOracle(objects, conn, table,
                                               schema_table_name, replace,
                                               extra_columns)

        schema = objects.getSchema()
        afw_fields = [field.getName() for key, field in schema]

        column_map = self._schema.getAfwColumns(schema_table_name)

        # list of columns (as in cat schema)
        fields = [column_map[field].name for field in afw_fields if field in column_map]

        # use extra columns that are not in fields already
        extra_fields = (extra_columns or {}).keys()
        extra_fields = [field for field in extra_fields if field not in fields]

        if replace and conn.engine.name in ('mysql', 'sqlite'):
            query = 'REPLACE INTO '
        else:
            query = 'INSERT INTO '
        qfields = [quoteId(field) for field in fields + extra_fields]
        query += quoteId(table.name) + ' (' + ','.join(qfields) + ') ' + 'VALUES '

        values = []
        for rec in objects:
            row = []
            for field in afw_fields:
                if field not in column_map:
                    continue
                value = rec[field]
                if column_map[field].type == "DATETIME" and \
                        np.isfinite(value):
                    # convert seconds into datetime
                    value = datetime.utcfromtimestamp(value)
                row.append(quoteValue(value))
            for field in extra_fields:
                row.append(quoteValue(extra_columns[field]))
            values.append('(' + ','.join(row) + ')')

        if self.config.explain:
            # run the same query with explain, only give it one row of data
            self._explain(query + values[0], conn)

        query += ','.join(values)

        if replace and conn.engine.name == 'postgresql':
            # This depends on the fact that "replace" can only be true
            # for the DiaObjectLast table
            pks = ('pixelId', 'diaObjectId')
            query += " ON CONFLICT (\"{}\", \"{}\") DO UPDATE SET ".format(*pks)
            fields = [column_map[field].name for field in afw_fields]
            fields = ['"{0}" = EXCLUDED."{0}"'.format(field)
                      for field in fields if field not in pks]
            query += ', '.join(fields)

        # _LOG.debug("query: %s", query)
        _LOG.info("%s: will store %d records", table.name, len(objects))
        with Timer(table.name + ' insert', self.config.timer):
            res = conn.execute(sql.text(query))
        _LOG.debug("inserted %s intervals", res.rowcount)

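    # For reference, on PostgreSQL with replace=True the code above emits an
    # upsert of roughly this shape (column list abbreviated; the real columns
    # come from the afw schema):
    #
    #     INSERT INTO "DiaObjectLast" ("pixelId","diaObjectId",...) VALUES ...
    #     ON CONFLICT ("pixelId", "diaObjectId") DO UPDATE
    #         SET "ra" = EXCLUDED."ra", ...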
    def _storeObjectsAfwOracle(self, objects, conn, table, schema_table_name,
                               replace=False, extra_columns=None):
        """Store method for Oracle.

        Takes catalog of records and stores a bunch of objects in a table.

        Parameters
        ----------
        objects : `lsst.afw.table.BaseCatalog`
            Catalog containing object records
        conn :
            Database connection
        table : `sqlalchemy.Table`
            Database table
        schema_table_name : `str`
            Name of the table to be used for finding table schema.
        replace : `boolean`
            If `True` then use replace instead of INSERT (should be more efficient)
        extra_columns : `dict`, optional
            Mapping (column_name, column_value) which gives column values to add
            to every row, only if column is missing in catalog records.
        """

        def quoteId(columnName):
            """Smart quoting for column names.
            Lower-case names are not quoted (Oracle backend needs them unquoted).
            """
            if not columnName.islower():
                columnName = '"' + columnName + '"'
            return columnName

        schema = objects.getSchema()
        afw_fields = [field.getName() for key, field in schema]
        # _LOG.info("afw_fields: %s", afw_fields)

        column_map = self._schema.getAfwColumns(schema_table_name)
        # _LOG.info("column_map: %s", column_map)

        # list of columns (as in cat schema)
        fields = [column_map[field].name for field in afw_fields
                  if field in column_map]
        # _LOG.info("fields: %s", fields)

        # use extra columns that are not in fields already
        extra_fields = (extra_columns or {}).keys()
        extra_fields = [field for field in extra_fields if field not in fields]

        qfields = [quoteId(field) for field in fields + extra_fields]

        if not replace:
            vals = [":col{}".format(i) for i in range(len(fields))]
            vals += [":extcol{}".format(i) for i in range(len(extra_fields))]
            query = 'INSERT INTO ' + quoteId(table.name)
            query += ' (' + ','.join(qfields) + ') VALUES'
            query += ' (' + ','.join(vals) + ')'
        else:
            qvals = [":col{} {}".format(i, quoteId(field)) for i, field in enumerate(fields)]
            qvals += [":extcol{} {}".format(i, quoteId(field)) for i, field in enumerate(extra_fields)]
            pks = ('pixelId', 'diaObjectId')
            onexpr = ["SRC.{col} = DST.{col}".format(col=quoteId(col)) for col in pks]
            setexpr = ["DST.{col} = SRC.{col}".format(col=quoteId(col))
                       for col in fields + extra_fields if col not in pks]
            vals = ["SRC.{col}".format(col=quoteId(col)) for col in fields + extra_fields]
            query = "MERGE INTO {} DST ".format(quoteId(table.name))
            query += "USING (SELECT {} FROM DUAL) SRC ".format(", ".join(qvals))
            query += "ON ({}) ".format(" AND ".join(onexpr))
            query += "WHEN MATCHED THEN UPDATE SET {} ".format(", ".join(setexpr))
            query += "WHEN NOT MATCHED THEN INSERT "
            query += "({}) VALUES ({})".format(','.join(qfields), ','.join(vals))
        # _LOG.info("query: %s", query)

        values = []
        for rec in objects:
            row = {}
            col = 0
            for field in afw_fields:
                if field not in column_map:
                    continue
                value = rec[field]
                if column_map[field].type == "DATETIME" and not np.isnan(value):
                    # convert seconds into datetime
                    value = datetime.utcfromtimestamp(value)
                elif isinstance(value, geom.Angle):
                    value = str(value.asDegrees())
                elif not np.isfinite(value):
                    value = None
                row["col{}".format(col)] = value
                col += 1
            for i, field in enumerate(extra_fields):
                row["extcol{}".format(i)] = extra_columns[field]
            values.append(row)

        # _LOG.debug("query: %s", query)
        _LOG.info("%s: will store %d records", table.name, len(objects))
        with Timer(table.name + ' insert', self.config.timer):
            res = conn.execute(sql.text(query), values)
        _LOG.debug("inserted %s intervals", res.rowcount)

    def _convertResult(self, res, table_name, catalog=None):
        """Convert result set into output catalog.

        Parameters
        ----------
        res : `sqlalchemy.ResultProxy`
            SQLAlchemy result set returned by query.
        table_name : `str`
            Name of the table.
        catalog : `lsst.afw.table.BaseCatalog`, optional
            If not None then extend existing catalog.

        Returns
        -------
        catalog : `lsst.afw.table.SourceCatalog`
            If ``catalog`` is None then new instance is returned, otherwise
            ``catalog`` is updated and returned.
        """
        # make catalog schema
        columns = res.keys()
        schema, col_map = self._schema.getAfwSchema(table_name, columns)
        if catalog is None:
            _LOG.debug("_convertResult: schema: %s", schema)
            _LOG.debug("_convertResult: col_map: %s", col_map)
            catalog = afwTable.SourceCatalog(schema)

        # fill catalog
        for row in res:
            record = catalog.addNew()
            for col, value in row.items():
                # some columns may exist in database but not included in afw schema
                col = col_map.get(col)
                if col is not None:
                    if isinstance(value, datetime):
                        # convert datetime to number of seconds
                        value = int((value - datetime.utcfromtimestamp(0)).total_seconds())
                    elif col.getTypeString() == 'Angle' and value is not None:
                        value = value * geom.degrees
                    if value is not None:
                        record.set(col, value)

        return catalog
Definition: fits.cc:833