LSST Applications g042eb84c57+730a74494b,g04e9c324dd+8c5ae1fdc5,g134cb467dc+1f1e3e7524,g199a45376c+0ba108daf9,g1fd858c14a+fa7d31856b,g210f2d0738+f66ac109ec,g262e1987ae+83a3acc0e5,g29ae962dfc+d856a2cb1f,g2cef7863aa+aef1011c0b,g35bb328faa+8c5ae1fdc5,g3fd5ace14f+a1e0c9f713,g47891489e3+0d594cb711,g4d44eb3520+c57ec8f3ed,g4d7b6aa1c5+f66ac109ec,g53246c7159+8c5ae1fdc5,g56a1a4eaf3+fd7ad03fde,g64539dfbff+f66ac109ec,g67b6fd64d1+0d594cb711,g67fd3c3899+f66ac109ec,g6985122a63+0d594cb711,g74acd417e5+3098891321,g786e29fd12+668abc6043,g81db2e9a8d+98e2ab9f28,g87389fa792+8856018cbb,g89139ef638+0d594cb711,g8d7436a09f+80fda9ce03,g8ea07a8fe4+760ca7c3fc,g90f42f885a+033b1d468d,g97be763408+a8a29bda4b,g99822b682c+e3ec3c61f9,g9d5c6a246b+0d5dac0c3d,ga41d0fce20+9243b26dd2,gbf99507273+8c5ae1fdc5,gd7ef33dd92+0d594cb711,gdab6d2f7ff+3098891321,ge410e46f29+0d594cb711,geaed405ab2+c4bbc419c6,gf9a733ac38+8c5ae1fdc5,w.2025.38
LSST Data Management Base Package
Loading...
Searching...
No Matches
lsst.dax.apdb.scripts.metrics Namespace Reference

Functions

None metrics_log_to_influx (Iterable[str] file, str context_keys, str extra_tags, bool fix_row_count, str mode, str prefix, bool no_header, str header_database)
 
None _metrics_log_to_influx (TextIO file, Iterable[str] context_keys, dict[str, Any] extra_tags, set[str] drop_tags, bool fix_row_count, str mode, str prefix)
 
None _print_metrics (str name, dict[str, Any] tags, dict[str, Any] values, float timestamp)
 
dict[str, Any] _extract_mdc (re.Match match, Iterable[str] context_keys)
 

Variables

 _LOG_LINE_RE_PIPELINE
 
 _LOG_LINE_RE_REPLICATION
 
 _LOG_LINE_RE_AP_PROTO
 
 _LOG_LINE_RE_JSON_LINE = re.compile("^(?P<metric>.*)$")
 
 _LOG_LINE_CASSANDRA_RE
 
 _AP_PIPE_DIAOBJECTS_RE = re.compile(r"Calculating summary stats for (?P<count>\d+) DiaObjects")
 
 _AP_PIPE_DIASOURCES_RE
 
 _AP_PIPE_DIAFORCED_RE = re.compile(r"Updating (?P<count>\d+) diaForcedSources in the APDB")
 
tuple _CASSNDRA_MESSAGES_RE
 
set _SKIP_METRICS_REPLICATION
 
set _SKIP_METRICS_AP_PROTO
 

Function Documentation

◆ _extract_mdc()

dict[str, Any] lsst.dax.apdb.scripts.metrics._extract_mdc ( re.Match match,
Iterable[str] context_keys )
protected

Definition at line 276 of file metrics.py.

276def _extract_mdc(match: re.Match, context_keys: Iterable[str]) -> dict[str, Any]:
277 tags: dict[str, Any] = {}
278 mdc_str = match.group("MDC")
279 if mdc_str:
280 mdc_str = mdc_str.replace("'", '"')
281 mdc: dict[str, Any] = yaml.safe_load(io.StringIO(mdc_str))
282 for tag in context_keys:
283 if (tag_val := mdc.get(tag)) is not None:
284 tags[tag] = tag_val
285 return tags

◆ _metrics_log_to_influx()

None lsst.dax.apdb.scripts.metrics._metrics_log_to_influx ( TextIO file,
Iterable[str] context_keys,
dict[str, Any] extra_tags,
set[str] drop_tags,
bool fix_row_count,
str mode,
str prefix )
protected
Parse metrics from a single file.

Definition at line 178 of file metrics.py.

186) -> None:
187 """Parse metrics from a single file."""
188 objects_count = -1
189 sources_count = -1
190 forced_sources_count = -1
191
192 match mode:
193 case "pipeline":
194 line_re = _LOG_LINE_RE_PIPELINE
195 case "replication":
196 line_re = _LOG_LINE_RE_REPLICATION
197 case "ap_proto":
198 line_re = _LOG_LINE_RE_AP_PROTO
199 case "json_line":
200 line_re = _LOG_LINE_RE_JSON_LINE
201 case _:
202 raise ValueError(f"Unexpected mode: {mode}")
203
204 for line in file:
205 line = line.strip()
206 if fix_row_count and mode == "pipeline":
207 # Counts come from separate AP messages.
208 if match := _AP_PIPE_DIAOBJECTS_RE.search(line):
209 objects_count = int(match.group("count"))
210 elif match := _AP_PIPE_DIASOURCES_RE.search(line):
211 sources_count = int(match.group("count1")) + int(match.group("count2"))
212 elif match := _AP_PIPE_DIAFORCED_RE.search(line):
213 forced_sources_count = int(match.group("count"))
214
215 if match := line_re.match(line):
216 metric_str = match.group("metric")
217 try:
218 metric: dict[str, Any] = json.loads(metric_str)
219 except json.JSONDecodeError:
 220 # Ignore parsing errors, sometimes it happens that lines are
221 # scrambled.
222 continue
223 tags = dict(extra_tags)
224
225 name: str = metric["name"]
226 if mode == "replication":
227 if name in _SKIP_METRICS_REPLICATION:
228 continue
229 elif mode == "ap_proto":
230 if name in _SKIP_METRICS_AP_PROTO:
231 continue
232
233 timestamp: float = metric["timestamp"]
234 for tag, tag_val in metric["tags"].items():
235 tags[tag] = tag_val
236 values: dict[str, Any] = metric["values"]
237
238 if fix_row_count and name == "insert_time":
239 if tags["table"].startswith("DiaObject"):
240 values["row_count"] = objects_count
241 elif tags["table"].startswith("DiaSource"):
242 values["row_count"] = sources_count
243 elif tags["table"].startswith("DiaForcedSource"):
244 values["row_count"] = forced_sources_count
245
246 if mode == "pipeline" and context_keys:
247 tags.update(_extract_mdc(match, context_keys))
248
249 for tag in drop_tags:
250 tags.pop(tag, None)
251
252 _print_metrics(prefix + name, tags, values, timestamp)
253
254 elif match := _LOG_LINE_CASSANDRA_RE.match(line):
255 tags = dict(extra_tags)
256 tags["level"] = match.group("level").lower()
257 dt = datetime.fromisoformat(match.group("datetime"))
258 timestamp = dt.timestamp()
259 tags.update(_extract_mdc(match, context_keys))
260 values = {"count": 1}
261
262 message = match.group("message")
263 for message_re, name in _CASSNDRA_MESSAGES_RE:
264 if (message_match := message_re.search(message)) is not None:
265 tags.update(message_match.groupdict())
266 _print_metrics(prefix + name, tags, values, timestamp)
267 break
268
269

◆ _print_metrics()

None lsst.dax.apdb.scripts.metrics._print_metrics ( str name,
dict[str, Any] tags,
dict[str, Any] values,
float timestamp )
protected

Definition at line 270 of file metrics.py.

270def _print_metrics(name: str, tags: dict[str, Any], values: dict[str, Any], timestamp: float) -> None:
271 tags_str = ",".join([name] + [f"{key}={val}" for key, val in tags.items()])
272 values_str = ",".join(f"{key}={val}" for key, val in values.items())
273 print(f"{tags_str} {values_str} {int(timestamp * 1e9)}")
274
275

◆ metrics_log_to_influx()

None lsst.dax.apdb.scripts.metrics.metrics_log_to_influx ( Iterable[str] file,
str context_keys,
str extra_tags,
bool fix_row_count,
str mode,
str prefix,
bool no_header,
str header_database )
Extract metrics from log file and dump as InfluxDB data.

Parameters
----------
file : `~collections.abc.Iterable` [`str`]
    Names of the files to parse for metrics.
context_keys : `str`
    Names of keys to extract from message context, comma-separated.
extra_tags : `str`
    Additional tags to add to each record, comma-separated key=value pairs.
fix_row_count : `bool`
    If True then extract record counts from pipeline messages instead of
    metrics. A workaround for broken metrics.
mode : `str`
    Source of the log, one of "ap_proto", "pipeline", "replication",
    "json_line".
prefix : `str`
    Prefix to add to each metric name.
no_header : `bool`
    If True then do not print DML header.
header_database : `str`
    Name of the database for DML header.

Definition at line 115 of file metrics.py.

124) -> None:
125 """Extract metrics from log file and dump as InfluxDB data.
126
127 Parameters
128 ----------
129 file : `~collections.abc.Iterable` [`str`]
130 Names of the files to parse for metrics.
131 context_keys : `str`
132 Names of keys to extract from message context, comma-separated.
133 extra_tags : `str`
134 Additional tags to add to each record, comma-separated key=value pairs.
135 fix_row_count : `bool`
 136 If True then extract record counts from pipeline messages instead of
137 metrics. A workaround for broken metrics.
138 mode : `str`
139 Source of the log, one of "ap_proto", "pipeline", "replication",
140 "json_line".
141 prefix : `str`
 142 Prefix to add to each metric name.
143 no_header : `bool`
 144 If True then do not print DML header.
145 header_database : `str`
146 Name of the database for DML header.
147 """
148 context_names = [name for name in context_keys.split(",") if name]
149 tags: dict[str, Any] = {}
150 drop_tags: set[str] = set()
151 for tag_val in extra_tags.split(","):
152 if tag_val:
153 tag, _, val = tag_val.partition("=")
154 if tag.startswith("-"):
155 drop_tags.add(tag.strip("-"))
156 else:
157 tags[tag] = val
158
159 if not no_header:
160 print(
161 f"""\
162# DML
163
164# CONTEXT-DATABASE: {header_database}
165"""
166 )
167
168 if not file:
169 file = ["-"]
170 for file_name in file:
171 if file_name == "-":
172 _metrics_log_to_influx(sys.stdin, context_names, tags, drop_tags, fix_row_count, mode, prefix)
173 else:
174 with open(file_name) as file_obj:
175 _metrics_log_to_influx(file_obj, context_names, tags, drop_tags, fix_row_count, mode, prefix)
176
177

Variable Documentation

◆ _AP_PIPE_DIAFORCED_RE

lsst.dax.apdb.scripts.metrics._AP_PIPE_DIAFORCED_RE = re.compile(r"Updating (?P<count>\d+) diaForcedSources in the APDB")
protected

Definition at line 86 of file metrics.py.

◆ _AP_PIPE_DIAOBJECTS_RE

lsst.dax.apdb.scripts.metrics._AP_PIPE_DIAOBJECTS_RE = re.compile(r"Calculating summary stats for (?P<count>\d+) DiaObjects")
protected

Definition at line 82 of file metrics.py.

◆ _AP_PIPE_DIASOURCES_RE

lsst.dax.apdb.scripts.metrics._AP_PIPE_DIASOURCES_RE
protected
Initial value:
1= re.compile(
2 r"(?P<count1>\d+) updated and \d+ unassociated diaObjects. Creating (?P<count2>\d+) new diaObjects"
3)

Definition at line 83 of file metrics.py.

◆ _CASSNDRA_MESSAGES_RE

tuple lsst.dax.apdb.scripts.metrics._CASSNDRA_MESSAGES_RE
protected
Initial value:
1= (
2 (re.compile(r"^Error preparing query for host (?P<host>\S+):$"), "error_prepare_query"),
3 (re.compile(r"^Control connection failed to connect"), "error_control_connect"),
4 (
5 re.compile(r"^Unexpected failure handling node (?P<host>\S+) being marked up:$"),
6 "error_failure_marking_up",
7 ),
8 (re.compile(r"^Failed to submit task to executor$"), "error_submit_task"),
9 (re.compile(r"^Failed to create connection pool for new host (?P<host>\S+):$"), "warn_create_pool"),
10 (re.compile(r"^Error attempting to reconnect to (?P<host>\S+),"), "warn_reconnect"),
11 (re.compile(r"^Host (?P<host>\S+) has been marked down"), "warn_host_down"),
12)

Definition at line 88 of file metrics.py.

◆ _LOG_LINE_CASSANDRA_RE

lsst.dax.apdb.scripts.metrics._LOG_LINE_CASSANDRA_RE
protected
Initial value:
1= re.compile(
2 ,
3 re.VERBOSE,
4)

Definition at line 71 of file metrics.py.

◆ _LOG_LINE_RE_AP_PROTO

lsst.dax.apdb.scripts.metrics._LOG_LINE_RE_AP_PROTO
protected
Initial value:
1= re.compile(
2 ,
3 re.VERBOSE,
4)

Definition at line 57 of file metrics.py.

◆ _LOG_LINE_RE_JSON_LINE

lsst.dax.apdb.scripts.metrics._LOG_LINE_RE_JSON_LINE = re.compile("^(?P<metric>.*)$")
protected

Definition at line 68 of file metrics.py.

◆ _LOG_LINE_RE_PIPELINE

lsst.dax.apdb.scripts.metrics._LOG_LINE_RE_PIPELINE
protected
Initial value:
1= re.compile(
2 ,
3 re.VERBOSE,
4)

Definition at line 36 of file metrics.py.

◆ _LOG_LINE_RE_REPLICATION

lsst.dax.apdb.scripts.metrics._LOG_LINE_RE_REPLICATION
protected
Initial value:
1= re.compile(
2 ,
3 re.VERBOSE,
4)

Definition at line 47 of file metrics.py.

◆ _SKIP_METRICS_AP_PROTO

set lsst.dax.apdb.scripts.metrics._SKIP_METRICS_AP_PROTO
protected
Initial value:
1= {
2 "read_metadata_config",
3 "version_check",
4 "insert_build_time",
5}

Definition at line 108 of file metrics.py.

◆ _SKIP_METRICS_REPLICATION

set lsst.dax.apdb.scripts.metrics._SKIP_METRICS_REPLICATION
protected
Initial value:
1= {
2 "read_metadata_config",
3 "version_check",
4}

Definition at line 103 of file metrics.py.