deeper log model

Jake Kasper
2026-04-27 13:42:49 -04:00
parent 9ac96cee9a
commit e62f46b68c
17 changed files with 670 additions and 47 deletions


@@ -4,6 +4,7 @@ from __future__ import annotations
import asyncio
import json
import re
import sqlite3
from collections import deque
from datetime import UTC, datetime
@@ -22,22 +23,43 @@ from app.config import (
LOG_AUTO_CONFIGURE,
LOG_FLUENTBIT_MATCH,
LOG_INGEST_ENABLED,
LOG_PROCESS_BUFFER_LINES,
LOG_RECEIVER_BIND_HOST,
LOG_RECEIVER_FORMAT,
LOG_RECEIVER_HOST,
LOG_RECEIVER_PORT,
LOG_SUBSCRIBER_BUFFER_LINES,
LOG_TRACE_BUFFER_LINES,
LOG_TRACE_DEBUG_LEVEL,
LOG_TRACE_TARGET_SERVICES,
)
from app.services import pls
_server: asyncio.base_events.Server | None = None
_allowed_nfs = {nf.upper() for nf in LOG_ALLOWED_NFS}
_events: deque[dict[str, Any]] = deque(maxlen=max(LOG_BUFFER_LINES, 1))
_trace_events: deque[dict[str, Any]] = deque(maxlen=max(LOG_TRACE_BUFFER_LINES, LOG_BUFFER_LINES, 1))
_process_events: dict[str, deque[dict[str, Any]]] = {
nf.upper(): deque(maxlen=max(LOG_PROCESS_BUFFER_LINES, 1))
for nf in _allowed_nfs if nf != "SYSTEM"
}
_subscriber_events: dict[str, deque[dict[str, Any]]] = {}
_ingested_total = 0
_parse_errors = 0
_last_event_at: str | None = None
_db_initialized = False
_supi_pattern = re.compile(r"(imsi-\d{6,20}|\b\d{6,20}\b)", re.IGNORECASE)
_trace_state: dict[str, Any] = {
"active": False,
"filter": "",
"normalized": "",
"started_at": None,
"matched_events": 0,
"nodes": [],
"services": list(LOG_TRACE_TARGET_SERVICES),
"level": LOG_TRACE_DEBUG_LEVEL,
"original_levels": {},
}
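# Buffer model (as wired above): every ingested event lands in the global ring
# buffer (_events) and the larger trace buffer (_trace_events); it is also
# fanned out to a per-NF buffer (_process_events) and to lazily created
# per-SUPI buffers (_subscriber_events). _trace_state tracks an optional
# subscriber trace that temporarily raises log levels on target nodes.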
def _db_path() -> Path:
@@ -181,6 +203,43 @@ def _infer_nf(payload: dict[str, Any], message: str) -> str:
return "SYSTEM"
def _normalize_supi(value: str | None) -> str:
if not value:
return ""
text = str(value).strip().lower()
if not text:
return ""
if text.startswith("imsi-"):
digits = "".join(ch for ch in text[5:] if ch.isdigit())
return f"imsi-{digits}" if digits else text
digits = "".join(ch for ch in text if ch.isdigit())
if digits:
return f"imsi-{digits}"
return text
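# A sketch of the normalisation contract (illustrative values, not from a
# test suite):
#   _normalize_supi("IMSI-001010000000001") -> "imsi-001010000000001"
#   _normalize_supi("001010000000001")      -> "imsi-001010000000001"
#   _normalize_supi("no digits")            -> "no digits" (lowered, as-is)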
def _extract_supis(message: str) -> list[str]:
matches = []
for raw in _supi_pattern.findall(message or ""):
normalized = _normalize_supi(raw)
if normalized and normalized not in matches:
matches.append(normalized)
return matches
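# Example with a hypothetical log line: "PDU session for imsi-001010000000001,
# UE 001010000000001 registered" yields ["imsi-001010000000001"]; both
# matches normalise to the same SUPI, and first-seen order is preserved.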
def _matches_trace(event: dict[str, Any]) -> bool:
if not _trace_state.get("active"):
return False
normalized = _trace_state.get("normalized", "")
if not normalized:
return False
message = str(event.get("message", "")).lower()
if normalized in message:
return True
digits = normalized.removeprefix("imsi-")
return bool(digits and digits in message)
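# Matching is deliberately loose: an event counts when its message contains
# either the full normalised filter ("imsi-<digits>") or the bare digits, so
# vendor logs that print an IMSI without the prefix still register hits.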
def _normalize_event(payload: dict[str, Any], remote_host: str) -> dict[str, Any]:
ts_value = (
payload.get("timestamp")
@@ -215,6 +274,7 @@ def _normalize_event(payload: dict[str, Any], remote_host: str) -> dict[str, Any
or ""
)
message = str(message).strip()
supis = _extract_supis(message)
tag = str(payload.get("tag", ""))
nf = _infer_nf(payload, message)
fingerprint = sha1(f"{ts_iso}|{node}|{nf}|{source}|{message}".encode("utf-8")).hexdigest()
@@ -227,6 +287,7 @@ def _normalize_event(payload: dict[str, Any], remote_host: str) -> dict[str, Any
"source": str(source),
"tag": tag,
"message": message,
"supis": supis,
"raw": payload,
}
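# The sha1 fingerprint gives each line a stable identity derived from
# timestamp, node, NF, source and message; presumably this is the "id" that
# _sort_and_limit later dedupes on when buffers overlap.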
@@ -238,6 +299,16 @@ async def _ingest_payload(payload: dict[str, Any], remote_host: str) -> None:
return
_events.append(event)
_trace_events.append(event)
nf_key = event.get("nf", "").upper()
if nf_key:
_process_events.setdefault(nf_key, deque(maxlen=max(LOG_PROCESS_BUFFER_LINES, 1))).append(event)
for supi in event.get("supis", []):
_subscriber_events.setdefault(
supi,
deque(maxlen=max(LOG_SUBSCRIBER_BUFFER_LINES, 1)),
).append(event)
if _matches_trace(event):
_trace_state["matched_events"] = int(_trace_state.get("matched_events", 0)) + 1
_ingested_total += 1
_last_event_at = event["timestamp"]
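# Fan-out sketch: a single event can land in up to four kinds of buffer
# (global, trace, its NF's process buffer, and one per extracted SUPI),
# while an active trace only increments matched_events rather than copying
# the event again.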
@@ -300,6 +371,8 @@ def receiver_status() -> dict[str, Any]:
"format": LOG_RECEIVER_FORMAT,
"allowed_nfs": sorted(_allowed_nfs),
"buffer_lines": LOG_BUFFER_LINES,
"process_buffer_lines": LOG_PROCESS_BUFFER_LINES,
"subscriber_buffer_lines": LOG_SUBSCRIBER_BUFFER_LINES,
"trace_buffer_lines": LOG_TRACE_BUFFER_LINES,
"context_before": LOG_ALERT_CONTEXT_BEFORE,
"context_after": LOG_ALERT_CONTEXT_AFTER,
@@ -308,6 +381,17 @@ def receiver_status() -> dict[str, Any]:
"parse_errors": _parse_errors,
"last_event_at": _last_event_at,
"current_buffer_size": len(_events),
"process_buffers": sorted(_process_events.keys()),
"subscriber_buffers": len(_subscriber_events),
"trace": {
"active": bool(_trace_state.get("active")),
"filter": _trace_state.get("filter", ""),
"started_at": _trace_state.get("started_at"),
"matched_events": _trace_state.get("matched_events", 0),
"nodes": list(_trace_state.get("nodes", [])),
"services": list(_trace_state.get("services", [])),
"level": _trace_state.get("level", LOG_TRACE_DEBUG_LEVEL),
},
}
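# Illustrative trace fragment of the status payload (values assumed):
#   "trace": {"active": true, "filter": "001010000000001",
#             "started_at": "2026-04-27T17:42:49+00:00",
#             "matched_events": 42, "nodes": ["10.0.0.5"],
#             "services": ["open5gs-amfd"], "level": "debug"}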
@@ -396,8 +480,135 @@ async def configure_site_output() -> dict[str, Any]:
}
def _sort_and_limit(events: list[dict[str, Any]], limit: int | None = None) -> list[dict[str, Any]]:
deduped: dict[str, dict[str, Any]] = {}
for event in events:
deduped[event.get("id", str(id(event)))] = event
ordered = sorted(deduped.values(), key=lambda event: event.get("epoch", 0.0))
if limit is not None:
return ordered[-limit:]
return ordered
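# Dedupe-then-sort keeps the newest `limit` events: the sort ascends by epoch
# and the slice takes the tail, so callers receive chronological order ending
# at the most recent event.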
def get_process_events(nf: str, limit: int | None = None) -> list[dict[str, Any]]:
nf_key = str(nf or "").upper()
events = list(_process_events.get(nf_key, []))
return _sort_and_limit(events, limit)
def get_subscriber_events(supi_or_fragment: str, limit: int | None = None) -> list[dict[str, Any]]:
normalized = _normalize_supi(supi_or_fragment)
fragment = str(supi_or_fragment or "").strip().lower()
if not normalized and not fragment:
return []
matches: list[dict[str, Any]] = []
for supi, events in _subscriber_events.items():
digits = supi.removeprefix("imsi-")
if normalized and (supi == normalized or normalized in supi or normalized.removeprefix("imsi-") in digits):
matches.extend(events)
continue
if fragment and (fragment in supi.lower() or fragment in digits):
matches.extend(events)
return _sort_and_limit(matches, limit)
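# Usage sketch (call shapes assumed): full SUPIs, bare digit strings, and
# digit fragments all resolve, e.g.
#   get_subscriber_events("imsi-001010000000001")
#   get_subscriber_events("0000001", limit=50)  # fragment substring match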
async def _trace_target_nodes() -> list[dict[str, Any]]:
cluster = await pls.get_cluster_status()
nodes = []
if isinstance(cluster, dict):
for node in cluster.get("nodes", []):
host = pls.node_host(node.get("name", ""))
if host:
nodes.append({"name": node.get("name", ""), "host": host})
if not nodes:
system = await pls.get_system_info()
host = str(system.get("hostname", "") if isinstance(system, dict) else "") or "127.0.0.1"
nodes.append({"name": host, "host": host})
deduped = {}
for node in nodes:
deduped[node["host"]] = node
return list(deduped.values())
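# Node discovery degrades gracefully: cluster members first, then the local
# hostname, then 127.0.0.1, deduplicated by host so a log-level change is
# applied at most once per machine.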
async def start_subscriber_trace(supi_or_fragment: str) -> dict[str, Any]:
normalized = _normalize_supi(supi_or_fragment)
fragment = str(supi_or_fragment or "").strip()
if not normalized and not fragment:
raise RuntimeError("A SUPI or SUPI fragment is required to start a trace")
if _trace_state.get("active"):
await stop_subscriber_trace()
target_nodes = await _trace_target_nodes()
original_levels: dict[str, dict[str, Any]] = {}
applied_nodes: list[str] = []
for node in target_nodes:
host = node["host"]
current = await pls.get_log_config(host=host)
if not isinstance(current, dict):
continue
original_levels[host] = current
updated = dict(current)
updated["level"] = LOG_TRACE_DEBUG_LEVEL
await pls.put_log_config(updated, host=host)
applied_nodes.append(host)
_trace_state.update(
{
"active": True,
"filter": fragment,
"normalized": normalized or fragment.lower(),
"started_at": datetime.now(UTC).isoformat(),
"matched_events": 0,
"nodes": applied_nodes,
"services": list(LOG_TRACE_TARGET_SERVICES),
"level": LOG_TRACE_DEBUG_LEVEL,
"original_levels": original_levels,
}
)
return receiver_status()["trace"]
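# Rough lifecycle sketch (helper names from this module, error handling
# elided):
#   trace = await start_subscriber_trace("001010000000001")
#   ...let events accumulate...
#   summary = await stop_subscriber_trace()  # restores the saved log levels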
async def stop_subscriber_trace() -> dict[str, Any]:
original_levels = dict(_trace_state.get("original_levels", {}))
restored_nodes: list[str] = []
for host, config in original_levels.items():
try:
if isinstance(config, dict):
await pls.put_log_config(config, host=host)
restored_nodes.append(host)
except Exception:
continue
summary = {
"filter": _trace_state.get("filter", ""),
"started_at": _trace_state.get("started_at"),
"matched_events": _trace_state.get("matched_events", 0),
"restored_nodes": restored_nodes,
}
_trace_state.update(
{
"active": False,
"filter": "",
"normalized": "",
"started_at": None,
"matched_events": 0,
"nodes": [],
"services": list(LOG_TRACE_TARGET_SERVICES),
"level": LOG_TRACE_DEBUG_LEVEL,
"original_levels": {},
}
)
return summary
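# Restoration is best-effort: a node that rejects its saved config is skipped
# (and omitted from restored_nodes) rather than aborting the stop, so the
# trace state is always cleared.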
def get_events(limit: int | None = None, node: str | None = None, nf: str | None = None, imsi: str | None = None) -> list[dict[str, Any]]:
if imsi:
events = get_subscriber_events(imsi, limit=None)
elif nf:
events = get_process_events(nf, limit=None)
else:
events = list(_events)
if node:
node_l = node.lower()
events = [event for event in events if event.get("node", "").lower() == node_l]
@@ -405,12 +616,20 @@ def get_events(limit: int | None = None, node: str | None = None, nf: str | None
nf_u = nf.upper()
events = [event for event in events if event.get("nf", "").upper() == nf_u]
if imsi:
needle = str(imsi).strip().lower()
normalized = _normalize_supi(imsi)
digits = normalized.removeprefix("imsi-") if normalized else ""
events = [
event for event in events
if needle and (
needle in event.get("message", "").lower()
or any(
needle in supi.lower() or (digits and digits in supi.removeprefix("imsi-"))
for supi in event.get("supis", [])
)
)
]
return _sort_and_limit(events, limit)
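# The imsi filter checks both the raw message text and the pre-extracted
# `supis` list, so an event whose message spells the IMSI differently from
# the query string is still returned.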
def record_alert_context(