"""Alert sources: Alertmanager plus log-derived alerts.""" import asyncio import json import httpx from app.config import ALERTMANAGER_URL from app.services import cluster_inventory, log_ingest _BASE = ALERTMANAGER_URL.rstrip("/") async def get_alerts() -> list: """Return normalised list of active alerts from Alertmanager and log analysis.""" cluster = await cluster_inventory.get_cluster_inventory() alertmanager_task = asyncio.create_task(_get_alertmanager_alerts(cluster)) log_task = asyncio.to_thread(_get_log_alerts, cluster) am_alerts, log_alerts = await asyncio.gather(alertmanager_task, log_task, return_exceptions=True) if isinstance(am_alerts, Exception): am_alerts = [] if isinstance(log_alerts, Exception): log_alerts = [] return sorted( [*am_alerts, *log_alerts], key=lambda alert: (_severity_rank(alert.get("severity")), alert.get("timestamp", "")), ) async def _get_alertmanager_alerts(cluster: dict) -> list: try: async with httpx.AsyncClient(timeout=5) as client: r = await client.get(f"{_BASE}/api/v2/alerts", params={"active": "true", "silenced": "false"}) r.raise_for_status() raw = r.json() except Exception: return [] alerts = [] for a in raw: labels = a.get("labels", {}) annotations = a.get("annotations", {}) name = labels.get("alertname", "Unknown") summary = annotations.get("summary", annotations.get("description", "")) nf_name = _infer_nf(name, summary, labels.get("instance", "")) nodes = cluster_inventory.find_nf_nodes(cluster, nf_name) if nf_name else [] alerts.append({ "name": name, "severity": labels.get("severity", "warning"), "instance": labels.get("instance", ""), "summary": summary, "nf": nf_name, "nodes": nodes, "source": "alertmanager", "timestamp": a.get("startsAt", ""), }) return alerts def _get_log_alerts(cluster: dict) -> list: node_map = {} for node in cluster.get("nodes", []): if node.get("hostname"): node_map[node["hostname"]] = node if node.get("address"): node_map[node["address"]] = node alerts = [] for ctx in log_ingest.recent_alert_context(limit=50): before = _decode_context(ctx.get("before_context")) after = _decode_context(ctx.get("after_context")) node_name = ctx.get("node", "") nodes = [] if node_name and node_name in node_map: nodes = [node_map[node_name]] alerts.append({ "name": f"{ctx.get('nf') or 'System'} log anomaly", "severity": ctx.get("severity", "warning"), "instance": ctx.get("source", ""), "summary": ctx.get("description", "Log-derived alert"), "nf": ctx.get("nf", ""), "nodes": nodes, "source": "logs", "timestamp": ctx.get("event_ts", ""), "context_id": ctx.get("id"), "node": node_name, "match_message": ctx.get("match_message", ""), "context_preview": { "before": before[-3:], "after": after[:3], }, }) return alerts def _decode_context(value: str | None) -> list[dict]: if not value: return [] try: data = json.loads(value) return data if isinstance(data, list) else [] except Exception: return [] def _severity_rank(severity: str | None) -> int: return {"critical": 0, "warning": 1, "info": 2}.get((severity or "warning").lower(), 3) def _infer_nf(name: str, summary: str, instance: str) -> str: text = f"{name} {summary} {instance}".upper() for nf_name in ["AMF", "SMF", "UPF", "UDM", "UDR", "NRF", "AUSF", "PCF", "MME", "SGWC", "DRA", "DSM"]: if nf_name in text: return nf_name return ""