Added multi-node functionality

This commit is contained in:
Jake Kasper
2026-04-24 12:33:52 -04:00
parent c4c081362e
commit 16e5f2ced2
30 changed files with 673 additions and 93 deletions

View File

@@ -235,20 +235,23 @@ async def analyze_logs() -> dict:
Gather log-pattern issues + Prometheus NF status + Alertmanager alerts.
Returns a fully structured dict ready for JSON serialisation.
"""
from app.services import alertmanager, prometheus
from app.services import alertmanager, prometheus, cluster_inventory
# Kick off all I/O in parallel
containers_f = asyncio.create_task(_discover_containers())
alerts_f = asyncio.create_task(alertmanager.get_alerts())
nf_status_f = asyncio.create_task(prometheus.get_nf_status())
cluster_f = asyncio.create_task(cluster_inventory.get_cluster_inventory())
containers = await containers_f
alerts, nf_statuses = await asyncio.gather(alerts_f, nf_status_f,
alerts, nf_statuses, cluster = await asyncio.gather(alerts_f, nf_status_f, cluster_f,
return_exceptions=True)
if isinstance(alerts, Exception):
alerts = []
if isinstance(nf_statuses, Exception):
nf_statuses = []
if isinstance(cluster, Exception):
cluster = {"enabled": False, "nodes": []}
# Read all container logs concurrently
log_tasks = {nf: asyncio.create_task(_read_logs(cname))
@@ -280,25 +283,29 @@ async def analyze_logs() -> dict:
# 2. NF-down events from Prometheus
for nf_st in nf_statuses:
if isinstance(nf_st, dict) and nf_st.get("state") == "down":
node_text = ", ".join(node["hostname"] for node in nf_st.get("nodes", []))
issues.append({
"id": f"nf-down-{nf_st['name']}",
"category": "Connectivity",
"nf": nf_st["name"],
"node": node_text,
"severity": "critical",
"count": 1,
"description": f"{nf_st['name']} is unreachable",
"remediation": (f"Run `{CONTAINER_RUNTIME} ps` and check if {nf_st['name']} "
f"container is running; inspect its logs."),
"remediation": (f"Check {node_text or 'the hosting node'} first, then run "
f"`{CONTAINER_RUNTIME} ps` and inspect `{nf_st['name'].lower()}` logs."),
"source": "prometheus",
})
# 3. Active Alertmanager alerts
for alert in alerts:
if isinstance(alert, dict):
node_text = ", ".join(node["hostname"] for node in alert.get("nodes", []))
issues.append({
"id": f"alert-{alert.get('name', '')}-{len(issues)}",
"category": _alert_category(alert),
"nf": _alert_nf(alert),
"node": node_text,
"severity": alert.get("severity", "warning"),
"count": 1,
"description": alert.get("summary") or alert.get("name", "Unknown alert"),
@@ -331,6 +338,7 @@ async def analyze_logs() -> dict:
"categories": categories,
"timestamp": datetime.now().isoformat(),
"log_sources": list(containers.keys()),
"cluster": cluster,
}
# Persist to history ring-buffer