added multi node functionality
This commit is contained in:
BIN
app/services/__pycache__/__init__.cpython-314.pyc
Normal file
BIN
app/services/__pycache__/__init__.cpython-314.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
app/services/__pycache__/alertmanager.cpython-314.pyc
Normal file
BIN
app/services/__pycache__/alertmanager.cpython-314.pyc
Normal file
Binary file not shown.
BIN
app/services/__pycache__/cluster_inventory.cpython-314.pyc
Normal file
BIN
app/services/__pycache__/cluster_inventory.cpython-314.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
app/services/__pycache__/pls.cpython-314.pyc
Normal file
BIN
app/services/__pycache__/pls.cpython-314.pyc
Normal file
Binary file not shown.
BIN
app/services/__pycache__/prometheus.cpython-314.pyc
Normal file
BIN
app/services/__pycache__/prometheus.cpython-314.pyc
Normal file
Binary file not shown.
@@ -30,6 +30,7 @@ async def answer(query: str, network_state: dict, alerts: list) -> str:
|
||||
def _rule_based(query: str, network_state: dict, alerts: list) -> str:
|
||||
q = query.lower()
|
||||
nfs = network_state.get("nfs", [])
|
||||
cluster = network_state.get("cluster", {})
|
||||
up = [n for n in nfs if n["state"] == "up"]
|
||||
down = [n for n in nfs if n["state"] == "down"]
|
||||
|
||||
@@ -58,26 +59,40 @@ def _rule_based(query: str, network_state: dict, alerts: list) -> str:
|
||||
return _alerts_summary(alerts)
|
||||
|
||||
if any(w in q for w in ["subscriber", "ue ", "device", "phone", "handset", "registration", "attach"]):
|
||||
return _subscriber_analysis(nfs, alerts)
|
||||
return _subscriber_analysis(nfs, alerts, cluster)
|
||||
|
||||
if any(w in q for w in ["session", "pdu", "bearer", "user plane", "traffic", "throughput"]):
|
||||
return _session_analysis(nfs, alerts)
|
||||
return _session_analysis(nfs, alerts, cluster)
|
||||
|
||||
# Default → health summary
|
||||
return _health_summary(up, down, alerts)
|
||||
return _health_summary(up, down, alerts, cluster)
|
||||
|
||||
|
||||
def _health_summary(up: list, down: list, alerts: list) -> str:
|
||||
def _health_summary(up: list, down: list, alerts: list, cluster: dict) -> str:
|
||||
ts = datetime.now().strftime("%H:%M:%S")
|
||||
crit = [a for a in alerts if a.get("severity") == "critical"]
|
||||
warn = [a for a in alerts if a.get("severity") != "critical"]
|
||||
lines = [f"**P5G Network Health — {ts}**\n"]
|
||||
nodes = cluster.get("nodes", [])
|
||||
|
||||
if up:
|
||||
lines.append(f"✅ **{len(up)} UP**: {', '.join(n['name'] for n in up)}")
|
||||
lines.append(f"✅ **{len(up)} UP**: {', '.join(_nf_label(n) for n in up)}")
|
||||
if down:
|
||||
lines.append(f"🔴 **{len(down)} DOWN**: {', '.join(n['name'] for n in down)}")
|
||||
lines.append(f" ⚡ Action: check `{CONTAINER_RUNTIME} logs <nf>` in the runtime host")
|
||||
lines.append(f"🔴 **{len(down)} DOWN**: {', '.join(_nf_label(n) for n in down)}")
|
||||
lines.append(" ⚡ Action: inspect the node shown for each affected NF before pulling logs.")
|
||||
|
||||
if nodes:
|
||||
lines.append(f"\n**Cluster nodes ({len(nodes)})**")
|
||||
for node in nodes:
|
||||
running = [nf["name"] for nf in node.get("nfs", []) if nf.get("state") == "up"]
|
||||
down_nfs = [nf["name"] for nf in node.get("nfs", []) if nf.get("state") == "down"]
|
||||
role = node.get("role", "AP")
|
||||
lines.append(
|
||||
f"• **{node['hostname']}** ({role}{', local' if node.get('current') else ''})"
|
||||
f" — running: {', '.join(running) or 'none'}"
|
||||
)
|
||||
if down_nfs:
|
||||
lines.append(f" down here: {', '.join(down_nfs)}")
|
||||
|
||||
if alerts:
|
||||
lines.append(f"\n⚠️ **{len(alerts)} alert(s)** — {len(crit)} critical, {len(warn)} warning")
|
||||
@@ -102,8 +117,15 @@ def _nf_detail(nf_name: str, nfs: list, alerts: list) -> str:
|
||||
f"Check: `{CONTAINER_RUNTIME} ps | grep {nf_name.lower()}`")
|
||||
|
||||
icon = "✅" if nf["state"] == "up" else "🔴"
|
||||
lines = [f"{icon} **{nf_name}** is **{nf['state'].upper()}**",
|
||||
f"Instance: `{nf.get('instance', 'n/a')}`"]
|
||||
placements = nf.get("nodes", [])
|
||||
lines = [f"{icon} **{nf_name}** is **{nf['state'].upper()}**"]
|
||||
if placements:
|
||||
node_text = ", ".join(
|
||||
f"{node['hostname']} ({'/'.join(node.get('roles', []))})"
|
||||
for node in placements
|
||||
)
|
||||
lines.append(f"Nodes: {node_text}")
|
||||
lines.append(f"Instance: `{nf.get('instance', 'n/a')}`")
|
||||
if nf_alerts:
|
||||
lines.append(f"\n⚠️ {len(nf_alerts)} alert(s) for {nf_name}:")
|
||||
for a in nf_alerts:
|
||||
@@ -129,43 +151,72 @@ def _alerts_summary(alerts: list) -> str:
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _subscriber_analysis(nfs: list, alerts: list) -> str:
|
||||
def _subscriber_analysis(nfs: list, alerts: list, cluster: dict) -> str:
|
||||
amf = next((n for n in nfs if n["name"] == "AMF"), None)
|
||||
smf = next((n for n in nfs if n["name"] == "SMF"), None)
|
||||
lines = ["**Subscriber & Registration Analysis**\n"]
|
||||
lines.append(f"AMF (registration/mobility): {'✅ UP' if amf and amf['state'] == 'up' else '🔴 DOWN — subscribers cannot register'}")
|
||||
lines.append(f"SMF (session management): {'✅ UP' if smf and smf['state'] == 'up' else '🔴 DOWN — no new data sessions'}")
|
||||
lines.append(f"AMF (registration/mobility): {_nf_sentence(amf, 'subscribers cannot register')}")
|
||||
lines.append(f"SMF (session management): {_nf_sentence(smf, 'no new data sessions')}")
|
||||
sub_alerts = [a for a in alerts if any(k in a.get("name", "").lower()
|
||||
for k in ["ue", "subscriber", "session", "attach", "registration"])]
|
||||
if sub_alerts:
|
||||
lines.append(f"\n⚠️ {len(sub_alerts)} subscriber-related alert(s) active.")
|
||||
else:
|
||||
lines.append("\nNo subscriber-related alerts detected.")
|
||||
lines.append(_cluster_scope(cluster))
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _session_analysis(nfs: list, alerts: list) -> str:
|
||||
def _session_analysis(nfs: list, alerts: list, cluster: dict) -> str:
|
||||
smf = next((n for n in nfs if n["name"] == "SMF"), None)
|
||||
upf = next((n for n in nfs if n["name"] == "UPF"), None)
|
||||
lines = ["**PDU Session & Data Plane Analysis**\n"]
|
||||
lines.append(f"SMF: {'✅ UP' if smf and smf['state'] == 'up' else '🔴 DOWN'}")
|
||||
lines.append(f"UPF: {'✅ UP' if upf and upf['state'] == 'up' else '🔴 DOWN'}")
|
||||
lines.append(f"SMF: {_nf_sentence(smf, 'session setup is blocked')}")
|
||||
lines.append(f"UPF: {_nf_sentence(upf, 'user-plane forwarding is blocked')}")
|
||||
if (not smf or smf["state"] != "up") or (not upf or upf["state"] != "up"):
|
||||
lines.append("\n⚡ **Impact**: PDU sessions will fail until both SMF and UPF are operational.")
|
||||
else:
|
||||
lines.append("\nBoth SMF and UPF operational — sessions should be establishing normally.")
|
||||
lines.append(_cluster_scope(cluster))
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _nf_label(nf: dict) -> str:
|
||||
placements = nf.get("nodes", [])
|
||||
if not placements:
|
||||
return nf["name"]
|
||||
return f"{nf['name']} on {', '.join(node['hostname'] for node in placements)}"
|
||||
|
||||
|
||||
def _nf_sentence(nf: dict | None, impact: str) -> str:
|
||||
if not nf:
|
||||
return "○ N/A"
|
||||
if nf.get("state") == "up":
|
||||
nodes = ", ".join(node["hostname"] for node in nf.get("nodes", [])) or nf.get("instance", "unknown host")
|
||||
return f"✅ UP on {nodes}"
|
||||
return f"🔴 DOWN — {impact}"
|
||||
|
||||
|
||||
def _cluster_scope(cluster: dict) -> str:
|
||||
nodes = cluster.get("nodes", [])
|
||||
if not nodes:
|
||||
return "\nCluster discovery is not available."
|
||||
details = ", ".join(f"{node['hostname']} ({node.get('role', 'AP')})" for node in nodes)
|
||||
return f"\nCluster scope checked: {details}"
|
||||
|
||||
|
||||
# ── LLM backends ──────────────────────────────────────────────────────────
|
||||
|
||||
def _build_context(network_state: dict, alerts: list) -> str:
|
||||
nfs = network_state.get("nfs", [])
|
||||
up = [n["name"] for n in nfs if n["state"] == "up"]
|
||||
down = [n["name"] for n in nfs if n["state"] == "down"]
|
||||
nodes = network_state.get("cluster", {}).get("nodes", [])
|
||||
node_summary = ", ".join(f"{node['hostname']} ({node.get('role', 'AP')})" for node in nodes) or "none"
|
||||
return (
|
||||
f"NFs UP: {', '.join(up) or 'none'}\n"
|
||||
f"NFs DOWN: {', '.join(down) or 'none'}\n"
|
||||
f"Cluster nodes: {node_summary}\n"
|
||||
f"Active alerts: {', '.join(a.get('name','') for a in alerts[:5]) or 'none'}"
|
||||
)
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
import httpx
|
||||
from app.config import ALERTMANAGER_URL
|
||||
from app.services import cluster_inventory
|
||||
|
||||
_BASE = ALERTMANAGER_URL.rstrip("/")
|
||||
|
||||
@@ -16,14 +17,29 @@ async def get_alerts() -> list:
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
cluster = await cluster_inventory.get_cluster_inventory()
|
||||
alerts = []
|
||||
for a in raw:
|
||||
labels = a.get("labels", {})
|
||||
annotations = a.get("annotations", {})
|
||||
name = labels.get("alertname", "Unknown")
|
||||
summary = annotations.get("summary", annotations.get("description", ""))
|
||||
nf_name = _infer_nf(name, summary, labels.get("instance", ""))
|
||||
nodes = cluster_inventory.find_nf_nodes(cluster, nf_name) if nf_name else []
|
||||
alerts.append({
|
||||
"name": labels.get("alertname", "Unknown"),
|
||||
"name": name,
|
||||
"severity": labels.get("severity", "warning"),
|
||||
"instance": labels.get("instance", ""),
|
||||
"summary": annotations.get("summary", annotations.get("description", "")),
|
||||
"summary": summary,
|
||||
"nf": nf_name,
|
||||
"nodes": nodes,
|
||||
})
|
||||
return alerts
|
||||
|
||||
|
||||
def _infer_nf(name: str, summary: str, instance: str) -> str:
|
||||
text = f"{name} {summary} {instance}".upper()
|
||||
for nf_name in ["AMF", "SMF", "UPF", "UDM", "UDR", "NRF", "AUSF", "PCF", "MME", "SGWC", "DRA", "DSM"]:
|
||||
if nf_name in text:
|
||||
return nf_name
|
||||
return ""
|
||||
|
||||
180
app/services/cluster_inventory.py
Normal file
180
app/services/cluster_inventory.py
Normal file
@@ -0,0 +1,180 @@
|
||||
"""Cluster discovery built on top of the PLS API."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
|
||||
from app.config import ALL_NFS
|
||||
from app.services import pls, prometheus
|
||||
|
||||
ROLE_NF_MAP = {
|
||||
"5GALL": {"amf", "smf", "pcf", "udr", "udm", "nrf", "eir", "ausf", "dra", "upf", "chf", "smsf", "aaa", "bmsc"},
|
||||
"CP": {"amf", "smf", "pcf", "udr", "udm", "nrf", "eir", "ausf", "dra", "chf", "smsf", "aaa", "bmsc"},
|
||||
"UP": {"upf"},
|
||||
"DCP": {"amf", "smf", "pcf", "chf", "smsf", "bmsc"},
|
||||
"DLF": {"udr", "udm", "nrf", "eir", "ausf", "aaa"},
|
||||
"SIG": {"dra"},
|
||||
"4GALL": {"mme", "sgwc", "smf", "pcf", "chf", "udr", "udm", "nrf", "eir", "ausf", "dra", "upf", "smsf", "aaa", "bmsc"},
|
||||
"4GCP": {"mme", "sgwc", "smf", "pcf", "chf", "udr", "udm", "nrf", "eir", "ausf", "dra", "smsf", "aaa", "bmsc"},
|
||||
"4GDCP": {"mme", "sgwc", "smf", "pcf", "chf", "smsf", "bmsc"},
|
||||
"COMBOALL": {"amf", "mme", "sgwc", "smf", "pcf", "chf", "udr", "udm", "nrf", "eir", "ausf", "dra", "upf", "smsf", "aaa", "bmsc"},
|
||||
"COMBOCP": {"amf", "mme", "sgwc", "smf", "pcf", "chf", "udr", "udm", "nrf", "eir", "ausf", "dra", "smsf", "aaa", "bmsc"},
|
||||
"COMBODCP": {"amf", "mme", "sgwc", "smf", "pcf", "chf", "aaa"},
|
||||
}
|
||||
ROLE_ALIASES = {
|
||||
"UPF": "UP",
|
||||
}
|
||||
ROLE_PRIORITY = ["COMBOALL", "COMBOCP", "COMBODCP", "5GALL", "4GALL", "4GCP", "4GDCP", "DCP", "DLF", "SIG", "CP", "UP"]
|
||||
|
||||
|
||||
def _infer_role(hostname: str) -> str:
|
||||
tokens = [t for t in re.split(r"[^A-Za-z0-9]+", hostname.upper()) if t]
|
||||
normalized = [ROLE_ALIASES.get(token, token) for token in tokens]
|
||||
for role in ROLE_PRIORITY:
|
||||
if role in normalized:
|
||||
return role
|
||||
for token in normalized:
|
||||
if token.endswith("UPF"):
|
||||
return "UP"
|
||||
return "AP"
|
||||
|
||||
|
||||
async def get_cluster_inventory() -> dict:
|
||||
cluster = await pls.get_cluster_status()
|
||||
if not cluster:
|
||||
return {
|
||||
"enabled": False,
|
||||
"current_node": None,
|
||||
"fully_established": False,
|
||||
"nodes": [],
|
||||
}
|
||||
|
||||
node_names = [node.get("name", "") for node in cluster.get("nodes", [])]
|
||||
info_tasks = [asyncio.create_task(pls.get_system_info(pls.node_host(name))) for name in node_names]
|
||||
service_tasks = [asyncio.create_task(pls.get_services(pls.node_host(name))) for name in node_names]
|
||||
infos = await asyncio.gather(*info_tasks, return_exceptions=True)
|
||||
services = await asyncio.gather(*service_tasks, return_exceptions=True)
|
||||
|
||||
nodes: list[dict] = []
|
||||
for idx, node in enumerate(cluster.get("nodes", [])):
|
||||
info = infos[idx] if isinstance(infos[idx], dict) else {}
|
||||
node_services = services[idx] if isinstance(services[idx], list) else []
|
||||
started = {svc["name"] for svc in node_services if svc.get("state") == "started"}
|
||||
hostname = info.get("hostname") or pls.node_host(node.get("name", ""))
|
||||
role = _infer_role(hostname)
|
||||
nodes.append(
|
||||
{
|
||||
"name": node.get("name", ""),
|
||||
"address": pls.node_host(node.get("name", "")),
|
||||
"hostname": hostname,
|
||||
"current": node.get("name") == cluster.get("current_node"),
|
||||
"repositories": node.get("repositories", []),
|
||||
"role": role,
|
||||
"roles": [role],
|
||||
"expected_nfs": sorted(ROLE_NF_MAP.get(role, set())),
|
||||
"services": node_services,
|
||||
"started_services": sorted(started),
|
||||
}
|
||||
)
|
||||
|
||||
return {
|
||||
"enabled": True,
|
||||
"current_node": cluster.get("current_node"),
|
||||
"fully_established": bool(cluster.get("fully_established")),
|
||||
"nodes": nodes,
|
||||
}
|
||||
|
||||
|
||||
def _aggregate_nf_state(nf_name: str, nodes: list[dict], prom_states: dict[str, dict]) -> dict:
|
||||
service_name = nf_name.lower()
|
||||
placements = []
|
||||
seen_service = False
|
||||
for node in nodes:
|
||||
for service in node.get("services", []):
|
||||
if service.get("name") != service_name:
|
||||
continue
|
||||
seen_service = True
|
||||
if service.get("state") == "started":
|
||||
placements.append(
|
||||
{
|
||||
"hostname": node["hostname"],
|
||||
"address": node["address"],
|
||||
"roles": node["roles"],
|
||||
}
|
||||
)
|
||||
|
||||
prom_state = prom_states.get(nf_name, {"state": "unknown", "instance": ""})
|
||||
if placements:
|
||||
state = prom_state["state"] if prom_state["state"] in {"up", "down"} else "up"
|
||||
instance = ", ".join(p["hostname"] for p in placements)
|
||||
elif seen_service:
|
||||
state = "down"
|
||||
instance = ""
|
||||
else:
|
||||
state = prom_state["state"]
|
||||
instance = prom_state["instance"]
|
||||
|
||||
return {
|
||||
"name": nf_name,
|
||||
"state": state,
|
||||
"instance": instance,
|
||||
"nodes": placements,
|
||||
}
|
||||
|
||||
|
||||
def _node_nf_state(node: dict, nf_name: str) -> dict:
|
||||
service_name = nf_name.lower()
|
||||
service = next((svc for svc in node.get("services", []) if svc.get("name") == service_name), None)
|
||||
if not service:
|
||||
return {"name": nf_name, "state": "unknown"}
|
||||
if service.get("state") == "started":
|
||||
return {"name": nf_name, "state": "up"}
|
||||
return {"name": nf_name, "state": "down"}
|
||||
|
||||
|
||||
def _attach_node_nf_status(nodes: list[dict]) -> list[dict]:
|
||||
enriched = []
|
||||
for node in nodes:
|
||||
node_copy = dict(node)
|
||||
expected_nfs = node_copy.get("expected_nfs", [])
|
||||
node_copy["nfs"] = [_node_nf_state(node_copy, nf_name.upper()) for nf_name in expected_nfs]
|
||||
enriched.append(node_copy)
|
||||
return enriched
|
||||
|
||||
|
||||
async def get_network_status() -> dict:
|
||||
inventory_task = asyncio.create_task(get_cluster_inventory())
|
||||
prom_task = asyncio.create_task(prometheus.get_nf_status_map())
|
||||
inventory, prom_states = await asyncio.gather(inventory_task, prom_task)
|
||||
|
||||
nodes = _attach_node_nf_status(inventory.get("nodes", []))
|
||||
inventory["nodes"] = nodes
|
||||
nfs = [_aggregate_nf_state(nf_name, nodes, prom_states) for nf_name in ALL_NFS]
|
||||
up = sum(1 for nf in nfs if nf["state"] == "up")
|
||||
down = sum(1 for nf in nfs if nf["state"] == "down")
|
||||
|
||||
return {
|
||||
"nfs": nfs,
|
||||
"summary": {"up": up, "down": down, "total": len(nfs)},
|
||||
"cluster": inventory,
|
||||
}
|
||||
|
||||
|
||||
def find_nf_nodes(cluster: dict, nf_name: str) -> list[dict]:
|
||||
nodes = cluster.get("nodes", [])
|
||||
matches = []
|
||||
for node in nodes:
|
||||
for nf in node.get("nfs", []):
|
||||
if nf.get("name") == nf_name:
|
||||
matches.append(
|
||||
{
|
||||
"hostname": node["hostname"],
|
||||
"address": node["address"],
|
||||
"role": node.get("role", "AP"),
|
||||
"current": node.get("current", False),
|
||||
"state": nf.get("state", "unknown"),
|
||||
}
|
||||
)
|
||||
break
|
||||
return matches
|
||||
@@ -235,20 +235,23 @@ async def analyze_logs() -> dict:
|
||||
Gather log-pattern issues + Prometheus NF status + Alertmanager alerts.
|
||||
Returns a fully structured dict ready for JSON serialisation.
|
||||
"""
|
||||
from app.services import alertmanager, prometheus
|
||||
from app.services import alertmanager, prometheus, cluster_inventory
|
||||
|
||||
# Kick off all I/O in parallel
|
||||
containers_f = asyncio.create_task(_discover_containers())
|
||||
alerts_f = asyncio.create_task(alertmanager.get_alerts())
|
||||
nf_status_f = asyncio.create_task(prometheus.get_nf_status())
|
||||
cluster_f = asyncio.create_task(cluster_inventory.get_cluster_inventory())
|
||||
|
||||
containers = await containers_f
|
||||
alerts, nf_statuses = await asyncio.gather(alerts_f, nf_status_f,
|
||||
alerts, nf_statuses, cluster = await asyncio.gather(alerts_f, nf_status_f, cluster_f,
|
||||
return_exceptions=True)
|
||||
if isinstance(alerts, Exception):
|
||||
alerts = []
|
||||
if isinstance(nf_statuses, Exception):
|
||||
nf_statuses = []
|
||||
if isinstance(cluster, Exception):
|
||||
cluster = {"enabled": False, "nodes": []}
|
||||
|
||||
# Read all container logs concurrently
|
||||
log_tasks = {nf: asyncio.create_task(_read_logs(cname))
|
||||
@@ -280,25 +283,29 @@ async def analyze_logs() -> dict:
|
||||
# 2. NF-down events from Prometheus
|
||||
for nf_st in nf_statuses:
|
||||
if isinstance(nf_st, dict) and nf_st.get("state") == "down":
|
||||
node_text = ", ".join(node["hostname"] for node in nf_st.get("nodes", []))
|
||||
issues.append({
|
||||
"id": f"nf-down-{nf_st['name']}",
|
||||
"category": "Connectivity",
|
||||
"nf": nf_st["name"],
|
||||
"node": node_text,
|
||||
"severity": "critical",
|
||||
"count": 1,
|
||||
"description": f"{nf_st['name']} is unreachable",
|
||||
"remediation": (f"Run `{CONTAINER_RUNTIME} ps` and check if {nf_st['name']} "
|
||||
f"container is running; inspect its logs."),
|
||||
"remediation": (f"Check {node_text or 'the hosting node'} first, then run "
|
||||
f"`{CONTAINER_RUNTIME} ps` and inspect `{nf_st['name'].lower()}` logs."),
|
||||
"source": "prometheus",
|
||||
})
|
||||
|
||||
# 3. Active Alertmanager alerts
|
||||
for alert in alerts:
|
||||
if isinstance(alert, dict):
|
||||
node_text = ", ".join(node["hostname"] for node in alert.get("nodes", []))
|
||||
issues.append({
|
||||
"id": f"alert-{alert.get('name', '')}-{len(issues)}",
|
||||
"category": _alert_category(alert),
|
||||
"nf": _alert_nf(alert),
|
||||
"node": node_text,
|
||||
"severity": alert.get("severity", "warning"),
|
||||
"count": 1,
|
||||
"description": alert.get("summary") or alert.get("name", "Unknown alert"),
|
||||
@@ -331,6 +338,7 @@ async def analyze_logs() -> dict:
|
||||
"categories": categories,
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"log_sources": list(containers.keys()),
|
||||
"cluster": cluster,
|
||||
}
|
||||
|
||||
# Persist to history ring-buffer
|
||||
|
||||
78
app/services/pls.py
Normal file
78
app/services/pls.py
Normal file
@@ -0,0 +1,78 @@
|
||||
"""PLS API client for cluster and per-node discovery."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from urllib.parse import urlsplit, urlunsplit
|
||||
|
||||
import httpx
|
||||
|
||||
from app.config import PLS_AUTH_BACKEND, PLS_BASE_URL, PLS_PASSWORD, PLS_USERNAME, PLS_VERIFY_TLS
|
||||
|
||||
_token: str | None = None
|
||||
|
||||
|
||||
def _base_url_for_host(host: str | None = None) -> str:
|
||||
if not host:
|
||||
return PLS_BASE_URL.rstrip("/")
|
||||
parts = urlsplit(PLS_BASE_URL)
|
||||
return urlunsplit((parts.scheme, host, parts.path.rstrip("/"), "", ""))
|
||||
|
||||
|
||||
async def _login() -> str | None:
|
||||
global _token
|
||||
if _token:
|
||||
return _token
|
||||
if not PLS_USERNAME or not PLS_PASSWORD:
|
||||
return None
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5, verify=PLS_VERIFY_TLS) as client:
|
||||
response = await client.post(
|
||||
f"{_base_url_for_host()}/auth/login",
|
||||
json={
|
||||
"username": PLS_USERNAME,
|
||||
"password": PLS_PASSWORD,
|
||||
"backend": PLS_AUTH_BACKEND,
|
||||
},
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
_token = data.get("access_token")
|
||||
return _token
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
async def _get(path: str, host: str | None = None) -> dict | list | None:
|
||||
token = await _login()
|
||||
if not token:
|
||||
return None
|
||||
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
url = f"{_base_url_for_host(host)}/{path.lstrip('/')}"
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5, verify=PLS_VERIFY_TLS) as client:
|
||||
response = await client.get(url, headers=headers)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def node_host(node_name: str) -> str:
|
||||
return node_name.split("@", 1)[1] if "@" in node_name else node_name
|
||||
|
||||
|
||||
async def get_cluster_status() -> dict | None:
|
||||
data = await _get("data_layer/cluster/status")
|
||||
return data if isinstance(data, dict) else None
|
||||
|
||||
|
||||
async def get_system_info(host: str | None = None) -> dict | None:
|
||||
data = await _get("system/info", host=host)
|
||||
return data if isinstance(data, dict) else None
|
||||
|
||||
|
||||
async def get_services(host: str | None = None) -> list[dict]:
|
||||
data = await _get("services", host=host)
|
||||
return data if isinstance(data, list) else []
|
||||
@@ -14,12 +14,12 @@ async def query(promql: str) -> list:
|
||||
return r.json()["data"]["result"]
|
||||
|
||||
|
||||
async def get_nf_status() -> list:
|
||||
"""Return a list of {name, state, instance} for every known NF."""
|
||||
async def get_nf_status_map() -> dict[str, dict]:
|
||||
"""Return Prometheus-backed NF status keyed by display name."""
|
||||
try:
|
||||
results = await query("up")
|
||||
except Exception:
|
||||
return [{"name": n, "state": "unknown", "instance": ""} for n in ALL_NFS]
|
||||
return {n: {"name": n, "state": "unknown", "instance": ""} for n in ALL_NFS}
|
||||
|
||||
seen: dict[str, dict] = {}
|
||||
for r in results:
|
||||
@@ -38,4 +38,9 @@ async def get_nf_status() -> list:
|
||||
if n not in seen:
|
||||
seen[n] = {"name": n, "state": "unknown", "instance": ""}
|
||||
|
||||
return list(seen.values())
|
||||
return seen
|
||||
|
||||
|
||||
async def get_nf_status() -> list:
|
||||
"""Return a list of {name, state, instance} for every known NF."""
|
||||
return list((await get_nf_status_map()).values())
|
||||
|
||||
Reference in New Issue
Block a user