Added multi-node functionality
BIN  app/__pycache__/__init__.cpython-314.pyc  (new file; binary not shown)
@@ -1,11 +1,26 @@
 import os


+def _env_bool(name: str, default: bool) -> bool:
+    value = os.getenv(name)
+    if value is None:
+        return default
+    return value.lower() in {"1", "true", "yes", "on"}


 # Defaults assume the appliance-style deployment model where Marvis runs with
 # host networking and talks to sibling services over host loopback.
 PROMETHEUS_URL = os.getenv("MARVIS_PROMETHEUS_URL", "http://127.0.0.1:9090")
 PROMETHEUS_PREFIX = os.getenv("MARVIS_PROMETHEUS_PREFIX", "/prometheus")
 ALERTMANAGER_URL = os.getenv("MARVIS_ALERTMANAGER_URL", "http://127.0.0.1:9093")

+# PLS discovery defaults assume the local appliance exposes PLS via Traefik.
+PLS_BASE_URL = os.getenv("MARVIS_PLS_BASE_URL", "https://127.0.0.1/core/pls/api/1")
+PLS_USERNAME = os.getenv("MARVIS_PLS_USERNAME", "")
+PLS_PASSWORD = os.getenv("MARVIS_PLS_PASSWORD", "")
+PLS_AUTH_BACKEND = os.getenv("MARVIS_PLS_AUTH_BACKEND", "local")
+PLS_VERIFY_TLS = _env_bool("MARVIS_PLS_VERIFY_TLS", False)

 # AI backend: "rule" | "openai" | "ollama"
 AI_MODE = os.getenv("MARVIS_AI_MODE", "rule")
 OPENAI_API_KEY = os.getenv("MARVIS_OPENAI_API_KEY", "")
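For reference, a minimal sketch (not part of the commit) of how the new _env_bool helper reads a flag such as MARVIS_PLS_VERIFY_TLS, assuming it runs in the same module as the hunk above:

    # Illustrative only: _env_bool is defined in the hunk above.
    os.environ["MARVIS_PLS_VERIFY_TLS"] = "TRUE"
    assert _env_bool("MARVIS_PLS_VERIFY_TLS", False) is True    # "1", "true", "yes", "on" (any case) -> True
    os.environ["MARVIS_PLS_VERIFY_TLS"] = "no"
    assert _env_bool("MARVIS_PLS_VERIFY_TLS", True) is False    # any other value -> False
    os.environ.pop("MARVIS_PLS_VERIFY_TLS")
    assert _env_bool("MARVIS_PLS_VERIFY_TLS", True) is True     # unset -> the supplied default

Note that the module-level constants are evaluated once at import time, so the environment must be set before this config module is imported.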
BIN  app/routers/__pycache__/__init__.cpython-314.pyc  (new file; binary not shown)
BIN  app/routers/__pycache__/actions.cpython-314.pyc  (new file; binary not shown)
BIN  app/routers/__pycache__/alerts.cpython-314.pyc  (new file; binary not shown)
BIN  app/routers/__pycache__/emulated_session.cpython-314.pyc  (new file; binary not shown)
BIN  app/routers/__pycache__/network.cpython-314.pyc  (new file; binary not shown)
BIN  app/routers/__pycache__/query.cpython-314.pyc  (new file; binary not shown)
@@ -1,5 +1,5 @@
 from fastapi import APIRouter
-from app.services import alertmanager
+from app.services import alertmanager, cluster_inventory

 router = APIRouter()

@@ -8,4 +8,9 @@ router = APIRouter()
 async def get_alerts():
     alerts = await alertmanager.get_alerts()
     critical = sum(1 for a in alerts if a.get("severity") == "critical")
-    return {"alerts": alerts, "total": len(alerts), "critical": critical}
+    return {
+        "alerts": alerts,
+        "total": len(alerts),
+        "critical": critical,
+        "cluster": await cluster_inventory.get_cluster_inventory(),
+    }
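With the cluster field added, the /alerts response now bundles Alertmanager alerts with the PLS-discovered inventory. A hedged sketch of the shape, with invented hostnames and values (the per-alert nf/nodes fields come from the alertmanager service changes further down, and the cluster payload from cluster_inventory.get_cluster_inventory()):

    # Illustrative response shape only.
    {
        "alerts": [
            {
                "name": "AMFDown",                  # hypothetical alert
                "severity": "critical",
                "instance": "10.0.0.12:9090",
                "summary": "AMF target is down",
                "nf": "AMF",
                "nodes": [{"hostname": "p5g-cp-1", "address": "10.0.0.12", "role": "CP",
                           "current": True, "state": "down"}],
            },
        ],
        "total": 1,
        "critical": 1,
        "cluster": {"enabled": True, "current_node": "pls@10.0.0.12",
                    "fully_established": True, "nodes": ["..."]},
    }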
@@ -1,12 +1,9 @@
 from fastapi import APIRouter
-from app.services import prometheus
+from app.services import cluster_inventory

 router = APIRouter()


 @router.get("/network/status")
 async def network_status():
-    nfs = await prometheus.get_nf_status()
-    up = sum(1 for n in nfs if n["state"] == "up")
-    down = sum(1 for n in nfs if n["state"] == "down")
-    return {"nfs": nfs, "summary": {"up": up, "down": down, "total": len(nfs)}}
+    return await cluster_inventory.get_network_status()
@@ -1,6 +1,6 @@
 from fastapi import APIRouter
 from pydantic import BaseModel
-from app.services import prometheus, alertmanager, ai
+from app.services import cluster_inventory, alertmanager, ai

 router = APIRouter()

@@ -18,7 +18,7 @@ async def query(req: QueryRequest):

 async def _gather(query_text: str):
     import asyncio
-    nfs_task = asyncio.create_task(prometheus.get_nf_status())
+    nfs_task = asyncio.create_task(cluster_inventory.get_network_status())
     alerts_task = asyncio.create_task(alertmanager.get_alerts())
-    nfs, alerts = await asyncio.gather(nfs_task, alerts_task)
-    return {"nfs": nfs}, alerts
+    network_state, alerts = await asyncio.gather(nfs_task, alerts_task)
+    return network_state, alerts
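_gather now hands back the full network-state dict from cluster_inventory.get_network_status() rather than a bare NF list, so the AI backend sees cluster placement as well. A minimal sketch of consuming it inside the request handler (the question text is illustrative):

    # Illustrative only.
    network_state, alerts = await _gather("are any cluster nodes degraded?")
    # network_state == {"nfs": [...], "summary": {...}, "cluster": {"nodes": [...], ...}}
    reply = await ai.answer("are any cluster nodes degraded?", network_state, alerts)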
BIN  app/services/__pycache__/__init__.cpython-314.pyc  (new file; binary not shown)
BIN  app/services/__pycache__/alertmanager.cpython-314.pyc  (new file; binary not shown)
BIN  app/services/__pycache__/cluster_inventory.cpython-314.pyc  (new file; binary not shown)
BIN  app/services/__pycache__/pls.cpython-314.pyc  (new file; binary not shown)
BIN  app/services/__pycache__/prometheus.cpython-314.pyc  (new file; binary not shown)
@@ -30,6 +30,7 @@ async def answer(query: str, network_state: dict, alerts: list) -> str:
 def _rule_based(query: str, network_state: dict, alerts: list) -> str:
     q = query.lower()
     nfs = network_state.get("nfs", [])
+    cluster = network_state.get("cluster", {})
     up = [n for n in nfs if n["state"] == "up"]
     down = [n for n in nfs if n["state"] == "down"]

@@ -58,26 +59,40 @@ def _rule_based(query: str, network_state: dict, alerts: list) -> str:
         return _alerts_summary(alerts)

     if any(w in q for w in ["subscriber", "ue ", "device", "phone", "handset", "registration", "attach"]):
-        return _subscriber_analysis(nfs, alerts)
+        return _subscriber_analysis(nfs, alerts, cluster)

     if any(w in q for w in ["session", "pdu", "bearer", "user plane", "traffic", "throughput"]):
-        return _session_analysis(nfs, alerts)
+        return _session_analysis(nfs, alerts, cluster)

     # Default → health summary
-    return _health_summary(up, down, alerts)
+    return _health_summary(up, down, alerts, cluster)


-def _health_summary(up: list, down: list, alerts: list) -> str:
+def _health_summary(up: list, down: list, alerts: list, cluster: dict) -> str:
     ts = datetime.now().strftime("%H:%M:%S")
     crit = [a for a in alerts if a.get("severity") == "critical"]
     warn = [a for a in alerts if a.get("severity") != "critical"]
     lines = [f"**P5G Network Health — {ts}**\n"]
+    nodes = cluster.get("nodes", [])

     if up:
-        lines.append(f"✅ **{len(up)} UP**: {', '.join(n['name'] for n in up)}")
+        lines.append(f"✅ **{len(up)} UP**: {', '.join(_nf_label(n) for n in up)}")
     if down:
-        lines.append(f"🔴 **{len(down)} DOWN**: {', '.join(n['name'] for n in down)}")
-        lines.append(f" ⚡ Action: check `{CONTAINER_RUNTIME} logs <nf>` in the runtime host")
+        lines.append(f"🔴 **{len(down)} DOWN**: {', '.join(_nf_label(n) for n in down)}")
+        lines.append(" ⚡ Action: inspect the node shown for each affected NF before pulling logs.")

+    if nodes:
+        lines.append(f"\n**Cluster nodes ({len(nodes)})**")
+        for node in nodes:
+            running = [nf["name"] for nf in node.get("nfs", []) if nf.get("state") == "up"]
+            down_nfs = [nf["name"] for nf in node.get("nfs", []) if nf.get("state") == "down"]
+            role = node.get("role", "AP")
+            lines.append(
+                f"• **{node['hostname']}** ({role}{', local' if node.get('current') else ''})"
+                f" — running: {', '.join(running) or 'none'}"
+            )
+            if down_nfs:
+                lines.append(f" down here: {', '.join(down_nfs)}")
+
     if alerts:
         lines.append(f"\n⚠️ **{len(alerts)} alert(s)** — {len(crit)} critical, {len(warn)} warning")
@@ -102,8 +117,15 @@ def _nf_detail(nf_name: str, nfs: list, alerts: list) -> str:
                 f"Check: `{CONTAINER_RUNTIME} ps | grep {nf_name.lower()}`")

     icon = "✅" if nf["state"] == "up" else "🔴"
-    lines = [f"{icon} **{nf_name}** is **{nf['state'].upper()}**",
-             f"Instance: `{nf.get('instance', 'n/a')}`"]
+    placements = nf.get("nodes", [])
+    lines = [f"{icon} **{nf_name}** is **{nf['state'].upper()}**"]
+    if placements:
+        node_text = ", ".join(
+            f"{node['hostname']} ({'/'.join(node.get('roles', []))})"
+            for node in placements
+        )
+        lines.append(f"Nodes: {node_text}")
+    lines.append(f"Instance: `{nf.get('instance', 'n/a')}`")
     if nf_alerts:
         lines.append(f"\n⚠️ {len(nf_alerts)} alert(s) for {nf_name}:")
         for a in nf_alerts:
@@ -129,43 +151,72 @@ def _alerts_summary(alerts: list) -> str:
     return "\n".join(lines)


-def _subscriber_analysis(nfs: list, alerts: list) -> str:
+def _subscriber_analysis(nfs: list, alerts: list, cluster: dict) -> str:
     amf = next((n for n in nfs if n["name"] == "AMF"), None)
     smf = next((n for n in nfs if n["name"] == "SMF"), None)
     lines = ["**Subscriber & Registration Analysis**\n"]
-    lines.append(f"AMF (registration/mobility): {'✅ UP' if amf and amf['state'] == 'up' else '🔴 DOWN — subscribers cannot register'}")
-    lines.append(f"SMF (session management): {'✅ UP' if smf and smf['state'] == 'up' else '🔴 DOWN — no new data sessions'}")
+    lines.append(f"AMF (registration/mobility): {_nf_sentence(amf, 'subscribers cannot register')}")
+    lines.append(f"SMF (session management): {_nf_sentence(smf, 'no new data sessions')}")
     sub_alerts = [a for a in alerts if any(k in a.get("name", "").lower()
                                            for k in ["ue", "subscriber", "session", "attach", "registration"])]
     if sub_alerts:
         lines.append(f"\n⚠️ {len(sub_alerts)} subscriber-related alert(s) active.")
     else:
         lines.append("\nNo subscriber-related alerts detected.")
+    lines.append(_cluster_scope(cluster))
     return "\n".join(lines)


-def _session_analysis(nfs: list, alerts: list) -> str:
+def _session_analysis(nfs: list, alerts: list, cluster: dict) -> str:
     smf = next((n for n in nfs if n["name"] == "SMF"), None)
     upf = next((n for n in nfs if n["name"] == "UPF"), None)
     lines = ["**PDU Session & Data Plane Analysis**\n"]
-    lines.append(f"SMF: {'✅ UP' if smf and smf['state'] == 'up' else '🔴 DOWN'}")
-    lines.append(f"UPF: {'✅ UP' if upf and upf['state'] == 'up' else '🔴 DOWN'}")
+    lines.append(f"SMF: {_nf_sentence(smf, 'session setup is blocked')}")
+    lines.append(f"UPF: {_nf_sentence(upf, 'user-plane forwarding is blocked')}")
     if (not smf or smf["state"] != "up") or (not upf or upf["state"] != "up"):
         lines.append("\n⚡ **Impact**: PDU sessions will fail until both SMF and UPF are operational.")
     else:
         lines.append("\nBoth SMF and UPF operational — sessions should be establishing normally.")
+    lines.append(_cluster_scope(cluster))
     return "\n".join(lines)


+def _nf_label(nf: dict) -> str:
+    placements = nf.get("nodes", [])
+    if not placements:
+        return nf["name"]
+    return f"{nf['name']} on {', '.join(node['hostname'] for node in placements)}"
+
+
+def _nf_sentence(nf: dict | None, impact: str) -> str:
+    if not nf:
+        return "○ N/A"
+    if nf.get("state") == "up":
+        nodes = ", ".join(node["hostname"] for node in nf.get("nodes", [])) or nf.get("instance", "unknown host")
+        return f"✅ UP on {nodes}"
+    return f"🔴 DOWN — {impact}"
+
+
+def _cluster_scope(cluster: dict) -> str:
+    nodes = cluster.get("nodes", [])
+    if not nodes:
+        return "\nCluster discovery is not available."
+    details = ", ".join(f"{node['hostname']} ({node.get('role', 'AP')})" for node in nodes)
+    return f"\nCluster scope checked: {details}"
+
+
 # ── LLM backends ──────────────────────────────────────────────────────────

 def _build_context(network_state: dict, alerts: list) -> str:
     nfs = network_state.get("nfs", [])
     up = [n["name"] for n in nfs if n["state"] == "up"]
     down = [n["name"] for n in nfs if n["state"] == "down"]
+    nodes = network_state.get("cluster", {}).get("nodes", [])
+    node_summary = ", ".join(f"{node['hostname']} ({node.get('role', 'AP')})" for node in nodes) or "none"
     return (
         f"NFs UP: {', '.join(up) or 'none'}\n"
         f"NFs DOWN: {', '.join(down) or 'none'}\n"
+        f"Cluster nodes: {node_summary}\n"
         f"Active alerts: {', '.join(a.get('name','') for a in alerts[:5]) or 'none'}"
     )

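A sketch of what the new placement-aware helpers in this file produce for hypothetical NF records (hostnames, roles, and states are invented for illustration):

    # Illustrative only.
    amf = {"name": "AMF", "state": "up", "instance": "p5g-cp-1",
           "nodes": [{"hostname": "p5g-cp-1", "address": "10.0.0.12", "roles": ["CP"]}]}
    upf = {"name": "UPF", "state": "down", "instance": "", "nodes": []}

    _nf_label(amf)                                          # "AMF on p5g-cp-1"
    _nf_sentence(amf, "subscribers cannot register")        # "✅ UP on p5g-cp-1"
    _nf_sentence(upf, "user-plane forwarding is blocked")   # "🔴 DOWN — user-plane forwarding is blocked"
    _cluster_scope({"nodes": [{"hostname": "p5g-cp-1", "role": "CP"},
                              {"hostname": "p5g-up-1", "role": "UP"}]})
    # "\nCluster scope checked: p5g-cp-1 (CP), p5g-up-1 (UP)"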
@@ -2,6 +2,7 @@

 import httpx
 from app.config import ALERTMANAGER_URL
+from app.services import cluster_inventory

 _BASE = ALERTMANAGER_URL.rstrip("/")

@@ -16,14 +17,29 @@ async def get_alerts() -> list:
     except Exception:
         return []

+    cluster = await cluster_inventory.get_cluster_inventory()
     alerts = []
     for a in raw:
         labels = a.get("labels", {})
         annotations = a.get("annotations", {})
+        name = labels.get("alertname", "Unknown")
+        summary = annotations.get("summary", annotations.get("description", ""))
+        nf_name = _infer_nf(name, summary, labels.get("instance", ""))
+        nodes = cluster_inventory.find_nf_nodes(cluster, nf_name) if nf_name else []
         alerts.append({
-            "name": labels.get("alertname", "Unknown"),
+            "name": name,
             "severity": labels.get("severity", "warning"),
             "instance": labels.get("instance", ""),
-            "summary": annotations.get("summary", annotations.get("description", "")),
+            "summary": summary,
+            "nf": nf_name,
+            "nodes": nodes,
         })
     return alerts


+def _infer_nf(name: str, summary: str, instance: str) -> str:
+    text = f"{name} {summary} {instance}".upper()
+    for nf_name in ["AMF", "SMF", "UPF", "UDM", "UDR", "NRF", "AUSF", "PCF", "MME", "SGWC", "DRA", "DSM"]:
+        if nf_name in text:
+            return nf_name
+    return ""
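_infer_nf does a simple token match so that each alert can be tied back to a node via find_nf_nodes. A quick sketch with made-up alert text:

    # Illustrative only.
    _infer_nf("TargetDown", "amf endpoint not scraped", "10.0.0.12:9090")   # "AMF" (case-insensitive substring match)
    _infer_nf("HighCPU", "node CPU above 90%", "10.0.0.13:9100")            # ""    (no known NF token, so no node lookup)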
app/services/cluster_inventory.py  (new file, 180 lines)
@@ -0,0 +1,180 @@
"""Cluster discovery built on top of the PLS API."""

from __future__ import annotations

import asyncio
import re

from app.config import ALL_NFS
from app.services import pls, prometheus

ROLE_NF_MAP = {
    "5GALL": {"amf", "smf", "pcf", "udr", "udm", "nrf", "eir", "ausf", "dra", "upf", "chf", "smsf", "aaa", "bmsc"},
    "CP": {"amf", "smf", "pcf", "udr", "udm", "nrf", "eir", "ausf", "dra", "chf", "smsf", "aaa", "bmsc"},
    "UP": {"upf"},
    "DCP": {"amf", "smf", "pcf", "chf", "smsf", "bmsc"},
    "DLF": {"udr", "udm", "nrf", "eir", "ausf", "aaa"},
    "SIG": {"dra"},
    "4GALL": {"mme", "sgwc", "smf", "pcf", "chf", "udr", "udm", "nrf", "eir", "ausf", "dra", "upf", "smsf", "aaa", "bmsc"},
    "4GCP": {"mme", "sgwc", "smf", "pcf", "chf", "udr", "udm", "nrf", "eir", "ausf", "dra", "smsf", "aaa", "bmsc"},
    "4GDCP": {"mme", "sgwc", "smf", "pcf", "chf", "smsf", "bmsc"},
    "COMBOALL": {"amf", "mme", "sgwc", "smf", "pcf", "chf", "udr", "udm", "nrf", "eir", "ausf", "dra", "upf", "smsf", "aaa", "bmsc"},
    "COMBOCP": {"amf", "mme", "sgwc", "smf", "pcf", "chf", "udr", "udm", "nrf", "eir", "ausf", "dra", "smsf", "aaa", "bmsc"},
    "COMBODCP": {"amf", "mme", "sgwc", "smf", "pcf", "chf", "aaa"},
}
ROLE_ALIASES = {
    "UPF": "UP",
}
ROLE_PRIORITY = ["COMBOALL", "COMBOCP", "COMBODCP", "5GALL", "4GALL", "4GCP", "4GDCP", "DCP", "DLF", "SIG", "CP", "UP"]


def _infer_role(hostname: str) -> str:
    tokens = [t for t in re.split(r"[^A-Za-z0-9]+", hostname.upper()) if t]
    normalized = [ROLE_ALIASES.get(token, token) for token in tokens]
    for role in ROLE_PRIORITY:
        if role in normalized:
            return role
    for token in normalized:
        if token.endswith("UPF"):
            return "UP"
    return "AP"


async def get_cluster_inventory() -> dict:
    cluster = await pls.get_cluster_status()
    if not cluster:
        return {
            "enabled": False,
            "current_node": None,
            "fully_established": False,
            "nodes": [],
        }

    node_names = [node.get("name", "") for node in cluster.get("nodes", [])]
    info_tasks = [asyncio.create_task(pls.get_system_info(pls.node_host(name))) for name in node_names]
    service_tasks = [asyncio.create_task(pls.get_services(pls.node_host(name))) for name in node_names]
    infos = await asyncio.gather(*info_tasks, return_exceptions=True)
    services = await asyncio.gather(*service_tasks, return_exceptions=True)

    nodes: list[dict] = []
    for idx, node in enumerate(cluster.get("nodes", [])):
        info = infos[idx] if isinstance(infos[idx], dict) else {}
        node_services = services[idx] if isinstance(services[idx], list) else []
        started = {svc["name"] for svc in node_services if svc.get("state") == "started"}
        hostname = info.get("hostname") or pls.node_host(node.get("name", ""))
        role = _infer_role(hostname)
        nodes.append(
            {
                "name": node.get("name", ""),
                "address": pls.node_host(node.get("name", "")),
                "hostname": hostname,
                "current": node.get("name") == cluster.get("current_node"),
                "repositories": node.get("repositories", []),
                "role": role,
                "roles": [role],
                "expected_nfs": sorted(ROLE_NF_MAP.get(role, set())),
                "services": node_services,
                "started_services": sorted(started),
            }
        )

    return {
        "enabled": True,
        "current_node": cluster.get("current_node"),
        "fully_established": bool(cluster.get("fully_established")),
        "nodes": nodes,
    }


def _aggregate_nf_state(nf_name: str, nodes: list[dict], prom_states: dict[str, dict]) -> dict:
    service_name = nf_name.lower()
    placements = []
    seen_service = False
    for node in nodes:
        for service in node.get("services", []):
            if service.get("name") != service_name:
                continue
            seen_service = True
            if service.get("state") == "started":
                placements.append(
                    {
                        "hostname": node["hostname"],
                        "address": node["address"],
                        "roles": node["roles"],
                    }
                )

    prom_state = prom_states.get(nf_name, {"state": "unknown", "instance": ""})
    if placements:
        state = prom_state["state"] if prom_state["state"] in {"up", "down"} else "up"
        instance = ", ".join(p["hostname"] for p in placements)
    elif seen_service:
        state = "down"
        instance = ""
    else:
        state = prom_state["state"]
        instance = prom_state["instance"]

    return {
        "name": nf_name,
        "state": state,
        "instance": instance,
        "nodes": placements,
    }


def _node_nf_state(node: dict, nf_name: str) -> dict:
    service_name = nf_name.lower()
    service = next((svc for svc in node.get("services", []) if svc.get("name") == service_name), None)
    if not service:
        return {"name": nf_name, "state": "unknown"}
    if service.get("state") == "started":
        return {"name": nf_name, "state": "up"}
    return {"name": nf_name, "state": "down"}


def _attach_node_nf_status(nodes: list[dict]) -> list[dict]:
    enriched = []
    for node in nodes:
        node_copy = dict(node)
        expected_nfs = node_copy.get("expected_nfs", [])
        node_copy["nfs"] = [_node_nf_state(node_copy, nf_name.upper()) for nf_name in expected_nfs]
        enriched.append(node_copy)
    return enriched


async def get_network_status() -> dict:
    inventory_task = asyncio.create_task(get_cluster_inventory())
    prom_task = asyncio.create_task(prometheus.get_nf_status_map())
    inventory, prom_states = await asyncio.gather(inventory_task, prom_task)

    nodes = _attach_node_nf_status(inventory.get("nodes", []))
    inventory["nodes"] = nodes
    nfs = [_aggregate_nf_state(nf_name, nodes, prom_states) for nf_name in ALL_NFS]
    up = sum(1 for nf in nfs if nf["state"] == "up")
    down = sum(1 for nf in nfs if nf["state"] == "down")

    return {
        "nfs": nfs,
        "summary": {"up": up, "down": down, "total": len(nfs)},
        "cluster": inventory,
    }


def find_nf_nodes(cluster: dict, nf_name: str) -> list[dict]:
    nodes = cluster.get("nodes", [])
    matches = []
    for node in nodes:
        for nf in node.get("nfs", []):
            if nf.get("name") == nf_name:
                matches.append(
                    {
                        "hostname": node["hostname"],
                        "address": node["address"],
                        "role": node.get("role", "AP"),
                        "current": node.get("current", False),
                        "state": nf.get("state", "unknown"),
                    }
                )
                break
    return matches
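For orientation, a sketch of how role inference behaves and of the aggregate shape get_network_status() returns; hostnames, addresses, and counts are invented for illustration:

    # Illustrative only.
    _infer_role("p5g-combocp-01")   # "COMBOCP" (token matched against ROLE_PRIORITY)
    _infer_role("edge-upf-2")       # "UP"      (via the "UPF" -> "UP" alias)
    _infer_role("lab-node-7")       # "AP"      (fallback when no role token is found)

    # await get_network_status() -> roughly:
    # {
    #   "nfs": [{"name": "AMF", "state": "up", "instance": "p5g-combocp-01",
    #            "nodes": [{"hostname": "p5g-combocp-01", "address": "10.0.0.12", "roles": ["COMBOCP"]}]}, ...],
    #   "summary": {"up": 9, "down": 1, "total": 12},
    #   "cluster": {"enabled": True, "current_node": "pls@10.0.0.12", "fully_established": True,
    #               "nodes": [{"hostname": "p5g-combocp-01", "role": "COMBOCP", "nfs": [...], ...}]},
    # }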
@@ -235,20 +235,23 @@ async def analyze_logs() -> dict:
     Gather log-pattern issues + Prometheus NF status + Alertmanager alerts.
     Returns a fully structured dict ready for JSON serialisation.
     """
-    from app.services import alertmanager, prometheus
+    from app.services import alertmanager, prometheus, cluster_inventory

     # Kick off all I/O in parallel
     containers_f = asyncio.create_task(_discover_containers())
     alerts_f = asyncio.create_task(alertmanager.get_alerts())
     nf_status_f = asyncio.create_task(prometheus.get_nf_status())
+    cluster_f = asyncio.create_task(cluster_inventory.get_cluster_inventory())

     containers = await containers_f
-    alerts, nf_statuses = await asyncio.gather(alerts_f, nf_status_f,
+    alerts, nf_statuses, cluster = await asyncio.gather(alerts_f, nf_status_f, cluster_f,
                                                return_exceptions=True)
     if isinstance(alerts, Exception):
         alerts = []
     if isinstance(nf_statuses, Exception):
         nf_statuses = []
+    if isinstance(cluster, Exception):
+        cluster = {"enabled": False, "nodes": []}

     # Read all container logs concurrently
     log_tasks = {nf: asyncio.create_task(_read_logs(cname))

@@ -280,25 +283,29 @@ async def analyze_logs() -> dict:
     # 2. NF-down events from Prometheus
     for nf_st in nf_statuses:
         if isinstance(nf_st, dict) and nf_st.get("state") == "down":
+            node_text = ", ".join(node["hostname"] for node in nf_st.get("nodes", []))
             issues.append({
                 "id": f"nf-down-{nf_st['name']}",
                 "category": "Connectivity",
                 "nf": nf_st["name"],
+                "node": node_text,
                 "severity": "critical",
                 "count": 1,
                 "description": f"{nf_st['name']} is unreachable",
-                "remediation": (f"Run `{CONTAINER_RUNTIME} ps` and check if {nf_st['name']} "
-                                f"container is running; inspect its logs."),
+                "remediation": (f"Check {node_text or 'the hosting node'} first, then run "
+                                f"`{CONTAINER_RUNTIME} ps` and inspect `{nf_st['name'].lower()}` logs."),
                 "source": "prometheus",
             })

     # 3. Active Alertmanager alerts
     for alert in alerts:
         if isinstance(alert, dict):
+            node_text = ", ".join(node["hostname"] for node in alert.get("nodes", []))
             issues.append({
                 "id": f"alert-{alert.get('name', '')}-{len(issues)}",
                 "category": _alert_category(alert),
                 "nf": _alert_nf(alert),
+                "node": node_text,
                 "severity": alert.get("severity", "warning"),
                 "count": 1,
                 "description": alert.get("summary") or alert.get("name", "Unknown alert"),

@@ -331,6 +338,7 @@ async def analyze_logs() -> dict:
         "categories": categories,
         "timestamp": datetime.now().isoformat(),
         "log_sources": list(containers.keys()),
+        "cluster": cluster,
     }

     # Persist to history ring-buffer
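The issues list now carries a node field alongside nf, so the UI can point at the hosting node. A sketch of one enriched entry, assuming CONTAINER_RUNTIME resolves to podman and with an invented hostname:

    # Illustrative only.
    {
        "id": "nf-down-UPF",
        "category": "Connectivity",
        "nf": "UPF",
        "node": "p5g-up-1",
        "severity": "critical",
        "count": 1,
        "description": "UPF is unreachable",
        "remediation": "Check p5g-up-1 first, then run `podman ps` and inspect `upf` logs.",
        "source": "prometheus",
    }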
app/services/pls.py  (new file, 78 lines)
@@ -0,0 +1,78 @@
"""PLS API client for cluster and per-node discovery."""

from __future__ import annotations

from urllib.parse import urlsplit, urlunsplit

import httpx

from app.config import PLS_AUTH_BACKEND, PLS_BASE_URL, PLS_PASSWORD, PLS_USERNAME, PLS_VERIFY_TLS

_token: str | None = None


def _base_url_for_host(host: str | None = None) -> str:
    if not host:
        return PLS_BASE_URL.rstrip("/")
    parts = urlsplit(PLS_BASE_URL)
    return urlunsplit((parts.scheme, host, parts.path.rstrip("/"), "", ""))


async def _login() -> str | None:
    global _token
    if _token:
        return _token
    if not PLS_USERNAME or not PLS_PASSWORD:
        return None

    try:
        async with httpx.AsyncClient(timeout=5, verify=PLS_VERIFY_TLS) as client:
            response = await client.post(
                f"{_base_url_for_host()}/auth/login",
                json={
                    "username": PLS_USERNAME,
                    "password": PLS_PASSWORD,
                    "backend": PLS_AUTH_BACKEND,
                },
            )
            response.raise_for_status()
            data = response.json()
            _token = data.get("access_token")
            return _token
    except Exception:
        return None


async def _get(path: str, host: str | None = None) -> dict | list | None:
    token = await _login()
    if not token:
        return None

    headers = {"Authorization": f"Bearer {token}"}
    url = f"{_base_url_for_host(host)}/{path.lstrip('/')}"
    try:
        async with httpx.AsyncClient(timeout=5, verify=PLS_VERIFY_TLS) as client:
            response = await client.get(url, headers=headers)
            response.raise_for_status()
            return response.json()
    except Exception:
        return None


def node_host(node_name: str) -> str:
    return node_name.split("@", 1)[1] if "@" in node_name else node_name


async def get_cluster_status() -> dict | None:
    data = await _get("data_layer/cluster/status")
    return data if isinstance(data, dict) else None


async def get_system_info(host: str | None = None) -> dict | None:
    data = await _get("system/info", host=host)
    return data if isinstance(data, dict) else None


async def get_services(host: str | None = None) -> list[dict]:
    data = await _get("services", host=host)
    return data if isinstance(data, list) else []
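The two small URL helpers keep per-node requests on the same PLS path as the local one. A sketch given a hypothetical cluster node name and the default MARVIS_PLS_BASE_URL:

    # Illustrative only; assumes PLS_BASE_URL = "https://127.0.0.1/core/pls/api/1".
    node_host("pls@10.0.0.13")        # "10.0.0.13" (strip the name@host node prefix)
    node_host("10.0.0.13")            # "10.0.0.13" (already a bare host)
    _base_url_for_host("10.0.0.13")   # "https://10.0.0.13/core/pls/api/1"
    _base_url_for_host()              # "https://127.0.0.1/core/pls/api/1"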
@@ -14,12 +14,12 @@ async def query(promql: str) -> list:
     return r.json()["data"]["result"]


-async def get_nf_status() -> list:
-    """Return a list of {name, state, instance} for every known NF."""
+async def get_nf_status_map() -> dict[str, dict]:
+    """Return Prometheus-backed NF status keyed by display name."""
     try:
         results = await query("up")
     except Exception:
-        return [{"name": n, "state": "unknown", "instance": ""} for n in ALL_NFS]
+        return {n: {"name": n, "state": "unknown", "instance": ""} for n in ALL_NFS}

     seen: dict[str, dict] = {}
     for r in results:

@@ -38,4 +38,9 @@ async def get_nf_status() -> list:
         if n not in seen:
             seen[n] = {"name": n, "state": "unknown", "instance": ""}

-    return list(seen.values())
+    return seen
+
+
+async def get_nf_status() -> list:
+    """Return a list of {name, state, instance} for every known NF."""
+    return list((await get_nf_status_map()).values())
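get_nf_status_map() is the new primary shape (keyed by NF name, which cluster_inventory merges with PLS placements), while the list-returning get_nf_status() is kept as a thin wrapper for existing callers. A sketch with invented instances:

    # Illustrative only.
    status_map = await get_nf_status_map()
    # {"AMF": {"name": "AMF", "state": "up",   "instance": "10.0.0.12:9090"},
    #  "UPF": {"name": "UPF", "state": "down", "instance": "10.0.0.14:9090"}, ...}
    status_list = await get_nf_status()
    # [{"name": "AMF", ...}, {"name": "UPF", ...}, ...]  (same records, list form)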
@@ -155,6 +155,12 @@ body {
   background: rgba(255,255,255,0.07); color: var(--text);
   width: fit-content; white-space: nowrap;
 }
+.issue-node {
+  font-size: 10px; font-weight: 600; letter-spacing: 0.04em;
+  padding: 2px 7px; border-radius: 5px; margin-top: 5px;
+  background: rgba(59,130,246,0.12); color: var(--blue);
+  width: fit-content; white-space: nowrap;
+}
 .issue-body {}
 .issue-desc { font-size: 13px; font-weight: 500; line-height: 1.4; }
 .issue-rem { font-size: 11px; color: var(--muted); margin-top: 3px; line-height: 1.4; }
@@ -469,6 +475,7 @@ function renderDetail(cat) {
       <div class="issue-nf">${esc(iss.nf)}</div>
       <div class="issue-body">
         <div class="issue-desc">${esc(iss.description)}</div>
+        ${iss.node ? `<div class="issue-node">${esc(iss.node)}</div>` : ''}
         <div class="issue-rem">⤷ ${esc(iss.remediation||'')}</div>
         <span class="issue-source">${esc(iss.source||'log')}</span>
       </div>

@@ -60,8 +60,10 @@ header h1 span { color: var(--muted); font-weight: 400; }
 /* ── Left panel ─────────────────────────────────────────────────── */
 .left {
   background: var(--surface); border-right: 1px solid var(--border);
-  display: flex; flex-direction: column; overflow: hidden;
+  display: flex; flex-direction: column; overflow-y: auto; overflow-x: hidden;
 }
+.left::-webkit-scrollbar { width: 5px; }
+.left::-webkit-scrollbar-thumb { background: var(--border); border-radius: 4px; }
 .section { padding: 14px 16px; border-bottom: 1px solid var(--border); }
 .section-title {
   font-size: 10px; font-weight: 700; text-transform: uppercase;
@@ -88,6 +90,79 @@ header h1 span { color: var(--muted); font-weight: 400; }
 .nf-card.up .nf-state { color: var(--green); }
 .nf-card.down .nf-state { color: var(--red); }

+/* Cluster nodes */
+.node-list { display: flex; flex-direction: column; gap: 8px; }
+.node-card {
+  background: var(--card); border: 1px solid var(--border); border-radius: 10px;
+  overflow: hidden;
+}
+.node-summary {
+  display: flex; align-items: center; gap: 8px; padding: 10px 12px; cursor: pointer;
+}
+.node-summary:hover { background: rgba(255,255,255,.02); }
+.node-top { display: flex; align-items: center; gap: 8px; width: 100%; }
+.node-name { font-size: 13px; font-weight: 700; }
+.node-addr { font-size: 11px; color: var(--muted); margin-top: 3px; }
+.node-caret {
+  margin-left: 8px; font-size: 11px; color: var(--muted); transition: transform .15s;
+}
+.node-card.open .node-caret { transform: rotate(180deg); }
+.node-role {
+  margin-left: auto; font-size: 10px; font-weight: 700; letter-spacing: .08em;
+  border-radius: 999px; padding: 3px 8px; border: 1px solid var(--border);
+  color: var(--blue); background: rgba(59,130,246,.12);
+}
+.node-role.current {
+  color: var(--green); border-color: rgba(16,185,129,.5); background: rgba(16,185,129,.12);
+}
+.node-meta {
+  display: flex; flex-wrap: wrap; gap: 6px; margin-top: 8px;
+}
+.node-chip {
+  font-size: 10px; color: var(--muted); padding: 2px 7px;
+  border-radius: 999px; background: rgba(255,255,255,.04); border: 1px solid var(--border);
+}
+.node-services {
+  margin-top: 8px; font-size: 11px; color: var(--text); line-height: 1.4;
+}
+.node-services b,
+.node-profile b {
+  color: var(--muted); font-weight: 600;
+}
+.node-profile {
+  margin-top: 6px; font-size: 11px; color: var(--text); line-height: 1.4;
+}
+.node-empty {
+  color: var(--muted); font-size: 12px;
+}
+.node-details {
+  display: none; padding: 0 12px 12px; border-top: 1px solid rgba(255,255,255,.04);
+}
+.node-card.open .node-details { display: block; }
+.node-nf-grid {
+  display: grid; grid-template-columns: repeat(3, 1fr); gap: 6px; margin-top: 10px;
+}
+.node-nf {
+  background: rgba(255,255,255,.03);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  padding: 7px 5px;
+  border-left: 3px solid var(--border);
+  text-align: center;
+}
+.node-nf.up { border-left-color: var(--green); }
+.node-nf.down { border-left-color: var(--red); }
+.node-nf.unknown { border-left-color: var(--yellow); }
+.node-nf-name {
+  font-size: 10px; font-weight: 700; color: var(--text); letter-spacing: .04em;
+}
+.node-nf-state {
+  margin-top: 3px; font-size: 9px; text-transform: uppercase; letter-spacing: .06em; color: var(--muted);
+}
+.node-nf.up .node-nf-state { color: var(--green); }
+.node-nf.down .node-nf-state { color: var(--red); }
+.node-nf.unknown .node-nf-state { color: var(--yellow); }
+
 /* Alerts panel */
 .alerts-scroll { flex: 1; overflow-y: auto; padding: 14px 16px; }
 .alerts-scroll::-webkit-scrollbar { width: 4px; }
@@ -101,6 +176,7 @@ header h1 span { color: var(--muted); font-weight: 400; }
 .alert-row.critical { border-left-color: var(--red); }
 .alert-row-name { font-size: 12px; font-weight: 600; }
 .alert-row-desc { font-size: 11px; color: var(--muted); margin-top: 2px; }
+.alert-row-node { font-size: 10px; color: var(--blue); margin-top: 5px; }

 /* ── Chat panel ─────────────────────────────────────────────────── */
 .chat { display: flex; flex-direction: column; overflow: hidden; }
@@ -185,13 +261,19 @@ header h1 span { color: var(--muted); font-weight: 400; }
   <div class="left">
     <div class="section">
       <div class="section-title">
-        Network Functions
+        Cluster Overview
        <button class="refresh-btn" onclick="refresh()" title="Refresh">↻</button>
       </div>
       <div class="nf-grid" id="nfGrid">
         <div class="nf-card"><div class="nf-name">···</div></div>
       </div>
     </div>
+    <div class="section">
+      <div class="section-title">Discovered Nodes</div>
+      <div class="node-list" id="nodeList">
+        <div class="node-empty">Loading cluster inventory…</div>
+      </div>
+    </div>
     <div class="alerts-scroll">
       <div class="section-title" style="margin-bottom:10px">Active Alerts</div>
       <div id="alertsContent"><div style="color:var(--muted);font-size:12px">Loading…</div></div>
@@ -221,6 +303,15 @@ header h1 span { color: var(--muted); font-weight: 400; }
 // ── Utilities ──────────────────────────────────────────────────────────────
 const $ = id => document.getElementById(id);
 const ts = () => new Date().toLocaleTimeString([],{hour:'2-digit',minute:'2-digit'});
+const ROLE_LABELS = {
+  '5GALL': '5G All',
+  '4GALL': '4G All',
+  '4GCP': '4G CP',
+  '4GDCP': '4G DCP',
+  'COMBOALL': 'Combo All',
+  'COMBOCP': 'Combo CP',
+  'COMBODCP': 'Combo DCP',
+};

 function md(text) {
   // minimal markdown: **bold**, `code`, newlines
@@ -261,15 +352,70 @@ async function loadNFs() {
         <div class="nf-state">${nf.state==='up'?'● up':nf.state==='down'?'● dn':'○ n/a'}</div>`;
       grid.appendChild(c);
     });
+    renderNodes(d.cluster);
     $('dot').className = 'dot';
     $('connLabel').textContent = 'Live';
   } catch {
     $('dot').className = 'dot err';
     $('connLabel').textContent = 'Unreachable';
     $('nfGrid').innerHTML = '<div style="color:var(--muted);font-size:12px;grid-column:1/-1">Cannot reach API</div>';
+    $('nodeList').innerHTML = '<div class="node-empty">Cannot reach cluster discovery API</div>';
   }
 }

+function toggleNodeCard(button) {
+  button.closest('.node-card')?.classList.toggle('open');
+}
+
+function renderNodes(cluster) {
+  const list = $('nodeList');
+  const nodes = cluster?.nodes || [];
+  if (!nodes.length) {
+    list.innerHTML = '<div class="node-empty">No cluster nodes discovered</div>';
+    return;
+  }
+
+  list.innerHTML = nodes.map(node => {
+    const role = ROLE_LABELS[node.role] || node.role || 'AP';
+    const repoChips = (node.repositories || []).slice(0, 3).map(repo =>
+      `<span class="node-chip">${repo.type}:${repo.role}</span>`
+    ).join('');
+    const running = (node.started_services || []).filter(name => !['alertmanager','prometheus','ncm','pls','fluent-bit','grafana','openvpn','ssh','node-exporter','podman-exporter','licensed','webconsole'].includes(name));
+    const serviceText = running.length ? running.join(', ') : 'No managed NFs started';
+    const expected = (node.expected_nfs || []).join(', ') || 'No NF profile mapped';
+    const nfTiles = (node.nfs || []).map(nf => `
+      <div class="node-nf ${nf.state}">
+        <div class="node-nf-name">${nf.name}</div>
+        <div class="node-nf-state">${nf.state === 'up' ? '● up' : nf.state === 'down' ? '● dn' : '○ n/a'}</div>
+      </div>
+    `).join('');
+    const downCount = (node.nfs || []).filter(nf => nf.state === 'down').length;
+    const openClass = node.current ? 'open' : '';
+    return `
+      <div class="node-card ${openClass}">
+        <div class="node-summary" onclick="toggleNodeCard(this)">
+          <div class="node-top">
+            <div>
+              <div class="node-name">${node.hostname}</div>
+              <div class="node-addr">${node.address} · ${node.nfs.filter(nf => nf.state === 'up').length} up${downCount ? `, ${downCount} down` : ''}</div>
+            </div>
+            <div class="node-role ${node.current ? 'current' : ''}">${role}${node.current ? ' · local' : ''}</div>
+            <div class="node-caret">▾</div>
+          </div>
+        </div>
+        <div class="node-details">
+          <div class="node-meta">
+            ${repoChips || '<span class="node-chip">No repo data</span>'}
+          </div>
+          <div class="node-services"><b>Running:</b> ${serviceText}</div>
+          <div class="node-profile"><b>Profile:</b> ${expected}</div>
+          <div class="node-nf-grid">${nfTiles || '<div class="node-empty">No node-scoped NF data</div>'}</div>
+        </div>
+      </div>
+    `;
+  }).join('');
+}
+
 async function loadAlerts() {
   try {
     const d = await (await fetch('./api/alerts')).json();
@@ -281,6 +427,7 @@ async function loadAlerts() {
       `<div class="alert-row ${a.severity||'warning'}">
         <div class="alert-row-name">${a.name}</div>
         <div class="alert-row-desc">${a.summary||a.instance||''}</div>
+        <div class="alert-row-node">${(a.nodes||[]).length ? 'Node: ' + a.nodes.map(n => n.hostname).join(', ') : 'Node: unresolved'}</div>
       </div>`
     ).join('');
 }

@@ -59,6 +59,22 @@ header h1 span { color: var(--muted); font-weight: 400; }
 }
 .main::-webkit-scrollbar { width: 5px; }
 .main::-webkit-scrollbar-thumb { background: var(--border); border-radius: 4px; }
+.content-grid {
+  display: grid;
+  grid-template-columns: minmax(0, 1fr) 420px;
+  gap: 24px;
+  align-items: start;
+}
+.tasks-col {
+  display: flex;
+  flex-direction: column;
+  gap: 24px;
+  min-width: 0;
+}
+.log-col {
+  position: sticky;
+  top: 0;
+}

 /* ── Section headers ─────────────────────────────────────────────── */
 .section-title {
@@ -171,6 +187,15 @@ header h1 span { color: var(--muted); font-weight: 400; }
 }
 .modal-confirm.danger { background: var(--red); }
 .modal-confirm.warning { background: var(--yellow); color: #000; }
+
+@media (max-width: 1100px) {
+  .content-grid {
+    grid-template-columns: 1fr;
+  }
+  .log-col {
+    position: static;
+  }
+}
 </style>
 </head>
 <body>
@@ -183,7 +208,8 @@ header h1 span { color: var(--muted); font-weight: 400; }
 </header>

 <div class="main">

+  <div class="content-grid">
+    <div class="tasks-col">
     <!-- Diagnostics -->
     <div>
       <div class="section-title">Diagnostics & Health</div>
@@ -201,9 +227,11 @@ header h1 span { color: var(--muted); font-weight: 400; }
       <div class="section-title">Maintenance</div>
       <div class="action-grid" id="maintGrid"></div>
     </div>
+    </div>

+    <div class="log-col">
     <!-- Run log -->
-    <div class="log-panel">
+    <div class="log-panel" id="logPanel">
       <div class="log-header">
         <div class="log-title">
           ▸ Run Log
@@ -218,7 +246,8 @@ header h1 span { color: var(--muted); font-weight: 400; }
       <div class="log-empty" id="logEmpty">No actions run yet.</div>
     </div>
   </div>
-
+    </div>
+  </div>
 </div>

 <!-- Confirm modal -->
@@ -255,6 +284,17 @@ const ACTIONS = {
   ],
 };

+function nfNodeLabel(nf) {
+  const nodes = nf?.nodes || [];
+  return nodes.length ? nodes.map(n => n.hostname).join(', ') : 'unresolved node';
+}
+
+async function fetchNetworkStatus() {
+  const r = await fetch('/api/network/status');
+  if (!r.ok) throw new Error('HTTP ' + r.status);
+  return await r.json();
+}
+
 // ── Render cards ──────────────────────────────────────────────────────────
 function renderGrid(gridId, items) {
   const g = document.getElementById(gridId);
@@ -289,6 +329,7 @@ function handleAction(id) {
   const all = [...ACTIONS.diag, ...ACTIONS.ops, ...ACTIONS.maint];
   const a = all.find(x => x.id === id);
   if (!a) return;
+  revealLogPanel(true);
   if (a.safe) { a.run(); return; }
   pendingAction = a;
   document.getElementById('modalTitle').textContent = a.name;
@@ -306,6 +347,7 @@ function closeModal() {

 function runConfirmed() {
   closeModal();
+  revealLogPanel(true);
   if (pendingAction) { pendingAction.run(); pendingAction = null; }
 }

@@ -326,6 +368,17 @@ function addLog(msg, type='info') {
   renderLog();
 }

+function revealLogPanel(forceExpand=false) {
+  const panel = document.getElementById('logPanel');
+  const el = document.getElementById('logBody');
+  const btn = document.getElementById('expandBtn');
+  if (forceExpand && !el.classList.contains('expanded')) {
+    el.classList.add('expanded');
+    btn.textContent = '⤡ Collapse';
+  }
+  panel.scrollIntoView({ behavior: 'smooth', block: 'start' });
+}
+
 function renderLog() {
   const el = document.getElementById('logBody');
   document.getElementById('logEmpty').style.display = logLines.length ? 'none' : '';
@@ -368,17 +421,16 @@ document.addEventListener('DOMContentLoaded', () => {

 // ── Action implementations ─────────────────────────────────────────────────
 async function pingNFs() {
-  addLog('▸ Pinging all NFs via Prometheus endpoint…', 'run');
+  addLog('▸ Checking all discovered NFs across cluster nodes…', 'run');
   try {
-    const r = await fetch('/api/network/nf-status');
-    const d = await r.json();
-    const nfs = d.nf_status || [];
+    const d = await fetchNetworkStatus();
+    const nfs = d.nfs || [];
     const up = nfs.filter(n => n.state === 'up').length;
     const down = nfs.filter(n => n.state === 'down').length;
-    nfs.forEach(n => addLog(` ${n.name}: ${n.state.toUpperCase()}`, n.state === 'up' ? 'ok' : 'err'));
-    addLog(`✓ Ping complete — ${up} up, ${down} down`, down > 0 ? 'warn' : 'ok');
+    nfs.forEach(n => addLog(` ${n.name}: ${n.state.toUpperCase()} on ${nfNodeLabel(n)}`, n.state === 'up' ? 'ok' : n.state === 'down' ? 'err' : 'warn'));
+    addLog(`✓ Cluster check complete — ${up} up, ${down} down`, down > 0 ? 'warn' : 'ok');
   } catch(e) {
-    addLog('✗ Failed to reach Prometheus: ' + e.message, 'err');
+    addLog('✗ Failed to reach network status API: ' + e.message, 'err');
   }
 }

@@ -392,7 +444,7 @@ async function refreshAlerts() {
       addLog('✓ No active alerts — network is healthy', 'ok');
     } else {
       addLog(`⚠ ${alerts.length} active alert(s):`, 'warn');
-      alerts.forEach(a => addLog(` [${(a.labels?.severity||'info').toUpperCase()}] ${a.labels?.alertname||'Unknown'}`, 'warn'));
+      alerts.forEach(a => addLog(` [${(a.severity||'info').toUpperCase()}] ${a.name} on ${(a.nodes||[]).map(n => n.hostname).join(', ') || 'unresolved node'}`, 'warn'));
     }
   } catch(e) {
     addLog('✗ Failed to reach Alertmanager: ' + e.message, 'err');
@@ -400,15 +452,18 @@ async function refreshAlerts() {
 }

 async function nfReport() {
-  addLog('▸ Generating full NF status report…', 'run');
+  addLog('▸ Generating cluster-wide NF status report…', 'run');
   try {
-    const r = await fetch('/api/network/nf-status');
-    const d = await r.json();
-    const nfs = d.nf_status || [];
+    const d = await fetchNetworkStatus();
+    const nfs = d.nfs || [];
     const up = nfs.filter(n => n.state === 'up').length;
     addLog(`✓ Report: ${up}/${nfs.length} NFs operational`, up === nfs.length ? 'ok' : 'warn');
+    (d.cluster?.nodes || []).forEach(node => {
+      const running = (node.nfs || []).filter(nf => nf.state === 'up').map(nf => nf.name);
+      addLog(` ${node.hostname} (${node.role}): ${running.join(', ') || 'no active NFs'}`, 'info');
+    });
     addLog(` Timestamp: ${new Date().toISOString()}`, 'info');
-    addLog(` Source: Prometheus metrics`, 'info');
+    addLog(` Source: PLS cluster discovery + Prometheus`, 'info');
   } catch(e) {
     addLog('✗ Report generation failed: ' + e.message, 'err');
   }
@@ -453,16 +508,15 @@ async function emulatedSession() {
 }

 async function checkDevices() {
-  addLog('▸ Fetching connected device list…', 'run');
+  addLog('▸ Checking cluster nodes for subscriber-serving NFs…', 'run');
   try {
-    const r = await fetch('/api/network/nf-status');
-    const d = await r.json();
-    const nfs = d.nf_status || [];
+    const d = await fetchNetworkStatus();
+    const nfs = d.nfs || [];
     const amf = nfs.find(n => n.name === 'AMF');
-    addLog(` AMF state: ${amf ? amf.state.toUpperCase() : 'UNKNOWN'}`, amf?.state === 'up' ? 'ok' : 'warn');
+    addLog(` AMF state: ${amf ? amf.state.toUpperCase() : 'UNKNOWN'} on ${nfNodeLabel(amf)}`, amf?.state === 'up' ? 'ok' : 'warn');
     const upf = nfs.find(n => n.name === 'UPF');
-    addLog(` UPF state: ${upf ? upf.state.toUpperCase() : 'UNKNOWN'}`, upf?.state === 'up' ? 'ok' : 'warn');
-    addLog('✓ Device registry checked — see Prometheus for per-device detail', 'ok');
+    addLog(` UPF state: ${upf ? upf.state.toUpperCase() : 'UNKNOWN'} on ${nfNodeLabel(upf)}`, upf?.state === 'up' ? 'ok' : 'warn');
+    addLog('✓ Cluster subscriber path checked — see Marvis AI for node-scoped health', 'ok');
   } catch(e) {
     addLog('✗ Could not reach network status endpoint: ' + e.message, 'err');
   }
@@ -486,10 +540,12 @@ function clearSessions() {
 }

 function backupConfig() {
-  addLog('▸ Exporting configuration for all NFs…', 'run');
-  const nfs = ['AMF','SMF','UPF','NRF','AUSF','UDM','UDR','PCF','CHF','SMSF','AAA','MME'];
-  nfs.forEach((nf, i) => setTimeout(() => addLog(` ${nf}: config exported`, 'ok'), 300 + i*120));
-  setTimeout(() => addLog(`✓ Backup archive: p5g-config-${new Date().toISOString().slice(0,10)}.tar.gz`, 'ok'), 300 + nfs.length*120 + 200);
+  addLog('▸ Exporting configuration plan for all discovered nodes…', 'run');
+  fetchNetworkStatus().then(d => {
+    const nodes = d.cluster?.nodes || [];
+    nodes.forEach((node, i) => setTimeout(() => addLog(` ${node.hostname}: profile ${node.role}, services ${node.started_services.join(', ') || 'none'}`, 'ok'), 300 + i*160));
+    setTimeout(() => addLog(`✓ Backup archive plan ready: p5g-config-${new Date().toISOString().slice(0,10)}.tar.gz`, 'ok'), 300 + nodes.length*160 + 200);
+  }).catch(e => addLog('✗ Could not inspect cluster before backup: ' + e.message, 'err'));
 }

 function reloadConfig() {
@@ -4,6 +4,11 @@
 MARVIS_PROMETHEUS_URL=http://127.0.0.1:9090
 MARVIS_PROMETHEUS_PREFIX=/prometheus
 MARVIS_ALERTMANAGER_URL=http://127.0.0.1:9093
+MARVIS_PLS_BASE_URL=https://127.0.0.1/core/pls/api/1
+MARVIS_PLS_USERNAME=
+MARVIS_PLS_PASSWORD=
+MARVIS_PLS_AUTH_BACKEND=local
+MARVIS_PLS_VERIFY_TLS=false

 # AI backend configuration.
 MARVIS_AI_MODE=rule
@@ -11,6 +11,11 @@ TimeoutStartSec=0
 Environment=MARVIS_PROMETHEUS_URL=http://127.0.0.1:9090
 Environment=MARVIS_PROMETHEUS_PREFIX=/prometheus
 Environment=MARVIS_ALERTMANAGER_URL=http://127.0.0.1:9093
+Environment=MARVIS_PLS_BASE_URL=https://127.0.0.1/core/pls/api/1
+Environment=MARVIS_PLS_USERNAME=
+Environment=MARVIS_PLS_PASSWORD=
+Environment=MARVIS_PLS_AUTH_BACKEND=local
+Environment=MARVIS_PLS_VERIFY_TLS=false
 Environment=MARVIS_AI_MODE=rule
 Environment=MARVIS_OPENAI_API_KEY=
 Environment=MARVIS_OPENAI_BASE_URL=https://api.openai.com
@@ -26,6 +31,11 @@ ExecStart=/usr/bin/docker run \
   --env MARVIS_PROMETHEUS_URL \
   --env MARVIS_PROMETHEUS_PREFIX \
   --env MARVIS_ALERTMANAGER_URL \
+  --env MARVIS_PLS_BASE_URL \
+  --env MARVIS_PLS_USERNAME \
+  --env MARVIS_PLS_PASSWORD \
+  --env MARVIS_PLS_AUTH_BACKEND \
+  --env MARVIS_PLS_VERIFY_TLS \
   --env MARVIS_AI_MODE \
   --env MARVIS_OPENAI_API_KEY \
   --env MARVIS_OPENAI_BASE_URL \
||||
Reference in New Issue
Block a user