Files
p5g-marvis/app/services/cluster_inventory.py
2026-04-24 14:15:58 -04:00

192 lines
7.2 KiB
Python

"""Cluster discovery built on top of the PLS API."""
from __future__ import annotations
import asyncio
import re
from app.config import ALL_NFS
from app.services import pls, prometheus
_last_inventory: dict | None = None
ROLE_NF_MAP = {
"5GALL": {"amf", "smf", "pcf", "udr", "udm", "nrf", "eir", "ausf", "dra", "upf", "chf", "smsf", "aaa", "bmsc"},
"CP": {"amf", "smf", "pcf", "udr", "udm", "nrf", "eir", "ausf", "dra", "chf", "smsf", "aaa", "bmsc"},
"UP": {"upf"},
"DCP": {"amf", "smf", "pcf", "chf", "smsf", "bmsc"},
"DLF": {"udr", "udm", "nrf", "eir", "ausf", "aaa"},
"SIG": {"dra"},
"4GALL": {"mme", "sgwc", "smf", "pcf", "chf", "udr", "udm", "nrf", "eir", "ausf", "dra", "upf", "smsf", "aaa", "bmsc"},
"4GCP": {"mme", "sgwc", "smf", "pcf", "chf", "udr", "udm", "nrf", "eir", "ausf", "dra", "smsf", "aaa", "bmsc"},
"4GDCP": {"mme", "sgwc", "smf", "pcf", "chf", "smsf", "bmsc"},
"COMBOALL": {"amf", "mme", "sgwc", "smf", "pcf", "chf", "udr", "udm", "nrf", "eir", "ausf", "dra", "upf", "smsf", "aaa", "bmsc"},
"COMBOCP": {"amf", "mme", "sgwc", "smf", "pcf", "chf", "udr", "udm", "nrf", "eir", "ausf", "dra", "smsf", "aaa", "bmsc"},
"COMBODCP": {"amf", "mme", "sgwc", "smf", "pcf", "chf", "aaa"},
}
ROLE_ALIASES = {
"UPF": "UP",
}
ROLE_PRIORITY = ["COMBOALL", "COMBOCP", "COMBODCP", "5GALL", "4GALL", "4GCP", "4GDCP", "DCP", "DLF", "SIG", "CP", "UP"]
def _infer_role(hostname: str) -> str:
    """Derive a node role from the tokens of its hostname.

    The hostname is upper-cased and split on every run of non-alphanumeric
    characters; each token is then passed through ``ROLE_ALIASES``.

    Returns:
        The first entry of ``ROLE_PRIORITY`` that equals one of the tokens,
        otherwise ``"UP"`` if any token ends with ``"UPF"``, otherwise
        ``"AP"``.
    """
    pieces = [
        ROLE_ALIASES.get(piece, piece)
        for piece in re.split(r"[^A-Za-z0-9]+", hostname.upper())
        if piece  # the split yields empty strings at leading/trailing separators
    ]
    present = set(pieces)

    exact = next((role for role in ROLE_PRIORITY if role in present), None)
    if exact is not None:
        return exact

    # No exact role token: accept a token that merely ends in "UPF".
    if any(piece.endswith("UPF") for piece in pieces):
        return "UP"
    return "AP"
async def get_cluster_inventory() -> dict:
    """Build the cluster inventory from the PLS API.

    Reads the cluster status, then requests system info and the service list
    of every node concurrently. A per-node request that fails (or returns an
    unexpected type) is treated as "no data" for that node instead of failing
    the whole inventory.

    Returns:
        A dict with the keys ``enabled``, ``current_node``,
        ``fully_established`` and ``nodes``. When the cluster status lookup
        returns nothing, the previously built inventory is returned if there
        is one, otherwise a placeholder with ``enabled`` set to ``False``.

    Side effects:
        Stores each freshly built inventory in the module-level
        ``_last_inventory``.
    """
    global _last_inventory

    cluster = await pls.get_cluster_status()
    if not cluster:
        return _last_inventory or {
            "enabled": False,
            "current_node": None,
            "fully_established": False,
            "nodes": [],
        }

    # ``or []`` also covers a "nodes" key that is present but null.
    cluster_nodes = cluster.get("nodes") or []
    # Resolve each node's host once; it is reused for both requests, as the
    # fallback hostname and as the reported address.
    hosts = [pls.node_host(node.get("name", "")) for node in cluster_nodes]

    # A single gather lets every request overlap. With return_exceptions=True
    # failures come back as exception objects in the result list.
    results = await asyncio.gather(
        *(pls.get_system_info(host) for host in hosts),
        *(pls.get_services(host) for host in hosts),
        return_exceptions=True,
    )
    infos = results[: len(hosts)]
    services = results[len(hosts):]

    nodes: list[dict] = []
    for node, host, raw_info, raw_services in zip(cluster_nodes, hosts, infos, services):
        # Anything that is not the expected container type (including a
        # returned exception) counts as missing data.
        info = raw_info if isinstance(raw_info, dict) else {}
        node_services = raw_services if isinstance(raw_services, list) else []
        # Read the name with .get(), like the other helpers in this module,
        # so a started service without a name is skipped instead of raising
        # KeyError.
        started = {
            svc["name"]
            for svc in node_services
            if svc.get("state") == "started" and svc.get("name")
        }
        hostname = info.get("hostname") or host
        role = _infer_role(hostname)
        nodes.append(
            {
                "name": node.get("name", ""),
                "address": host,
                "hostname": hostname,
                "current": node.get("name") == cluster.get("current_node"),
                "repositories": node.get("repositories", []),
                "role": role,
                "roles": [role],
                "expected_nfs": sorted(ROLE_NF_MAP.get(role, set())),
                "services": node_services,
                "started_services": sorted(started),
            }
        )

    inventory = {
        "enabled": True,
        "current_node": cluster.get("current_node"),
        "fully_established": bool(cluster.get("fully_established")),
        "nodes": nodes,
    }
    _last_inventory = inventory
    return inventory
def _aggregate_nf_state(nf_name: str, nodes: list[dict], prom_states: dict[str, dict]) -> dict:
    """Merge per-node service data and the Prometheus entry into one NF status.

    A service belongs to the NF when its ``name`` equals ``nf_name`` in lower
    case.

    * Started on at least one node: the state is the Prometheus state when
      that is ``"up"`` or ``"down"``, otherwise ``"up"``; ``instance`` is the
      comma-separated list of hostnames running it.
    * Listed on some node but started nowhere: ``"down"`` with an empty
      ``instance``.
    * Not listed on any node: state and instance come straight from the
      Prometheus entry (``"unknown"`` / ``""`` when there is none).

    Returns:
        A dict with the keys ``name``, ``state``, ``instance`` and ``nodes``
        (one placement dict per node where the service is started).
    """
    wanted = nf_name.lower()

    # Every (node, service) pair whose service name matches, in node order.
    hits = [
        (node, svc)
        for node in nodes
        for svc in node.get("services", [])
        if svc.get("name") == wanted
    ]
    running = [
        {
            "hostname": node["hostname"],
            "address": node["address"],
            "roles": node["roles"],
        }
        for node, svc in hits
        if svc.get("state") == "started"
    ]
    reported = prom_states.get(nf_name, {"state": "unknown", "instance": ""})

    if running:
        state = reported["state"]
        if state not in {"up", "down"}:
            # The service is running, so anything but an explicit verdict
            # from Prometheus is reported as up.
            state = "up"
        instance = ", ".join(entry["hostname"] for entry in running)
    elif hits:
        state, instance = "down", ""
    else:
        state, instance = reported["state"], reported["instance"]

    return {
        "name": nf_name,
        "state": state,
        "instance": instance,
        "nodes": running,
    }
def _node_nf_state(node: dict, nf_name: str) -> dict:
    """Report the state of one NF on a single node.

    The first entry of ``node["services"]`` whose ``name`` equals ``nf_name``
    in lower case decides the result.

    Returns:
        ``{"name": nf_name, "state": ...}`` where the state is ``"up"`` when
        that service is ``"started"``, ``"down"`` for any other service
        state, and ``"unknown"`` when the node lists no such service.
    """
    wanted = nf_name.lower()
    for svc in node.get("services", []):
        if svc.get("name") != wanted:
            continue
        state = "up" if svc.get("state") == "started" else "down"
        return {"name": nf_name, "state": state}
    return {"name": nf_name, "state": "unknown"}
def _attach_node_nf_status(nodes: list[dict]) -> list[dict]:
    """Return shallow copies of ``nodes`` with an ``nfs`` status list added.

    For each node the listed NFs are the union of its ``expected_nfs`` and the
    names of its services whose ``type`` is ``"nf"`` (whatever their state),
    all upper-cased and sorted. Each name is resolved to a status dict via
    ``_node_nf_state``. The input dicts themselves are not modified.
    """

    def _with_nfs(node: dict) -> dict:
        entry = dict(node)
        from_role = {name.upper() for name in entry.get("expected_nfs", [])}
        from_services = {
            svc["name"].upper()
            for svc in entry.get("services", [])
            if svc.get("type") == "nf" and svc.get("name")
        }
        # Names are already upper-cased at this point.
        entry["nfs"] = [
            _node_nf_state(entry, name) for name in sorted(from_role | from_services)
        ]
        return entry

    return [_with_nfs(node) for node in nodes]
async def get_network_status() -> dict:
    """Assemble the overall NF status together with the cluster inventory.

    Fetches the cluster inventory and the Prometheus NF status map
    concurrently, attaches a per-node ``nfs`` list to every node, and
    aggregates one status entry for each name in ``ALL_NFS``.

    Returns:
        A dict with
        ``nfs``     -- one aggregated status dict per entry of ``ALL_NFS``,
        ``summary`` -- counts of ``up`` and ``down`` NFs plus the ``total``,
        ``cluster`` -- the inventory with its ``nodes`` replaced by the
        enriched copies.
    """
    inventory, prom_states = await asyncio.gather(
        get_cluster_inventory(),
        prometheus.get_nf_status_map(),
    )

    nodes = _attach_node_nf_status(inventory.get("nodes", []))
    # Build a new dict instead of assigning into ``inventory``: that object is
    # the one get_cluster_inventory() keeps as its module-level cache, so
    # writing to it would leak the enriched nodes into later cached results.
    cluster = {**inventory, "nodes": nodes}

    nfs = [_aggregate_nf_state(nf_name, nodes, prom_states) for nf_name in ALL_NFS]
    up = sum(1 for nf in nfs if nf["state"] == "up")
    down = sum(1 for nf in nfs if nf["state"] == "down")
    return {
        "nfs": nfs,
        "summary": {"up": up, "down": down, "total": len(nfs)},
        "cluster": cluster,
    }
def find_nf_nodes(cluster: dict, nf_name: str) -> list[dict]:
    """List the nodes of ``cluster`` whose ``nfs`` contain ``nf_name``.

    The name comparison is exact. Only the first matching ``nfs`` entry of a
    node is used, so every node appears at most once.

    Returns:
        One dict per matching node with the keys ``hostname``, ``address``,
        ``role`` (default ``"AP"``), ``current`` (default ``False``) and
        ``state`` (default ``"unknown"``), in node order.
    """
    found = []
    for node in cluster.get("nodes", []):
        hit = next(
            (nf for nf in node.get("nfs", []) if nf.get("name") == nf_name),
            None,
        )
        if hit is None:
            continue
        found.append(
            {
                "hostname": node["hostname"],
                "address": node["address"],
                "role": node.get("role", "AP"),
                "current": node.get("current", False),
                "state": hit.get("state", "unknown"),
            }
        )
    return found