started log ingestion and analysis
This commit is contained in:
@@ -6,6 +6,7 @@ Phase 2: swap MARVIS_AI_MODE=openai or MARVIS_AI_MODE=ollama to route through LL
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
import re
|
||||
from app.config import (
|
||||
AI_MODE,
|
||||
CONTAINER_RUNTIME,
|
||||
@@ -17,23 +18,25 @@ from app.config import (
|
||||
)
|
||||
|
||||
|
||||
async def answer(query: str, network_state: dict, alerts: list, logs: list[dict] | None = None) -> str:
    """Answer a user question using the backend selected by ``AI_MODE``.

    Args:
        query: The user's question.
        network_state: Network state dict handed through to the backend.
        alerts: Active alerts handed through to the backend.
        logs: Optional ingested log events; a missing or empty value is
            forwarded as an empty list.

    Returns:
        The answer text produced by the OpenAI backend, the Ollama backend,
        or the rule-based engine (used for every other ``AI_MODE`` value).
    """
    log_events = logs if logs else []

    # Anything other than the two LLM modes is served by the local rule engine.
    if AI_MODE not in ("openai", "ollama"):
        return _rule_based(query, network_state, alerts, log_events)

    backend = _call_openai if AI_MODE == "openai" else _call_ollama
    return await backend(query, network_state, alerts, log_events)
|
||||
|
||||
|
||||
# ── Rule-based engine ──────────────────────────────────────────────────────
|
||||
|
||||
def _rule_based(query: str, network_state: dict, alerts: list) -> str:
|
||||
def _rule_based(query: str, network_state: dict, alerts: list, logs: list[dict]) -> str:
|
||||
q = query.lower()
|
||||
nfs = network_state.get("nfs", [])
|
||||
cluster = network_state.get("cluster", {})
|
||||
up = [n for n in nfs if n["state"] == "up"]
|
||||
down = [n for n in nfs if n["state"] == "down"]
|
||||
|
||||
log_hits = _find_log_hits(q, logs)
|
||||
|
||||
if any(w in q for w in ["hello", "hi ", "hey", "howdy"]):
|
||||
return ("Hello! I'm **P5G Marvis**, your AI network assistant for HPE Private 5G.\n"
|
||||
"Ask me about network health, specific functions, alerts, or performance.")
|
||||
@@ -53,22 +56,25 @@ def _rule_based(query: str, network_state: dict, alerts: list) -> str:
|
||||
from app.config import ALL_NFS
|
||||
for nf_name in ALL_NFS:
|
||||
if nf_name.lower() in q:
|
||||
return _nf_detail(nf_name, nfs, alerts)
|
||||
return _nf_detail(nf_name, nfs, alerts, log_hits)
|
||||
|
||||
if any(w in q for w in ["alert", "alarm", "warning", "critical", "incident", "problem", "issue"]):
|
||||
return _alerts_summary(alerts)
|
||||
|
||||
if any(w in q for w in ["log", "trace", "journal", "message", "error"]):
|
||||
return _log_summary(log_hits, logs)
|
||||
|
||||
if any(w in q for w in ["subscriber", "ue ", "device", "phone", "handset", "registration", "attach"]):
|
||||
return _subscriber_analysis(nfs, alerts, cluster)
|
||||
return _subscriber_analysis(nfs, alerts, cluster, log_hits)
|
||||
|
||||
if any(w in q for w in ["session", "pdu", "bearer", "user plane", "traffic", "throughput"]):
|
||||
return _session_analysis(nfs, alerts, cluster)
|
||||
return _session_analysis(nfs, alerts, cluster, log_hits)
|
||||
|
||||
# Default → health summary
|
||||
return _health_summary(up, down, alerts, cluster)
|
||||
return _health_summary(up, down, alerts, cluster, log_hits)
|
||||
|
||||
|
||||
def _health_summary(up: list, down: list, alerts: list, cluster: dict) -> str:
|
||||
def _health_summary(up: list, down: list, alerts: list, cluster: dict, log_hits: list[dict]) -> str:
|
||||
ts = datetime.now().strftime("%H:%M:%S")
|
||||
crit = [a for a in alerts if a.get("severity") == "critical"]
|
||||
warn = [a for a in alerts if a.get("severity") != "critical"]
|
||||
@@ -104,13 +110,21 @@ def _health_summary(up: list, down: list, alerts: list, cluster: dict) -> str:
|
||||
|
||||
if not down and not alerts:
|
||||
lines.append("\n🟢 All systems nominal.")
|
||||
if log_hits:
|
||||
lines.append(f"\n🧾 **Relevant log hits ({len(log_hits)})**")
|
||||
for hit in log_hits[:4]:
|
||||
lines.append(
|
||||
f" • {hit.get('timestamp','')} — {hit.get('node','unknown')} {hit.get('nf','SYSTEM')}: "
|
||||
f"{_trim_message(hit.get('message',''))}"
|
||||
)
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _nf_detail(nf_name: str, nfs: list, alerts: list) -> str:
|
||||
def _nf_detail(nf_name: str, nfs: list, alerts: list, log_hits: list[dict]) -> str:
|
||||
nf = next((n for n in nfs if n["name"] == nf_name), None)
|
||||
nf_alerts = [a for a in alerts
|
||||
if nf_name in a.get("name", "") or nf_name.lower() in a.get("instance", "").lower()]
|
||||
nf_logs = [hit for hit in log_hits if hit.get("nf") == nf_name]
|
||||
|
||||
if not nf or nf["state"] == "unknown":
|
||||
return (f"ℹ️ No Prometheus data found for **{nf_name}**.\n"
|
||||
@@ -132,6 +146,13 @@ def _nf_detail(nf_name: str, nfs: list, alerts: list) -> str:
|
||||
lines.append(f" → {a['name']}: {a.get('summary', '')}")
|
||||
else:
|
||||
lines.append("No active alerts for this function.")
|
||||
if nf_logs:
|
||||
lines.append(f"\n🧾 Recent {nf_name} log evidence:")
|
||||
for hit in nf_logs[:4]:
|
||||
lines.append(
|
||||
f" • {hit.get('timestamp','')} on {hit.get('node','unknown')}: "
|
||||
f"{_trim_message(hit.get('message',''))}"
|
||||
)
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
@@ -151,7 +172,7 @@ def _alerts_summary(alerts: list) -> str:
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _subscriber_analysis(nfs: list, alerts: list, cluster: dict) -> str:
|
||||
def _subscriber_analysis(nfs: list, alerts: list, cluster: dict, log_hits: list[dict]) -> str:
|
||||
amf = next((n for n in nfs if n["name"] == "AMF"), None)
|
||||
smf = next((n for n in nfs if n["name"] == "SMF"), None)
|
||||
lines = ["**Subscriber & Registration Analysis**\n"]
|
||||
@@ -163,11 +184,18 @@ def _subscriber_analysis(nfs: list, alerts: list, cluster: dict) -> str:
|
||||
lines.append(f"\n⚠️ {len(sub_alerts)} subscriber-related alert(s) active.")
|
||||
else:
|
||||
lines.append("\nNo subscriber-related alerts detected.")
|
||||
sub_logs = [hit for hit in log_hits if any(key in hit.get("message", "").lower() for key in ["imsi", "supi", "registration", "attach", "subscriber"])]
|
||||
if sub_logs:
|
||||
lines.append("\nRecent subscriber-related log evidence:")
|
||||
for hit in sub_logs[:4]:
|
||||
lines.append(
|
||||
f"• {hit.get('nf','SYSTEM')} on {hit.get('node','unknown')}: {_trim_message(hit.get('message',''))}"
|
||||
)
|
||||
lines.append(_cluster_scope(cluster))
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _session_analysis(nfs: list, alerts: list, cluster: dict) -> str:
|
||||
def _session_analysis(nfs: list, alerts: list, cluster: dict, log_hits: list[dict]) -> str:
|
||||
smf = next((n for n in nfs if n["name"] == "SMF"), None)
|
||||
upf = next((n for n in nfs if n["name"] == "UPF"), None)
|
||||
lines = ["**PDU Session & Data Plane Analysis**\n"]
|
||||
@@ -177,10 +205,38 @@ def _session_analysis(nfs: list, alerts: list, cluster: dict) -> str:
|
||||
lines.append("\n⚡ **Impact**: PDU sessions will fail until both SMF and UPF are operational.")
|
||||
else:
|
||||
lines.append("\nBoth SMF and UPF operational — sessions should be establishing normally.")
|
||||
session_logs = [hit for hit in log_hits if hit.get("nf") in {"SMF", "UPF"}]
|
||||
if session_logs:
|
||||
lines.append("\nRecent session/data-plane log evidence:")
|
||||
for hit in session_logs[:4]:
|
||||
lines.append(
|
||||
f"• {hit.get('nf','SYSTEM')} on {hit.get('node','unknown')}: {_trim_message(hit.get('message',''))}"
|
||||
)
|
||||
lines.append(_cluster_scope(cluster))
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _log_summary(log_hits: list[dict], logs: list[dict]) -> str:
    """Format the answer for a log-oriented question.

    Args:
        log_hits: Log events that matched the question, best match first.
        logs: Every ingested log event.

    Returns:
        A notice when nothing has been ingested; otherwise up to eight
        matching events as bullet lines; otherwise, when nothing matched,
        a pointer to the ingested event with the highest ``epoch``.
    """
    if not logs:
        return "ℹ️ No ingested logs are currently available."

    if log_hits:
        heading = f"🧾 **Relevant log matches ({len(log_hits)})**\n"
        bullets = [
            f"• {hit.get('timestamp','')} — {hit.get('node','unknown')} {hit.get('nf','SYSTEM')}: "
            f"{_trim_message(hit.get('message',''))}"
            for hit in log_hits[:8]
        ]
        return "\n".join([heading, *bullets])

    # No match: fall back to the newest event (events without "epoch" rank as 0.0).
    newest = max(logs, key=lambda event: event.get("epoch", 0.0), default=None)
    if not newest:
        return "ℹ️ No relevant log matches were found."

    return (
        "ℹ️ I do not see direct log matches for that question.\n\n"
        f"Latest ingested log: {newest.get('timestamp','')} on {newest.get('node','unknown')} "
        f"{newest.get('nf','SYSTEM')} — {_trim_message(newest.get('message',''))}"
    )
|
||||
|
||||
|
||||
def _nf_label(nf: dict) -> str:
|
||||
placements = nf.get("nodes", [])
|
||||
if not placements:
|
||||
@@ -207,24 +263,30 @@ def _cluster_scope(cluster: dict) -> str:
|
||||
|
||||
# ── LLM backends ──────────────────────────────────────────────────────────
|
||||
|
||||
def _build_context(network_state: dict, alerts: list, logs: list[dict]) -> str:
    """Render network state, alerts and recent logs as plain text for an LLM prompt.

    Args:
        network_state: Dict read for ``"nfs"`` (entries with ``"name"`` and
            ``"state"``) and ``"cluster" -> "nodes"`` (entries with
            ``"hostname"`` and an optional ``"role"``).
        alerts: Alert dicts; only the ``"name"`` of the first five is used.
        logs: Log events; only the last ten entries are included.

    Returns:
        Five newline-separated lines: NFs up, NFs down, cluster nodes,
        active alerts and recent logs. Each falls back to ``none`` when empty.
    """
    functions = network_state.get("nfs", [])
    running = [fn["name"] for fn in functions if fn["state"] == "up"]
    stopped = [fn["name"] for fn in functions if fn["state"] == "down"]

    node_parts = []
    for node in network_state.get("cluster", {}).get("nodes", []):
        node_parts.append(f"{node['hostname']} ({node.get('role', 'AP')})")

    # Only the tail of the log list is sent, which keeps the prompt small.
    log_parts = []
    for entry in (logs[-10:] if logs else []):
        log_parts.append(
            f"{entry.get('timestamp','')} {entry.get('node','unknown')} {entry.get('nf','SYSTEM')}: {_trim_message(entry.get('message',''), 120)}"
        )

    alert_names = ", ".join(a.get('name','') for a in alerts[:5])

    sections = [
        f"NFs UP: {', '.join(running) or 'none'}",
        f"NFs DOWN: {', '.join(stopped) or 'none'}",
        f"Cluster nodes: {', '.join(node_parts) or 'none'}",
        f"Active alerts: {alert_names or 'none'}",
        f"Recent logs: {'; '.join(log_parts) or 'none'}",
    ]
    return "\n".join(sections)
|
||||
|
||||
|
||||
async def _call_openai(query: str, network_state: dict, alerts: list) -> str:
|
||||
async def _call_openai(query: str, network_state: dict, alerts: list, logs: list[dict]) -> str:
|
||||
try:
|
||||
import httpx
|
||||
ctx = _build_context(network_state, alerts)
|
||||
ctx = _build_context(network_state, alerts, logs)
|
||||
messages = [
|
||||
{"role": "system", "content":
|
||||
f"You are P5G Marvis, an AI network assistant for HPE Private 5G.\n"
|
||||
@@ -247,13 +309,13 @@ async def _call_openai(query: str, network_state: dict, alerts: list) -> str:
|
||||
# some reasoning models put the answer in content, others in reasoning_content
|
||||
return msg.get("content") or msg.get("reasoning_content") or "(empty response)"
|
||||
except Exception as e:
|
||||
return f"LLM error: {e}\n\n" + _rule_based(query, network_state, alerts)
|
||||
return f"LLM error: {e}\n\n" + _rule_based(query, network_state, alerts, logs)
|
||||
|
||||
|
||||
async def _call_ollama(query: str, network_state: dict, alerts: list) -> str:
|
||||
async def _call_ollama(query: str, network_state: dict, alerts: list, logs: list[dict]) -> str:
|
||||
try:
|
||||
import httpx
|
||||
ctx = _build_context(network_state, alerts)
|
||||
ctx = _build_context(network_state, alerts, logs)
|
||||
prompt = (f"You are P5G Marvis, an AI network assistant.\n"
|
||||
f"Network state:\n{ctx}\n\nUser: {query}\nAssistant:")
|
||||
async with httpx.AsyncClient(timeout=60) as client:
|
||||
@@ -263,4 +325,34 @@ async def _call_ollama(query: str, network_state: dict, alerts: list) -> str:
|
||||
)
|
||||
return resp.json().get("response", "No response.")
|
||||
except Exception as e:
|
||||
return f"Ollama error: {e}\n\n" + _rule_based(query, network_state, alerts)
|
||||
return f"Ollama error: {e}\n\n" + _rule_based(query, network_state, alerts, logs)
|
||||
|
||||
|
||||
def _find_log_hits(query: str, logs: list[dict]) -> list[dict]:
    """Return the log events that mention words from *query*, best match first.

    The query is lower-cased and split into terms of at least three
    characters (letters, digits, ``_`` and ``-``). Each event is scored by
    how many distinct terms occur as substrings of its ``nf``, ``node``,
    ``source`` and ``message`` fields.

    Args:
        query: Free-text user question.
        logs: Ingested log events.

    Returns:
        Shallow copies of the matching events, each with an added
        ``"_score"`` key, ordered by score and then by ``epoch``, both
        descending. The input events are not modified. An empty list is
        returned when there are no logs or no usable terms.
    """
    # De-duplicate terms (keeping first-seen order) so that repeating a word
    # in the question cannot inflate an event's score.
    terms = list(dict.fromkeys(
        term for term in re.findall(r"[a-z0-9_-]+", query.lower()) if len(term) >= 3
    ))
    if not logs or not terms:
        return []

    hits = []
    for event in logs:
        haystack = " ".join(
            str(event.get(field, "")).lower()
            for field in ("nf", "node", "source", "message")
        )
        score = sum(term in haystack for term in terms)
        if score:
            hits.append({**event, "_score": score})

    # A missing or None epoch ranks as 0.0; comparing None with a float would raise.
    hits.sort(key=lambda hit: (hit["_score"], hit.get("epoch") or 0.0), reverse=True)
    return hits
|
||||
|
||||
|
||||
def _trim_message(message: str, limit: int = 160) -> str:
    """Collapse whitespace in *message* and cap it at *limit* characters.

    Args:
        message: Text to display. Non-string values are converted with
            ``str()``; ``None`` is treated as an empty message.
        limit: Maximum length of the returned string.

    Returns:
        The message with every run of whitespace replaced by one space.
        If that is longer than *limit*, it is cut and ends in ``...``.
        When *limit* is below three there is no room for the ellipsis,
        so the text is cut hard instead.
    """
    text = "" if message is None else " ".join(str(message).split())
    if len(text) <= limit:
        return text
    if limit < 3:
        # limit - 3 would be negative here, and a negative slice end trims
        # from the tail instead of capping the length.
        return text[:max(limit, 0)]
    return text[: limit - 3] + "..."
|
||||
|
||||
Reference in New Issue
Block a user