From 60f6a0cbab6199dd365427540951f14ab6fc7f80 Mon Sep 17 00:00:00 2001
From: xiaoxing0135 <706015750@qq.com>
Date: Sun, 17 May 2026 01:18:41 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20add=20Dashboard,=20History=20pages=20?=
 =?UTF-8?q?=E2=80=94=20AI=20quality=20metrics=20+=20run=20tracking=20+=20c?=
 =?UTF-8?q?ollaboration?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- DashboardPage: KPIs (pass rate, confidence, test cases) + recent runs + top failures
- HistoryPage: searchable run list, select 2 to export/compare bundle
- API: GET /history (scan workspace for past runs)
- API: GET /dashboard (aggregate quality metrics across all runs)
- Collaboration: export run bundle as portable JSON (import into another instance)
- Navigation: Dashboard + History tabs added to header
---
 runtime/api/main.py                     |  95 +++++++++++++++++
 runtime/web/src/App.tsx                 |  14 ++-
 runtime/web/src/main.tsx                |   4 +
 runtime/web/src/pages/DashboardPage.tsx | 116 +++++++++++++++++++++
 runtime/web/src/pages/HistoryPage.tsx   | 131 ++++++++++++++++++++++++
 5 files changed, 359 insertions(+), 1 deletion(-)
 create mode 100644 runtime/web/src/pages/DashboardPage.tsx
 create mode 100644 runtime/web/src/pages/HistoryPage.tsx

diff --git a/runtime/api/main.py b/runtime/api/main.py
index 1783969..8401c58 100644
--- a/runtime/api/main.py
+++ b/runtime/api/main.py
@@ -148,6 +148,101 @@ def submit_feedback(payload: dict) -> dict:
     return {"status": "ok", "saved_to": str(fname)}
 
 
+@app.get("/history")
+def list_history() -> dict:
+    """List past test runs from workspace."""
+    import json as _json
+
+    ws = get_settings().workspace_dir
+    runs: list[dict] = []
+
+    # Scan workspace/_demo and workspace/执行日志 for run outputs
+    for scan_dir in [ws / "_demo", ws / "执行日志"]:
+        if not scan_dir.exists():
+            continue
+        for f in sorted(scan_dir.rglob("*.json"), reverse=True):
+            try:
+                data = _json.loads(f.read_text(encoding="utf-8"))
+                if isinstance(data, dict) and "run_id" in data:
+                    runs.append({
+                        "run_id": data.get("run_id", f.stem),
+                        "target": data.get("target", data.get("target_type", f.stem)),
+                        "date": data.get("date", data.get("timestamp", "")),
+                        "total": data.get("total", 0),
+                        "passed": data.get("succeeded", data.get("passed", 0)),
+                        "failed": data.get("failed", 0),
+                        "duration_s": data.get("duration_s", data.get("duration_ms", 0) / 1000 if "duration_ms" in data else 0),
+                        "confidence": data.get("confidence", 0),
+                    })
+            except Exception:
+                continue
+
+    return {"runs": runs[:50]}
+
+
+@app.get("/dashboard")
+def get_dashboard() -> dict:
+    """Aggregate quality metrics from all runs."""
+    import json as _json
+
+    ws = get_settings().workspace_dir
+    all_runs: list[dict] = []
+    expert_fails: dict[str, int] = {}
+
+    for scan_dir in [ws / "_demo", ws / "执行日志"]:
+        if not scan_dir.exists():
+            continue
+        for f in scan_dir.rglob("*.json"):
+            try:
+                data = _json.loads(f.read_text(encoding="utf-8"))
+                if isinstance(data, dict) and "total" in data:
+                    all_runs.append(data)
+                    if "results" in data and isinstance(data["results"], dict):
+                        for node_id, r in data["results"].items():
+                            if not r.get("ok") and r.get("name"):
+                                name = r["name"]
+                                expert_fails[name] = expert_fails.get(name, 0) + 1
+            except Exception:
+                continue
+
+    total = len(all_runs)
+    if total == 0:
+        return {
+            "total_runs": 0, "avg_pass_rate": 0, "avg_confidence": 0,
+            "total_test_cases": 0, "recent_runs": [], "top_failures": [],
+        }
+
+    pass_rates = [(r.get("succeeded", r.get("passed", 0)) / max(r.get("total", 1), 1)) for r in all_runs]
+    confidences = [r.get("confidence", 0) for r in all_runs if isinstance(r.get("confidence"), (int, float))]
+    total_cases = sum(r.get("total", 0) for r in all_runs)
+
+    top = sorted(expert_fails.items(), key=lambda x: -x[1])[:10]
+
+    recent = sorted(all_runs, key=lambda r: str(r.get("date", r.get("timestamp", ""))), reverse=True)[:10]
+    recent_summaries = [
+        {
+            "run_id": r.get("run_id", ""),
+            "target": r.get("target", r.get("target_type", "")),
+            "date": str(r.get("date", r.get("timestamp", ""))),
+            "total": r.get("total", 0),
+            "passed": r.get("succeeded", r.get("passed", 0)),
+            "failed": r.get("failed", 0),
+            "confidence": r.get("confidence", 0),
+            "duration_s": r.get("duration_s", 0),
+        }
+        for r in recent
+    ]
+
+    return {
+        "total_runs": total,
+        "avg_pass_rate": sum(pass_rates) / total,
+        "avg_confidence": sum(confidences) / len(confidences) if confidences else 0,
+        "total_test_cases": total_cases,
+        "recent_runs": recent_summaries,
+        "top_failures": [{"expert": name, "fail_count": cnt} for name, cnt in top],
+    }
+
+
 def _run_in_background(run_id: str, decision) -> None:
     try:
         summary = _kernel.execute_sync(run_id, decision)
diff --git a/runtime/web/src/App.tsx b/runtime/web/src/App.tsx
index 650797d..baf196e 100644
--- a/runtime/web/src/App.tsx
+++ b/runtime/web/src/App.tsx
@@ -1,5 +1,5 @@
 import { Outlet, NavLink } from "react-router-dom";
-import { Beaker, Upload, BookOpen, Settings, Stethoscope, MessageSquare } from "lucide-react";
+import { Beaker, Upload, BookOpen, Settings, Stethoscope, MessageSquare, BarChart3, Clock } from "lucide-react";
 
 export default function App() {
   return (
@@ -25,6 +25,18 @@ export default function App() {
           Catalog
         </NavLink>
+        <NavLink
+          to="/dashboard"
+          className={({ isActive }) => (isActive ? "font-semibold" : "")}>
+          <BarChart3 size={16} />
+          Dashboard
+        </NavLink>
+        <NavLink
+          to="/history"
+          className={({ isActive }) => (isActive ? "font-semibold" : "")}>
+          <Clock size={16} />
+          History
+        </NavLink>
           className={({ isActive }) => (isActive ? "font-semibold" : "")}>
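
As a rough sketch of how the web client could consume the new endpoint (illustrative only, not part of the patch): the field names below mirror the /dashboard payload assembled in runtime/api/main.py above, while API_BASE, RunSummary, DashboardSummary and fetchDashboard are assumed names introduced just for this example.

// Illustrative sketch only. Field names follow the /dashboard response built above;
// API_BASE, RunSummary, DashboardSummary and fetchDashboard are assumed names.
interface RunSummary {
  run_id: string;
  target: string;
  date: string;
  total: number;
  passed: number;
  failed: number;
  confidence: number;
  duration_s: number;
}

interface DashboardSummary {
  total_runs: number;
  avg_pass_rate: number;        // fraction in [0, 1], averaged across runs
  avg_confidence: number;
  total_test_cases: number;
  recent_runs: RunSummary[];
  top_failures: { expert: string; fail_count: number }[];
}

// Assumed base URL; the real app presumably reads this from configuration.
const API_BASE = "http://localhost:8000";

export async function fetchDashboard(): Promise<DashboardSummary> {
  const res = await fetch(`${API_BASE}/dashboard`);
  if (!res.ok) throw new Error(`GET /dashboard failed with status ${res.status}`);
  return (await res.json()) as DashboardSummary;
}

HistoryPage could follow the same pattern against GET /history, whose payload is the {"runs": [...]} list built by list_history above.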