From 35f94d3a91349d6e17ed13929db9d9a5cd738014 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Mon, 11 May 2026 06:17:48 -0400 Subject: [PATCH] feat(dashboard): storyboard count as canonical compliance headline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an "X / Y storyboards passing" element between the SDK headline ("2 silent" etc.) and the track pills, with a tooltip explaining the relationship: storyboards = canonical conformance unit (each applicable specialism + protocol baseline + universal check is one storyboard, pass or fail) track pills = SDK's coarse roll-up that can read as "passing" even when underlying storyboards are partial — useful for quick glance but misleading in isolation Track pills gain their own tooltip pointing readers at the Verification panel for per-storyboard detail. Resolves the Evgeny-shape disconnect from escalation #329: track summary showed "2 silent / 30 of 30 scenarios passing" while the agent's signal_owned specialism storyboard was 1/5 steps. With the data flowing correctly after PR #4364, this surface change closes the loop on the adtech-product reviewer's "deprecate track summary on the public dashboard, keep it operator-only" call by making the storyboard count visually prominent and clarifying that the SDK track pills are debug context. Push A item 4 of 4 in the compliance reporting fidelity initiative. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../dashboard-storyboard-summary-headline.md | 4 ++ server/public/dashboard-agents.html | 45 ++++++++++++++++++- 2 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 .changeset/dashboard-storyboard-summary-headline.md diff --git a/.changeset/dashboard-storyboard-summary-headline.md b/.changeset/dashboard-storyboard-summary-headline.md new file mode 100644 index 0000000000..12dfb12b1c --- /dev/null +++ b/.changeset/dashboard-storyboard-summary-headline.md @@ -0,0 +1,4 @@ +--- +--- + +Adds a "X / Y storyboards passing" headline element on the dashboard agent card, with a tooltip explaining that storyboard counts are the canonical compliance unit (each applicable specialism, protocol baseline, and universal check is one storyboard) and that the track pills below are the SDK's coarse roll-up. Resolves the Evgeny-shape disconnect surfaced by escalation #329: track summary showed "30/30 passing" (correctly, per the SDK's silent-track semantics) while the underlying storyboards were partial — the dashboard had no surface to communicate which number to trust. The track pills also gain a tooltip pointing readers at the Verification panel for the per-storyboard view. Follows from the adtech-product review feedback on PR #4364. 
diff --git a/server/public/dashboard-agents.html b/server/public/dashboard-agents.html index 75dc9df971..b8785968d3 100644 --- a/server/public/dashboard-agents.html +++ b/server/public/dashboard-agents.html @@ -121,6 +121,29 @@ margin-top: var(--space-2); } + .agent-storyboard-summary { + margin-top: var(--space-2); + display: inline-flex; + align-items: baseline; + gap: var(--space-1); + padding: var(--space-1) var(--space-2); + background: var(--color-bg-subtle); + border-radius: var(--radius-sm); + cursor: help; + } + + .agent-storyboard-summary-count { + font-size: var(--text-sm); + font-weight: var(--font-semibold); + color: var(--color-text); + font-variant-numeric: tabular-nums; + } + + .agent-storyboard-summary-label { + font-size: var(--text-xs); + color: var(--color-text-secondary); + } + .agent-tracks { display: flex; gap: 3px; @@ -1488,6 +1511,25 @@

Agents

return ''; }).join(''); + // Storyboards passing is the canonical compliance metric: each + // applicable storyboard (universal + protocol + declared specialism) + // either passes or doesn't. The track pills (rendered below this count) are the SDK's + // coarse roll-up (a track "passes" when its storyboards execute + // without warnings) — useful as a quick-glance summary but + // misleading in isolation: agents that pass 0/N storyboards + // can still show "silent" track status if the SDK had nothing + // to flag (the literal absence of negative observations is + // counted as a clean track). When both views exist, the + // storyboard count is the one to trust. + const sbPassing = Number.isFinite(cs.storyboards_passing) ? cs.storyboards_passing : null; + const sbTotal = Number.isFinite(cs.storyboards_total) ? cs.storyboards_total : null; + const storyboardSummary = (sbPassing !== null && sbTotal !== null && sbTotal > 0) + ? `
` + + `${sbPassing} / ${sbTotal}` + + ` storyboards passing` + + `
` + : ''; + const runs = history?.runs || []; const sparkline = runs.length > 0 ? '
' + runs.slice().reverse().map(r => { @@ -1533,7 +1575,8 @@

Agents

${cs.headline ? '
' + escapeHtml(cs.headline) + '
' : ''} - ${clickableTrackPills ? '
' + clickableTrackPills + '
' : ''} + ${storyboardSummary} + ${clickableTrackPills ? '
' + clickableTrackPills + '
' : ''} ${sparkline} ${renderVerificationPanel(cs, agent.url, hasAuth)}