From 414b3b1a1ee9100fe284fdb1b3ce090df55d50e9 Mon Sep 17 00:00:00 2001
From: Brian O'Kelley <bokelley@scope3.com>
Date: Mon, 11 May 2026 04:08:15 -0400
Subject: [PATCH 1/3] fix(compliance): rewrite deriveStoryboardStatuses for SDK
 6.x scenario keys
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The compliance heartbeat has been writing zero rows to
agent_storyboard_status since the SDK switched comply() to storyboard-
driven testing. The SDK emits one TestResult per phase of each storyboard,
keyed `<storyboard_id>/<phase_id>` in result.tracks[].scenarios[].scenario
(see @adcp/sdk compliance/storyboard-tracks.ts). The old implementation
walked the YAML's per-step `comply_scenario` field (bare names like
`signals_flow`, `capability_discovery`) and looked them up in the SDK's
scenario map. Every lookup missed → testedCount === 0 → every storyboard
skipped at the `continue` guard.

Effect across the registry:
  agent_storyboard_status total rows: 6  (across 4 agents)
  rows written by triggered_by='heartbeat': 0
  rows surviving were legacy bare-name keys from old manual runs

This silently broke the AAO Verified badge pipeline (no storyboard rows
→ deriveVerificationStatus has nothing to verify against) and every
agent's dashboard `storyboards_passing: 0 / N` was misleading: the
runner wasn't failing storyboards, the parser was dropping them.

Surfaced by escalation #329: Evgeny's agent was running 30/30 scenarios
clean but showing `degraded` because specialism_status.signal-owned read
'untested' from a never-populated agent_storyboard_status row.

Fix: read SDK output directly. Group scenarios by storyboard id, roll
per-step pass counts up from each phase's `steps` array, and fall back
to phase-level counts only when none of a storyboard's phases reported
any steps. The `storyboardIds` override is preserved for explicit-IDs
callers that need an `untested` entry when the runner didn't run a
requested storyboard; an empty `storyboardIds` array is treated the
same as omitting it. The unused YAML `comply_scenario` field is no
longer load-bearing for status mapping (the SDK already knows which
storyboards it ran).

Tests: 9 cases covering all-pass, partial, all-fail, phase-only fallback,
legacy bare-name skip, empty input, and explicit-IDs untested gap.

Stack note: this is orthogonal to Emma's #4247 compliance-state
unification stack (#4250, #4263, #4264, #4268, #4274) which collapses
agent_test_history into agent_compliance_runs. Different files; rebases
cleanly in either order.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../fix-storyboard-status-scenario-keys.md    |   6 +
 .../src/addie/services/compliance-testing.ts  |  97 +++++++----
 .../unit/derive-storyboard-statuses.test.ts   | 158 ++++++++++++++++++
 3 files changed, 224 insertions(+), 37 deletions(-)
 create mode 100644 .changeset/fix-storyboard-status-scenario-keys.md
 create mode 100644 server/tests/unit/derive-storyboard-statuses.test.ts

diff --git a/.changeset/fix-storyboard-status-scenario-keys.md b/.changeset/fix-storyboard-status-scenario-keys.md
new file mode 100644
index 0000000000..adc6352661
--- /dev/null
+++ b/.changeset/fix-storyboard-status-scenario-keys.md
@@ -0,0 +1,6 @@
+---
+---
+
+Rewrites `deriveStoryboardStatuses` to read SDK 6.x's storyboard-keyed scenarios. `comply()` emits `result.tracks[].scenarios[].scenario` as `<storyboard_id>/<phase_id>` (one per phase), but the old implementation walked YAML steps' `comply_scenario` fields and looked up bare names like `signals_flow` / `capability_discovery` — every lookup missed, so `testedCount === 0` skipped every storyboard. Net effect: zero rows in `agent_storyboard_status` have ever been written by the compliance heartbeat. The dashboard's "X passing / Y total" was structurally `0 / N` across the registry, every declared specialism was `untested`, and the AAO Verified badge pipeline silently stopped issuing.
+
+The new implementation groups scenarios by storyboard id, rolls per-step pass counts up from each phase's `steps` array (falling back to phase-level counts only when none of a storyboard's phases report steps), and supports the existing `storyboardIds` override for explicit-IDs callers that need an untested entry when the runner didn't run a requested storyboard. Surfaced by escalation #329 — Evgeny's agent was passing 30/30 scenarios but showing `degraded` because the storyboard counts never updated.
diff --git a/server/src/addie/services/compliance-testing.ts b/server/src/addie/services/compliance-testing.ts
index c909808999..a2bc955220 100644
--- a/server/src/addie/services/compliance-testing.ts
+++ b/server/src/addie/services/compliance-testing.ts
@@ -28,8 +28,6 @@ import type {
   TriggeredBy,
 } from '../../db/compliance-db.js';
 
-import { getStoryboard, getAllStoryboards } from '../../services/storyboards.js';
-import type { Storyboard } from '../../services/storyboards.js';
 
 // ── Re-exports ────────────────────────────────────────────────────
 
@@ -227,67 +225,92 @@ function mapOverallStatus(status: string): OverallRunStatus {
 /**
  * Derive per-storyboard pass/fail from a compliance result.
  *
- * Maps scenario results back to storyboard steps via comply_scenario.
- * For explicit runs (storyboardIds provided), only those storyboards
- * are evaluated. For heartbeat runs, all storyboards with matching
- * scenarios are evaluated.
+ * `comply()` emits one `TestResult` per *phase* of each storyboard it ran,
+ * keyed `<storyboard_id>/<phase_id>` in `result.tracks[].scenarios[].scenario`
+ * (see `@adcp/sdk` `compliance/storyboard-tracks.ts`). We group those by
+ * storyboard id and roll step-level pass counts up from each phase's
+ * `steps` array — which is what `agent_storyboard_status.steps_passed/total`
+ * record.
+ *
+ * Modes:
+ *   - heartbeat path (no `storyboardIds`): emit an entry for every storyboard
+ *     the SDK actually produced data for.
+ *   - explicit-IDs path (`storyboardIds` non-empty): emit one entry per id,
+ *     with `status='untested'` for any id the SDK didn't run.
  */
 export function deriveStoryboardStatuses(
   result: ComplianceResult,
   storyboardIds?: string[],
 ): StoryboardStatusEntry[] {
-  // Build scenario → passed map from all track results
-  const scenarioResults = new Map<string, boolean>();
+  interface Aggregate {
+    stepsPassed: number;
+    stepsTotal: number;
+    phasesPassed: number;
+    phasesTotal: number;
+  }
+  const perStoryboard = new Map<string, Aggregate>();
+
   for (const track of result.tracks) {
     for (const s of track.scenarios) {
-      scenarioResults.set(s.scenario, s.overall_passed);
+      const sepIdx = typeof s.scenario === 'string' ? s.scenario.indexOf('/') : -1;
+      if (sepIdx <= 0) continue; // skip legacy bare-name scenarios (no longer emitted by storyboard-driven comply())
+      const sbId = s.scenario.slice(0, sepIdx);
+      let agg = perStoryboard.get(sbId);
+      if (!agg) {
+        agg = { stepsPassed: 0, stepsTotal: 0, phasesPassed: 0, phasesTotal: 0 };
+        perStoryboard.set(sbId, agg);
+      }
+      agg.phasesTotal++;
+      if (s.overall_passed) agg.phasesPassed++;
+
+      // Roll per-step results up from the phase. Some SDK paths emit a phase
+      // without a `steps` array (e.g. resource-resolution failures); if no
+      // phase of the storyboard has steps, we fall back to phase-level counts
+      // below so the storyboard still reports a status.
+      const steps = s.steps ?? [];
+      for (const step of steps) {
+        agg.stepsTotal++;
+        if (step.passed) agg.stepsPassed++;
+      }
     }
   }
 
-  if (scenarioResults.size === 0) return [];
-
-  const storyboardsToCheck: Storyboard[] = storyboardIds
-    ? storyboardIds.map(id => getStoryboard(id)).filter((s): s is Storyboard => !!s)
-    : getAllStoryboards();
+  // Decide which storyboard ids to emit entries for.
+  const toEmit = storyboardIds && storyboardIds.length > 0
+    ? storyboardIds
+    : Array.from(perStoryboard.keys());
 
   const entries: StoryboardStatusEntry[] = [];
-
-  for (const sb of storyboardsToCheck) {
-    // Collect steps with comply_scenario
-    const testableSteps: Array<{ stepId: string; scenario: string }> = [];
-    for (const phase of sb.phases) {
-      for (const step of phase.steps) {
-        if (step.comply_scenario) {
-          testableSteps.push({ stepId: step.id, scenario: step.comply_scenario });
-        }
+  for (const sbId of toEmit) {
+    const agg = perStoryboard.get(sbId);
+    if (!agg) {
+      // Explicit id requested but the runner didn't produce data for it.
+      if (storyboardIds && storyboardIds.length > 0) {
+        entries.push({ storyboard_id: sbId, status: 'untested', steps_passed: 0, steps_total: 0 });
       }
+      continue;
     }
 
-    if (testableSteps.length === 0) continue;
-
-    // Only include storyboards where at least one scenario was tested
-    const testedCount = testableSteps.filter(s => scenarioResults.has(s.scenario)).length;
-    if (testedCount === 0 && !storyboardIds) continue;
-
-    const passedCount = testableSteps.filter(s => scenarioResults.get(s.scenario) === true).length;
-    const totalSteps = testableSteps.length;
+    const useSteps = agg.stepsTotal > 0;
+    const passed = useSteps ? agg.stepsPassed : agg.phasesPassed;
+    const total = useSteps ? agg.stepsTotal : agg.phasesTotal;
 
     let status: StoryboardStatusEntry['status'];
-    if (testedCount === 0) {
+    if (total === 0) {
       status = 'untested';
-    } else if (passedCount === totalSteps) {
+    } else if (passed === total) {
       status = 'passing';
-    } else if (passedCount === 0) {
+    } else if (passed === 0) {
       status = 'failing';
     } else {
       status = 'partial';
     }
 
     entries.push({
-      storyboard_id: sb.id,
+      storyboard_id: sbId,
       status,
-      steps_passed: passedCount,
-      steps_total: totalSteps,
+      steps_passed: passed,
+      steps_total: total,
     });
   }
 
diff --git a/server/tests/unit/derive-storyboard-statuses.test.ts b/server/tests/unit/derive-storyboard-statuses.test.ts
new file mode 100644
index 0000000000..2292ba0aa8
--- /dev/null
+++ b/server/tests/unit/derive-storyboard-statuses.test.ts
@@ -0,0 +1,158 @@
+import { describe, it, expect } from 'vitest';
+import { deriveStoryboardStatuses } from '../../src/addie/services/compliance-testing.js';
+import type { ComplianceResult } from '@adcp/sdk/testing';
+
+/**
+ * Minimal builder for ComplianceResult fixtures.
+ *
+ * `comply()` returns one TestResult per phase of each storyboard, keyed
+ * `<storyboard_id>/<phase_id>`. The fixtures here construct that shape
+ * directly so the tests pin the scenario-key contract we read from the SDK.
+ */
+function makeResult(
+  scenarios: Array<{
+    scenario: string;
+    passed: boolean;
+    steps?: Array<{ passed: boolean; step?: string }>;
+  }>,
+): ComplianceResult {
+  return {
+    agent_url: 'https://example.test/mcp',
+    overall_status: 'passing',
+    tracks: [
+      {
+        track: 'signals',
+        label: 'Signals',
+        status: 'passing',
+        duration_ms: 0,
+        skipped_scenarios: [],
+        observations: [],
+        scenarios: scenarios.map(s => ({
+          agent_url: 'https://example.test/mcp',
+          scenario: s.scenario as unknown as ComplianceResult['tracks'][number]['scenarios'][number]['scenario'],
+          overall_passed: s.passed,
+          steps: s.steps?.map(step => ({
+            step: step.step ?? 'step',
+            passed: step.passed,
+            duration_ms: 0,
+          })),
+          summary: 'fixture',
+          total_duration_ms: 0,
+          tested_at: '2026-05-11T00:00:00.000Z',
+        })),
+      },
+    ],
+    tested_tracks: [],
+    skipped_tracks: [],
+    summary: {
+      tracks_passed: 0,
+      tracks_failed: 0,
+      tracks_skipped: 0,
+      tracks_partial: 0,
+      tracks_silent: 0,
+      headline: 'fixture',
+    },
+    observations: [],
+    tested_at: '2026-05-11T00:00:00.000Z',
+    total_duration_ms: 0,
+  } as unknown as ComplianceResult;
+}
+
+describe('deriveStoryboardStatuses', () => {
+  it('emits one entry per storyboard the runner produced data for', () => {
+    const result = makeResult([
+      { scenario: 'signal_owned/capability_discovery', passed: true, steps: [{ passed: true }] },
+      { scenario: 'signal_owned/discovery', passed: true, steps: [{ passed: true }, { passed: true }] },
+      { scenario: 'signals_baseline/discover_and_activate', passed: true, steps: [{ passed: true }] },
+    ]);
+    const entries = deriveStoryboardStatuses(result);
+    const ids = entries.map(e => e.storyboard_id).sort();
+    expect(ids).toEqual(['signal_owned', 'signals_baseline']);
+  });
+
+  it('marks a storyboard passing when every phase passes (step counts roll up)', () => {
+    const result = makeResult([
+      { scenario: 'signal_owned/capability_discovery', passed: true, steps: [{ passed: true }] },
+      { scenario: 'signal_owned/discovery', passed: true, steps: [{ passed: true }, { passed: true }] },
+      { scenario: 'signal_owned/activation', passed: true, steps: [{ passed: true }] },
+    ]);
+    const [entry] = deriveStoryboardStatuses(result);
+    expect(entry).toEqual({
+      storyboard_id: 'signal_owned',
+      status: 'passing',
+      steps_passed: 4,
+      steps_total: 4,
+    });
+  });
+
+  it("marks a storyboard partial when some phases' steps fail", () => {
+    const result = makeResult([
+      { scenario: 'signal_owned/capability_discovery', passed: true, steps: [{ passed: true }] },
+      { scenario: 'signal_owned/discovery', passed: false, steps: [{ passed: true }, { passed: false }] },
+    ]);
+    const [entry] = deriveStoryboardStatuses(result);
+    expect(entry).toMatchObject({
+      storyboard_id: 'signal_owned',
+      status: 'partial',
+      steps_passed: 2,
+      steps_total: 3,
+    });
+  });
+
+  it('marks a storyboard failing when every step failed', () => {
+    const result = makeResult([
+      { scenario: 'signal_owned/capability_discovery', passed: false, steps: [{ passed: false }] },
+      { scenario: 'signal_owned/discovery', passed: false, steps: [{ passed: false }, { passed: false }] },
+    ]);
+    const [entry] = deriveStoryboardStatuses(result);
+    expect(entry).toMatchObject({ status: 'failing', steps_passed: 0, steps_total: 3 });
+  });
+
+  it('falls back to phase-level counts when phases have no steps array', () => {
+    const result = makeResult([
+      { scenario: 'signal_owned/capability_discovery', passed: true },
+      { scenario: 'signal_owned/discovery', passed: false },
+    ]);
+    const [entry] = deriveStoryboardStatuses(result);
+    expect(entry).toMatchObject({
+      storyboard_id: 'signal_owned',
+      status: 'partial',
+      steps_passed: 1,
+      steps_total: 2,
+    });
+  });
+
+  it('skips legacy bare-name scenarios (no "/" separator)', () => {
+    const result = makeResult([
+      { scenario: 'signals_flow', passed: true, steps: [{ passed: true }] },
+      { scenario: 'capability_discovery', passed: true, steps: [{ passed: true }] },
+    ]);
+    expect(deriveStoryboardStatuses(result)).toEqual([]);
+  });
+
+  it('returns empty when no scenarios were produced', () => {
+    expect(deriveStoryboardStatuses(makeResult([]))).toEqual([]);
+  });
+
+  describe('with explicit storyboardIds', () => {
+    it('emits untested entry when the runner did not run a requested storyboard', () => {
+      const result = makeResult([
+        { scenario: 'signal_owned/capability_discovery', passed: true, steps: [{ passed: true }] },
+      ]);
+      const entries = deriveStoryboardStatuses(result, ['signal_owned', 'signal_marketplace']);
+      expect(entries).toEqual([
+        { storyboard_id: 'signal_owned', status: 'passing', steps_passed: 1, steps_total: 1 },
+        { storyboard_id: 'signal_marketplace', status: 'untested', steps_passed: 0, steps_total: 0 },
+      ]);
+    });
+
+    it('only emits entries for the requested ids even when more were run', () => {
+      const result = makeResult([
+        { scenario: 'signal_owned/p1', passed: true, steps: [{ passed: true }] },
+        { scenario: 'signals_baseline/p1', passed: true, steps: [{ passed: true }] },
+      ]);
+      const entries = deriveStoryboardStatuses(result, ['signal_owned']);
+      expect(entries.map(e => e.storyboard_id)).toEqual(['signal_owned']);
+    });
+  });
+});

From 67740eae00b3aa9533dde9cda1ca33d72d07a0cb Mon Sep 17 00:00:00 2001
From: Brian O'Kelley <bokelley@scope3.com>
Date: Mon, 11 May 2026 04:14:26 -0400
Subject: [PATCH 2/3] =?UTF-8?q?chore(scripts):=20test-comply-storyboard-st?=
 =?UTF-8?q?atuses=20=E2=80=94=20local=20harness=20for=20the=20fix?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Runs comply() against an agent URL and prints what
deriveStoryboardStatuses would produce, without DB writes. Used to
validate the SDK-6.x scenario-key fix against real agents
(adcp-signals-adaptor.evgeny-193.workers.dev/mcp and
wonderstruck.sales-agent.scope3.com/mcp) before merging.

Will stay useful for future SDK upgrades that touch scenario emission
or storyboard-track aggregation — same pattern as the
diagnose-agent-comply-queue script from #4361.

Usage:
  npx tsx server/src/scripts/test-comply-storyboard-statuses.ts <agent-url> [<agent-url> ...]

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../test-comply-storyboard-statuses.ts        | 95 +++++++++++++++++++
 1 file changed, 95 insertions(+)
 create mode 100644 server/src/scripts/test-comply-storyboard-statuses.ts

diff --git a/server/src/scripts/test-comply-storyboard-statuses.ts b/server/src/scripts/test-comply-storyboard-statuses.ts
new file mode 100644
index 0000000000..2a26eda3d1
--- /dev/null
+++ b/server/src/scripts/test-comply-storyboard-statuses.ts
@@ -0,0 +1,95 @@
+/**
+ * Run `comply()` against an agent URL and print what
+ * `deriveStoryboardStatuses` would produce. Read-only — no DB writes.
+ *
+ * Lets us validate the new SDK-6.x scenario-key parser against real agents
+ * before merging. Mirrors what the compliance heartbeat does for the
+ * storyboard-status piece, but prints to stdout instead of recording.
+ *
+ * Usage:
+ *   npx tsx server/src/scripts/test-comply-storyboard-statuses.ts <agent-url>
+ *   npx tsx server/src/scripts/test-comply-storyboard-statuses.ts <url1> <url2> ...
+ */
+
+import { AAO_UA_COMPLIANCE } from '../config/user-agents.js';
+import {
+  comply,
+  deriveStoryboardStatuses,
+  complianceResultToDbInput,
+  type ComplyOptions,
+} from '../addie/services/compliance-testing.js';
+
+const urls = process.argv.slice(2).filter(a => !a.startsWith('--'));
+
+if (urls.length === 0) {
+  console.error('Usage: test-comply-storyboard-statuses.ts <agent-url> [<agent-url> ...]');
+  process.exit(1);
+}
+
+async function probe(agentUrl: string): Promise<void> {
+  console.log(`\n${'='.repeat(80)}\nAgent: ${agentUrl}\n${'='.repeat(80)}`);
+  const start = Date.now();
+
+  const opts: ComplyOptions = {
+    test_session_id: `local-probe-${Date.now()}`,
+    timeout_ms: 90_000,
+    userAgent: AAO_UA_COMPLIANCE,
+  };
+
+  let result;
+  try {
+    result = await comply(agentUrl, opts);
+  } catch (err) {
+    console.log(`  comply() threw: ${err instanceof Error ? err.message : String(err)}`);
+    return;
+  }
+
+  const duration = Date.now() - start;
+  console.log(`\nOverall: ${result.overall_status}  (${duration}ms)`);
+  console.log(`Headline: ${result.summary.headline}`);
+  console.log(`Declared specialisms: ${JSON.stringify(result.agent_profile?.specialisms ?? [])}`);
+  console.log(`Storyboards executed: ${JSON.stringify(result.storyboards_executed ?? '(field absent)')}`);
+
+  console.log(`\nTracks:`);
+  for (const t of result.tracks) {
+    console.log(`  ${t.track.padEnd(20)} status=${t.status.padEnd(8)} scenarios=${t.scenarios.length}`);
+    for (const s of t.scenarios.slice(0, 6)) {
+      const pass = s.overall_passed ? '✓' : '✗';
+      const stepCount = s.steps?.length ?? 0;
+      const stepsPassed = s.steps?.filter(st => st.passed).length ?? 0;
+      console.log(`    ${pass} ${s.scenario.padEnd(50)} steps=${stepsPassed}/${stepCount}`);
+    }
+    if (t.scenarios.length > 6) {
+      console.log(`    … +${t.scenarios.length - 6} more`);
+    }
+  }
+
+  console.log(`\nderiveStoryboardStatuses() output (what the heartbeat would persist):`);
+  const entries = deriveStoryboardStatuses(result);
+  if (entries.length === 0) {
+    console.log(`  (empty — nothing to persist)`);
+  } else {
+    for (const e of entries) {
+      console.log(`  ${e.storyboard_id.padEnd(40)} ${e.status.padEnd(10)} steps=${e.steps_passed}/${e.steps_total}`);
+    }
+  }
+
+  console.log(`\ncomplianceResultToDbInput().storyboard_statuses (full input shape):`);
+  const dbInput = complianceResultToDbInput(result, agentUrl, 'production', 'manual');
+  console.log(`  count: ${dbInput.storyboard_statuses?.length ?? 0}`);
+  if (dbInput.storyboard_statuses?.length) {
+    console.log(JSON.stringify(dbInput.storyboard_statuses, null, 2));
+  }
+}
+
+async function main(): Promise<void> {
+  for (const url of urls) {
+    await probe(url);
+  }
+  console.log('');
+}
+
+main().catch((err) => {
+  console.error('Probe failed:', err);
+  process.exit(1);
+});

From c11abeb7911725d910d478d8410c91993295a4fa Mon Sep 17 00:00:00 2001
From: Brian O'Kelley <bokelley@scope3.com>
Date: Mon, 11 May 2026 04:19:43 -0400
Subject: [PATCH 3/3] =?UTF-8?q?fix(compliance):=20code=20review=20nits=20?=
 =?UTF-8?q?=E2=80=94=20clarify=20steps=20doc,=20hoist=20explicit-ids=20che?=
 =?UTF-8?q?ck,=20add=203=20edge=20tests?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses code-reviewer feedback on PR #4364:
- JSDoc on deriveStoryboardStatuses now calls out that steps_passed/total
  are not directly comparable across rows (some rows are real step counts,
  some are phase-level fallbacks when the SDK omits per-step data).
- Comment pinning the storyboard-id invariant (flat ids, no `/`) so the
  indexOf split stays correct as new storyboards land.
- Defensive `result.tracks ?? []` so a result with no `tracks` array
  doesn't throw (a track missing `scenarios` is still not guarded).
- Hoist `storyboardIds && length > 0` into a single `hasExplicitIds`
  const used at both the toEmit decision and the no-data fallback.
- Three new test cases:
  * same storyboard split across multiple tracks aggregates correctly
  * result.tracks absent → []
  * non-string scenario values (null, number) → skipped without throwing

12/12 vitest passing.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../src/addie/services/compliance-testing.ts  | 22 +++--
 .../unit/derive-storyboard-statuses.test.ts   | 83 +++++++++++++++++++
 2 files changed, 100 insertions(+), 5 deletions(-)

diff --git a/server/src/addie/services/compliance-testing.ts b/server/src/addie/services/compliance-testing.ts
index a2bc955220..e8f5e04587 100644
--- a/server/src/addie/services/compliance-testing.ts
+++ b/server/src/addie/services/compliance-testing.ts
@@ -237,6 +237,14 @@ function mapOverallStatus(status: string): OverallRunStatus {
  *     the SDK actually produced data for.
  *   - explicit-IDs path (`storyboardIds` non-empty): emit one entry per id,
  *     with `status='untested'` for any id the SDK didn't run.
+ *
+ * `steps_passed` / `steps_total` reflect what the SDK reported for that
+ * storyboard in this run. Two storyboards (or the same storyboard across
+ * different runs) may count steps differently: most rows are real step
+ * counts; rows where no phase of the storyboard carried per-step data fall
+ * back to phase-level counts. The values are meaningful within a single row
+ * (passed/total ratio, status derivation) but should not be compared across
+ * rows without checking which mode produced them.
  */
 export function deriveStoryboardStatuses(
   result: ComplianceResult,
@@ -249,8 +257,13 @@ export function deriveStoryboardStatuses(
     phasesTotal: number;
   }
   const perStoryboard = new Map<string, Aggregate>();
+  // Storyboard ids in `static/compliance/source/**/index.yaml` are flat
+  // identifiers (no `/`); splitting on the first `/` therefore always yields
+  // the storyboard id followed by the phase id. The `<= 0` guard also
+  // rejects pathological leading-slash strings.
+  const tracks = result.tracks ?? [];
 
-  for (const track of result.tracks) {
+  for (const track of tracks) {
     for (const s of track.scenarios) {
       const sepIdx = typeof s.scenario === 'string' ? s.scenario.indexOf('/') : -1;
       if (sepIdx <= 0) continue; // skip legacy bare-name scenarios (no longer emitted by storyboard-driven comply())
@@ -276,16 +289,15 @@ export function deriveStoryboardStatuses(
   }
 
   // Decide which storyboard ids to emit entries for.
-  const toEmit = storyboardIds && storyboardIds.length > 0
-    ? storyboardIds
-    : Array.from(perStoryboard.keys());
+  const hasExplicitIds = !!storyboardIds && storyboardIds.length > 0;
+  const toEmit = hasExplicitIds ? storyboardIds! : Array.from(perStoryboard.keys());
 
   const entries: StoryboardStatusEntry[] = [];
   for (const sbId of toEmit) {
     const agg = perStoryboard.get(sbId);
     if (!agg) {
       // Explicit id requested but the runner didn't produce data for it.
-      if (storyboardIds && storyboardIds.length > 0) {
+      if (hasExplicitIds) {
         entries.push({ storyboard_id: sbId, status: 'untested', steps_passed: 0, steps_total: 0 });
       }
       continue;
diff --git a/server/tests/unit/derive-storyboard-statuses.test.ts b/server/tests/unit/derive-storyboard-statuses.test.ts
index 2292ba0aa8..cb136f0686 100644
--- a/server/tests/unit/derive-storyboard-statuses.test.ts
+++ b/server/tests/unit/derive-storyboard-statuses.test.ts
@@ -134,6 +134,89 @@ describe('deriveStoryboardStatuses', () => {
     expect(deriveStoryboardStatuses(makeResult([]))).toEqual([]);
   });
 
+  it('aggregates a storyboard whose phases appear in multiple tracks', () => {
+    const r = makeResult([]);
+    r.tracks = [
+      {
+        track: 'core',
+        label: 'Core',
+        status: 'passing',
+        duration_ms: 0,
+        skipped_scenarios: [],
+        observations: [],
+        scenarios: [
+          {
+            agent_url: 'https://example.test/mcp',
+            scenario: 'sales_non_guaranteed/capability_discovery' as never,
+            overall_passed: true,
+            steps: [{ step: 'a', passed: true, duration_ms: 0 }],
+            summary: '',
+            total_duration_ms: 0,
+            tested_at: '',
+          },
+        ],
+      },
+      {
+        track: 'media_buy',
+        label: 'Media Buy',
+        status: 'passing',
+        duration_ms: 0,
+        skipped_scenarios: [],
+        observations: [],
+        scenarios: [
+          {
+            agent_url: 'https://example.test/mcp',
+            scenario: 'sales_non_guaranteed/create_buy' as never,
+            overall_passed: true,
+            steps: [{ step: 'b', passed: true, duration_ms: 0 }, { step: 'c', passed: false, duration_ms: 0 }],
+            summary: '',
+            total_duration_ms: 0,
+            tested_at: '',
+          },
+        ],
+      },
+    ] as unknown as ComplianceResult['tracks'];
+    const entries = deriveStoryboardStatuses(r);
+    expect(entries).toHaveLength(1);
+    expect(entries[0]).toMatchObject({
+      storyboard_id: 'sales_non_guaranteed',
+      status: 'partial',
+      steps_passed: 2,
+      steps_total: 3,
+    });
+  });
+
+  it('handles result.tracks being absent', () => {
+    const r = makeResult([]);
+    (r as { tracks?: unknown }).tracks = undefined;
+    expect(deriveStoryboardStatuses(r)).toEqual([]);
+  });
+
+  it('ignores non-string scenario values without throwing', () => {
+    const r = makeResult([]);
+    r.tracks[0].scenarios = [
+      {
+        agent_url: 'https://example.test/mcp',
+        scenario: null as never,
+        overall_passed: true,
+        steps: [{ step: 'x', passed: true, duration_ms: 0 }],
+        summary: '',
+        total_duration_ms: 0,
+        tested_at: '',
+      },
+      {
+        agent_url: 'https://example.test/mcp',
+        scenario: 12345 as never,
+        overall_passed: true,
+        steps: [{ step: 'y', passed: true, duration_ms: 0 }],
+        summary: '',
+        total_duration_ms: 0,
+        tested_at: '',
+      },
+    ];
+    expect(deriveStoryboardStatuses(r)).toEqual([]);
+  });
+
   describe('with explicit storyboardIds', () => {
     it('emits untested entry when the runner did not run a requested storyboard', () => {
       const result = makeResult([