From 414b3b1a1ee9100fe284fdb1b3ce090df55d50e9 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Mon, 11 May 2026 04:08:15 -0400 Subject: [PATCH 1/3] fix(compliance): rewrite deriveStoryboardStatuses for SDK 6.x scenario keys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The compliance heartbeat has been writing zero rows to agent_storyboard_status since the SDK switched comply() to storyboard-driven testing. The SDK emits one TestResult per phase of each storyboard, keyed `<storyboard_id>/<phase_id>` in result.tracks[].scenarios[].scenario (see @adcp/sdk compliance/storyboard-tracks.ts). The old implementation walked the YAML's per-step `comply_scenario` field (bare names like `signals_flow`, `capability_discovery`) and looked them up in the SDK's scenario map. Every lookup missed → testedCount === 0 → every storyboard skipped at the `continue` guard. Effect across the registry: agent_storyboard_status total rows: 6 (across 4 agents) rows written by triggered_by='heartbeat': 0 rows surviving were legacy bare-name keys from old manual runs This silently broke the AAO Verified badge pipeline (no storyboard rows → deriveVerificationStatus has nothing to verify against) and every agent's dashboard `storyboards_passing: 0 / N` was misleading: the runner wasn't failing storyboards, the parser was dropping them. Surfaced by escalation #329: Evgeny's agent was running 30/30 scenarios clean but showing `degraded` because specialism_status.signal-owned read 'untested' from a never-populated agent_storyboard_status row. Fix: read SDK output directly. Group scenarios by storyboard id, roll per-step pass counts up from each phase's `steps` array, fall back to phase-level counts when steps are absent. The `storyboardIds` override is preserved for explicit-IDs callers that need an `untested` entry when the runner didn't run a requested storyboard. 
The unused YAML `comply_scenario` field is no longer load-bearing for status mapping (the SDK already knows which storyboards it ran). Tests: 9 cases covering all-pass, partial, all-fail, phase-only fallback, legacy bare-name skip, empty input, and explicit-IDs untested gap. Stack note: this is orthogonal to Emma's #4247 compliance-state unification stack (#4250, #4263, #4264, #4268, #4274) which collapses agent_test_history into agent_compliance_runs. Different files; rebases cleanly in either order. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../fix-storyboard-status-scenario-keys.md | 6 + .../src/addie/services/compliance-testing.ts | 97 +++++++---- .../unit/derive-storyboard-statuses.test.ts | 158 ++++++++++++++++++ 3 files changed, 224 insertions(+), 37 deletions(-) create mode 100644 .changeset/fix-storyboard-status-scenario-keys.md create mode 100644 server/tests/unit/derive-storyboard-statuses.test.ts diff --git a/.changeset/fix-storyboard-status-scenario-keys.md b/.changeset/fix-storyboard-status-scenario-keys.md new file mode 100644 index 0000000000..adc6352661 --- /dev/null +++ b/.changeset/fix-storyboard-status-scenario-keys.md @@ -0,0 +1,6 @@ +--- +--- + +Rewrites `deriveStoryboardStatuses` to read SDK 6.x's storyboard-keyed scenarios. `comply()` emits `result.tracks[].scenarios[].scenario` as `<storyboard_id>/<phase_id>` (one per phase), but the old implementation walked YAML steps' `comply_scenario` fields and looked up bare names like `signals_flow` / `capability_discovery` — every lookup missed, so `testedCount === 0` skipped every storyboard. Net effect: zero rows in `agent_storyboard_status` have ever been written by the compliance heartbeat. The dashboard's "X passing / Y total" was structurally `0 / N` across the registry, every declared specialism was `untested`, and the AAO Verified badge pipeline silently stopped issuing. 
+ +New implementation groups scenarios by storyboard id, rolls per-step pass counts up from each phase's `steps` array (with phase-level fallback when steps are absent), and supports the existing `storyboardIds` override for explicit-IDs callers that need an untested entry when the runner didn't run a requested storyboard. Surfaced by escalation #329 — Evgeny's agent was passing 30/30 scenarios but showing `degraded` because the storyboard counts never updated. diff --git a/server/src/addie/services/compliance-testing.ts b/server/src/addie/services/compliance-testing.ts index c909808999..a2bc955220 100644 --- a/server/src/addie/services/compliance-testing.ts +++ b/server/src/addie/services/compliance-testing.ts @@ -28,8 +28,6 @@ import type { TriggeredBy, } from '../../db/compliance-db.js'; -import { getStoryboard, getAllStoryboards } from '../../services/storyboards.js'; -import type { Storyboard } from '../../services/storyboards.js'; // ── Re-exports ──────────────────────────────────────────────────── @@ -227,67 +225,92 @@ function mapOverallStatus(status: string): OverallRunStatus { /** * Derive per-storyboard pass/fail from a compliance result. * - * Maps scenario results back to storyboard steps via comply_scenario. - * For explicit runs (storyboardIds provided), only those storyboards - * are evaluated. For heartbeat runs, all storyboards with matching - * scenarios are evaluated. + * `comply()` emits one `TestResult` per *phase* of each storyboard it ran, + * keyed `/` in `result.tracks[].scenarios[].scenario` + * (see `@adcp/sdk` `compliance/storyboard-tracks.ts`). We group those by + * storyboard id and roll step-level pass counts up from each phase's + * `steps` array — which is what `agent_storyboard_status.steps_passed/total` + * record. + * + * Modes: + * - heartbeat path (no `storyboardIds`): emit an entry for every storyboard + * the SDK actually produced data for. 
+ * - explicit-IDs path (`storyboardIds` non-empty): emit one entry per id, + * with `status='untested'` for any id the SDK didn't run. */ export function deriveStoryboardStatuses( result: ComplianceResult, storyboardIds?: string[], ): StoryboardStatusEntry[] { - // Build scenario → passed map from all track results - const scenarioResults = new Map(); + interface Aggregate { + stepsPassed: number; + stepsTotal: number; + phasesPassed: number; + phasesTotal: number; + } + const perStoryboard = new Map(); + for (const track of result.tracks) { for (const s of track.scenarios) { - scenarioResults.set(s.scenario, s.overall_passed); + const sepIdx = typeof s.scenario === 'string' ? s.scenario.indexOf('/') : -1; + if (sepIdx <= 0) continue; // skip legacy bare-name scenarios (no longer emitted by storyboard-driven comply()) + const sbId = s.scenario.slice(0, sepIdx); + let agg = perStoryboard.get(sbId); + if (!agg) { + agg = { stepsPassed: 0, stepsTotal: 0, phasesPassed: 0, phasesTotal: 0 }; + perStoryboard.set(sbId, agg); + } + agg.phasesTotal++; + if (s.overall_passed) agg.phasesPassed++; + + // Roll per-step results up from the phase. Some SDK paths emit a phase + // without a `steps` array (e.g. resource-resolution failures); we then + // fall back to phase-level counts below so the storyboard still + // reports a status. + const steps = s.steps ?? []; + for (const step of steps) { + agg.stepsTotal++; + if (step.passed) agg.stepsPassed++; + } } } - if (scenarioResults.size === 0) return []; - - const storyboardsToCheck: Storyboard[] = storyboardIds - ? storyboardIds.map(id => getStoryboard(id)).filter((s): s is Storyboard => !!s) - : getAllStoryboards(); + // Decide which storyboard ids to emit entries for. + const toEmit = storyboardIds && storyboardIds.length > 0 + ? 
storyboardIds + : Array.from(perStoryboard.keys()); const entries: StoryboardStatusEntry[] = []; - - for (const sb of storyboardsToCheck) { - // Collect steps with comply_scenario - const testableSteps: Array<{ stepId: string; scenario: string }> = []; - for (const phase of sb.phases) { - for (const step of phase.steps) { - if (step.comply_scenario) { - testableSteps.push({ stepId: step.id, scenario: step.comply_scenario }); - } + for (const sbId of toEmit) { + const agg = perStoryboard.get(sbId); + if (!agg) { + // Explicit id requested but the runner didn't produce data for it. + if (storyboardIds && storyboardIds.length > 0) { + entries.push({ storyboard_id: sbId, status: 'untested', steps_passed: 0, steps_total: 0 }); } + continue; } - if (testableSteps.length === 0) continue; - - // Only include storyboards where at least one scenario was tested - const testedCount = testableSteps.filter(s => scenarioResults.has(s.scenario)).length; - if (testedCount === 0 && !storyboardIds) continue; - - const passedCount = testableSteps.filter(s => scenarioResults.get(s.scenario) === true).length; - const totalSteps = testableSteps.length; + const useSteps = agg.stepsTotal > 0; + const passed = useSteps ? agg.stepsPassed : agg.phasesPassed; + const total = useSteps ? 
agg.stepsTotal : agg.phasesTotal; let status: StoryboardStatusEntry['status']; - if (testedCount === 0) { + if (total === 0) { status = 'untested'; - } else if (passedCount === totalSteps) { + } else if (passed === total) { status = 'passing'; - } else if (passedCount === 0) { + } else if (passed === 0) { status = 'failing'; } else { status = 'partial'; } entries.push({ - storyboard_id: sb.id, + storyboard_id: sbId, status, - steps_passed: passedCount, - steps_total: totalSteps, + steps_passed: passed, + steps_total: total, }); } diff --git a/server/tests/unit/derive-storyboard-statuses.test.ts b/server/tests/unit/derive-storyboard-statuses.test.ts new file mode 100644 index 0000000000..2292ba0aa8 --- /dev/null +++ b/server/tests/unit/derive-storyboard-statuses.test.ts @@ -0,0 +1,158 @@ +import { describe, it, expect } from 'vitest'; +import { deriveStoryboardStatuses } from '../../src/addie/services/compliance-testing.js'; +import type { ComplianceResult } from '@adcp/sdk/testing'; + +/** + * Minimal builder for ComplianceResult fixtures. + * + * `comply()` returns one TestResult per phase of each storyboard, keyed + * `/`. The fixtures here construct that shape + * directly so the tests pin the scenario-key contract we read from the SDK. + */ +function makeResult( + scenarios: Array<{ + scenario: string; + passed: boolean; + steps?: Array<{ passed: boolean; step?: string }>; + }>, +): ComplianceResult { + return { + agent_url: 'https://example.test/mcp', + overall_status: 'passing', + tracks: [ + { + track: 'signals', + label: 'Signals', + status: 'passing', + duration_ms: 0, + skipped_scenarios: [], + observations: [], + scenarios: scenarios.map(s => ({ + agent_url: 'https://example.test/mcp', + scenario: s.scenario as unknown as ComplianceResult['tracks'][number]['scenarios'][number]['scenario'], + overall_passed: s.passed, + steps: s.steps?.map(step => ({ + step: step.step ?? 
'step', + passed: step.passed, + duration_ms: 0, + })), + summary: 'fixture', + total_duration_ms: 0, + tested_at: '2026-05-11T00:00:00.000Z', + })), + }, + ], + tested_tracks: [], + skipped_tracks: [], + summary: { + tracks_passed: 0, + tracks_failed: 0, + tracks_skipped: 0, + tracks_partial: 0, + tracks_silent: 0, + headline: 'fixture', + }, + observations: [], + tested_at: '2026-05-11T00:00:00.000Z', + total_duration_ms: 0, + } as unknown as ComplianceResult; +} + +describe('deriveStoryboardStatuses', () => { + it('emits one entry per storyboard the runner produced data for', () => { + const result = makeResult([ + { scenario: 'signal_owned/capability_discovery', passed: true, steps: [{ passed: true }] }, + { scenario: 'signal_owned/discovery', passed: true, steps: [{ passed: true }, { passed: true }] }, + { scenario: 'signals_baseline/discover_and_activate', passed: true, steps: [{ passed: true }] }, + ]); + const entries = deriveStoryboardStatuses(result); + const ids = entries.map(e => e.storyboard_id).sort(); + expect(ids).toEqual(['signal_owned', 'signals_baseline']); + }); + + it('marks a storyboard passing when every phase passes (step counts roll up)', () => { + const result = makeResult([ + { scenario: 'signal_owned/capability_discovery', passed: true, steps: [{ passed: true }] }, + { scenario: 'signal_owned/discovery', passed: true, steps: [{ passed: true }, { passed: true }] }, + { scenario: 'signal_owned/activation', passed: true, steps: [{ passed: true }] }, + ]); + const [entry] = deriveStoryboardStatuses(result); + expect(entry).toEqual({ + storyboard_id: 'signal_owned', + status: 'passing', + steps_passed: 4, + steps_total: 4, + }); + }); + + it("marks a storyboard partial when some phases' steps fail", () => { + const result = makeResult([ + { scenario: 'signal_owned/capability_discovery', passed: true, steps: [{ passed: true }] }, + { scenario: 'signal_owned/discovery', passed: false, steps: [{ passed: true }, { passed: false }] }, + ]); + 
const [entry] = deriveStoryboardStatuses(result); + expect(entry).toMatchObject({ + storyboard_id: 'signal_owned', + status: 'partial', + steps_passed: 2, + steps_total: 3, + }); + }); + + it('marks a storyboard failing when every step failed', () => { + const result = makeResult([ + { scenario: 'signal_owned/capability_discovery', passed: false, steps: [{ passed: false }] }, + { scenario: 'signal_owned/discovery', passed: false, steps: [{ passed: false }, { passed: false }] }, + ]); + const [entry] = deriveStoryboardStatuses(result); + expect(entry).toMatchObject({ status: 'failing', steps_passed: 0, steps_total: 3 }); + }); + + it('falls back to phase-level counts when phases have no steps array', () => { + const result = makeResult([ + { scenario: 'signal_owned/capability_discovery', passed: true }, + { scenario: 'signal_owned/discovery', passed: false }, + ]); + const [entry] = deriveStoryboardStatuses(result); + expect(entry).toMatchObject({ + storyboard_id: 'signal_owned', + status: 'partial', + steps_passed: 1, + steps_total: 2, + }); + }); + + it('skips legacy bare-name scenarios (no "/" separator)', () => { + const result = makeResult([ + { scenario: 'signals_flow', passed: true, steps: [{ passed: true }] }, + { scenario: 'capability_discovery', passed: true, steps: [{ passed: true }] }, + ]); + expect(deriveStoryboardStatuses(result)).toEqual([]); + }); + + it('returns empty when no scenarios were produced', () => { + expect(deriveStoryboardStatuses(makeResult([]))).toEqual([]); + }); + + describe('with explicit storyboardIds', () => { + it('emits untested entry when the runner did not run a requested storyboard', () => { + const result = makeResult([ + { scenario: 'signal_owned/capability_discovery', passed: true, steps: [{ passed: true }] }, + ]); + const entries = deriveStoryboardStatuses(result, ['signal_owned', 'signal_marketplace']); + expect(entries).toEqual([ + { storyboard_id: 'signal_owned', status: 'passing', steps_passed: 1, steps_total: 1 }, 
+ { storyboard_id: 'signal_marketplace', status: 'untested', steps_passed: 0, steps_total: 0 }, + ]); + }); + + it('only emits entries for the requested ids even when more were run', () => { + const result = makeResult([ + { scenario: 'signal_owned/p1', passed: true, steps: [{ passed: true }] }, + { scenario: 'signals_baseline/p1', passed: true, steps: [{ passed: true }] }, + ]); + const entries = deriveStoryboardStatuses(result, ['signal_owned']); + expect(entries.map(e => e.storyboard_id)).toEqual(['signal_owned']); + }); + }); +}); From 67740eae00b3aa9533dde9cda1ca33d72d07a0cb Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Mon, 11 May 2026 04:14:26 -0400 Subject: [PATCH 2/3] =?UTF-8?q?chore(scripts):=20test-comply-storyboard-st?= =?UTF-8?q?atuses=20=E2=80=94=20local=20harness=20for=20the=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Runs comply() against an agent URL and prints what deriveStoryboardStatuses would produce, without DB writes. Used to validate the SDK-6.x scenario-key fix against real agents (adcp-signals-adaptor.evgeny-193.workers.dev/mcp and wonderstruck.sales-agent.scope3.com/mcp) before merging. Will stay useful for future SDK upgrades that touch scenario emission or storyboard-track aggregation — same pattern as the diagnose-agent-comply-queue script from #4361. Usage: npx tsx server/src/scripts/test-comply-storyboard-statuses.ts <agent-url> [<agent-url> ...] 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../test-comply-storyboard-statuses.ts | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 server/src/scripts/test-comply-storyboard-statuses.ts diff --git a/server/src/scripts/test-comply-storyboard-statuses.ts b/server/src/scripts/test-comply-storyboard-statuses.ts new file mode 100644 index 0000000000..2a26eda3d1 --- /dev/null +++ b/server/src/scripts/test-comply-storyboard-statuses.ts @@ -0,0 +1,95 @@ +/** + * Run `comply()` against an agent URL and print what + * `deriveStoryboardStatuses` would produce. Read-only — no DB writes. + * + * Lets us validate the new SDK-6.x scenario-key parser against real agents + * before merging. Mirrors what the compliance heartbeat does for the + * storyboard-status piece, but prints to stdout instead of recording. + * + * Usage: + * npx tsx server/src/scripts/test-comply-storyboard-statuses.ts + * npx tsx server/src/scripts/test-comply-storyboard-statuses.ts ... + */ + +import { AAO_UA_COMPLIANCE } from '../config/user-agents.js'; +import { + comply, + deriveStoryboardStatuses, + complianceResultToDbInput, + type ComplyOptions, +} from '../addie/services/compliance-testing.js'; + +const urls = process.argv.slice(2).filter(a => !a.startsWith('--')); + +if (urls.length === 0) { + console.error('Usage: test-comply-storyboard-statuses.ts [ ...]'); + process.exit(1); +} + +async function probe(agentUrl: string): Promise { + console.log(`\n${'='.repeat(80)}\nAgent: ${agentUrl}\n${'='.repeat(80)}`); + const start = Date.now(); + + const opts: ComplyOptions = { + test_session_id: `local-probe-${Date.now()}`, + timeout_ms: 90_000, + userAgent: AAO_UA_COMPLIANCE, + }; + + let result; + try { + result = await comply(agentUrl, opts); + } catch (err) { + console.log(` comply() threw: ${err instanceof Error ? 
err.message : String(err)}`); + return; + } + + const duration = Date.now() - start; + console.log(`\nOverall: ${result.overall_status} (${duration}ms)`); + console.log(`Headline: ${result.summary.headline}`); + console.log(`Declared specialisms: ${JSON.stringify(result.agent_profile?.specialisms ?? [])}`); + console.log(`Storyboards executed: ${JSON.stringify(result.storyboards_executed ?? '(field absent)')}`); + + console.log(`\nTracks:`); + for (const t of result.tracks) { + console.log(` ${t.track.padEnd(20)} status=${t.status.padEnd(8)} scenarios=${t.scenarios.length}`); + for (const s of t.scenarios.slice(0, 6)) { + const pass = s.overall_passed ? '✓' : '✗'; + const stepCount = s.steps?.length ?? 0; + const stepsPassed = s.steps?.filter(st => st.passed).length ?? 0; + console.log(` ${pass} ${s.scenario.padEnd(50)} steps=${stepsPassed}/${stepCount}`); + } + if (t.scenarios.length > 6) { + console.log(` … +${t.scenarios.length - 6} more`); + } + } + + console.log(`\nderiveStoryboardStatuses() output (what the heartbeat would persist):`); + const entries = deriveStoryboardStatuses(result); + if (entries.length === 0) { + console.log(` (empty — nothing to persist)`); + } else { + for (const e of entries) { + console.log(` ${e.storyboard_id.padEnd(40)} ${e.status.padEnd(10)} steps=${e.steps_passed}/${e.steps_total}`); + } + } + + console.log(`\ncomplianceResultToDbInput().storyboard_statuses (full input shape):`); + const dbInput = complianceResultToDbInput(result, agentUrl, 'production', 'manual'); + console.log(` count: ${dbInput.storyboard_statuses?.length ?? 
0}`); + if (dbInput.storyboard_statuses?.length) { + console.log(JSON.stringify(dbInput.storyboard_statuses, null, 2)); + } +} + +async function main(): Promise { + for (const url of urls) { + await probe(url); + } + console.log(''); +} + +main().catch((err) => { + console.error('Probe failed:', err); + process.exit(1); +}); From c11abeb7911725d910d478d8410c91993295a4fa Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Mon, 11 May 2026 04:19:43 -0400 Subject: [PATCH 3/3] =?UTF-8?q?fix(compliance):=20code=20review=20nits=20?= =?UTF-8?q?=E2=80=94=20clarify=20steps=20doc,=20hoist=20explicit-ids=20che?= =?UTF-8?q?ck,=20add=203=20edge=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses code-reviewer feedback on PR #4364: - JSDoc on deriveStoryboardStatuses now calls out that steps_passed/total are not directly comparable across rows (some rows are real step counts, some are phase-level fallbacks when the SDK omits per-step data). - Comment pinning the storyboard-id invariant (flat ids, no `/`) so the indexOf split stays correct as new storyboards land. - Defensive `result.tracks ?? []` so a malformed result doesn't throw. - Hoist `storyboardIds && length > 0` into a single `hasExplicitIds` const used at both the toEmit decision and the no-data fallback. - Three new test cases: * same storyboard split across multiple tracks aggregates correctly * result.tracks absent → [] * non-string scenario values (null, number) → skipped without throwing 12/12 vitest passing. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/addie/services/compliance-testing.ts | 22 +++-- .../unit/derive-storyboard-statuses.test.ts | 83 +++++++++++++++++++ 2 files changed, 100 insertions(+), 5 deletions(-) diff --git a/server/src/addie/services/compliance-testing.ts b/server/src/addie/services/compliance-testing.ts index a2bc955220..e8f5e04587 100644 --- a/server/src/addie/services/compliance-testing.ts +++ b/server/src/addie/services/compliance-testing.ts @@ -237,6 +237,14 @@ function mapOverallStatus(status: string): OverallRunStatus { * the SDK actually produced data for. * - explicit-IDs path (`storyboardIds` non-empty): emit one entry per id, * with `status='untested'` for any id the SDK didn't run. + * + * `steps_passed` / `steps_total` reflect what the SDK reported for that + * storyboard in this run. Two storyboards (or the same storyboard across + * different runs) may count steps differently: most rows are real step + * counts; rows where the SDK emitted phases without per-step data fall back + * to phase-level counts. The values are meaningful within a single row + * (passed/total ratio, status derivation) but should not be compared across + * rows without checking which mode produced them. */ export function deriveStoryboardStatuses( result: ComplianceResult, @@ -249,8 +257,13 @@ export function deriveStoryboardStatuses( phasesTotal: number; } const perStoryboard = new Map(); + // Storyboard ids in `static/compliance/source/**/index.yaml` are flat + // identifiers (no `/`); splitting on the first `/` therefore always yields + // the storyboard id followed by the phase id. The `<= 0` guard also + // rejects pathological leading-slash strings. + const tracks = result.tracks ?? []; - for (const track of result.tracks) { + for (const track of tracks) { for (const s of track.scenarios) { const sepIdx = typeof s.scenario === 'string' ? 
s.scenario.indexOf('/') : -1; if (sepIdx <= 0) continue; // skip legacy bare-name scenarios (no longer emitted by storyboard-driven comply()) @@ -276,16 +289,15 @@ export function deriveStoryboardStatuses( } // Decide which storyboard ids to emit entries for. - const toEmit = storyboardIds && storyboardIds.length > 0 - ? storyboardIds - : Array.from(perStoryboard.keys()); + const hasExplicitIds = !!storyboardIds && storyboardIds.length > 0; + const toEmit = hasExplicitIds ? storyboardIds! : Array.from(perStoryboard.keys()); const entries: StoryboardStatusEntry[] = []; for (const sbId of toEmit) { const agg = perStoryboard.get(sbId); if (!agg) { // Explicit id requested but the runner didn't produce data for it. - if (storyboardIds && storyboardIds.length > 0) { + if (hasExplicitIds) { entries.push({ storyboard_id: sbId, status: 'untested', steps_passed: 0, steps_total: 0 }); } continue; diff --git a/server/tests/unit/derive-storyboard-statuses.test.ts b/server/tests/unit/derive-storyboard-statuses.test.ts index 2292ba0aa8..cb136f0686 100644 --- a/server/tests/unit/derive-storyboard-statuses.test.ts +++ b/server/tests/unit/derive-storyboard-statuses.test.ts @@ -134,6 +134,89 @@ describe('deriveStoryboardStatuses', () => { expect(deriveStoryboardStatuses(makeResult([]))).toEqual([]); }); + it('aggregates a storyboard whose phases appear in multiple tracks', () => { + const r = makeResult([]); + r.tracks = [ + { + track: 'core', + label: 'Core', + status: 'passing', + duration_ms: 0, + skipped_scenarios: [], + observations: [], + scenarios: [ + { + agent_url: 'https://example.test/mcp', + scenario: 'sales_non_guaranteed/capability_discovery' as never, + overall_passed: true, + steps: [{ step: 'a', passed: true, duration_ms: 0 }], + summary: '', + total_duration_ms: 0, + tested_at: '', + }, + ], + }, + { + track: 'media_buy', + label: 'Media Buy', + status: 'passing', + duration_ms: 0, + skipped_scenarios: [], + observations: [], + scenarios: [ + { + agent_url: 
'https://example.test/mcp', + scenario: 'sales_non_guaranteed/create_buy' as never, + overall_passed: true, + steps: [{ step: 'b', passed: true, duration_ms: 0 }, { step: 'c', passed: false, duration_ms: 0 }], + summary: '', + total_duration_ms: 0, + tested_at: '', + }, + ], + }, + ] as unknown as ComplianceResult['tracks']; + const entries = deriveStoryboardStatuses(r); + expect(entries).toHaveLength(1); + expect(entries[0]).toMatchObject({ + storyboard_id: 'sales_non_guaranteed', + status: 'partial', + steps_passed: 2, + steps_total: 3, + }); + }); + + it('handles result.tracks being absent', () => { + const r = makeResult([]); + (r as { tracks?: unknown }).tracks = undefined; + expect(deriveStoryboardStatuses(r)).toEqual([]); + }); + + it('ignores non-string scenario values without throwing', () => { + const r = makeResult([]); + r.tracks[0].scenarios = [ + { + agent_url: 'https://example.test/mcp', + scenario: null as never, + overall_passed: true, + steps: [{ step: 'x', passed: true, duration_ms: 0 }], + summary: '', + total_duration_ms: 0, + tested_at: '', + }, + { + agent_url: 'https://example.test/mcp', + scenario: 12345 as never, + overall_passed: true, + steps: [{ step: 'y', passed: true, duration_ms: 0 }], + summary: '', + total_duration_ms: 0, + tested_at: '', + }, + ]; + expect(deriveStoryboardStatuses(r)).toEqual([]); + }); + describe('with explicit storyboardIds', () => { it('emits untested entry when the runner did not run a requested storyboard', () => { const result = makeResult([