adcontextprotocol · EmmaLouise2018 · May 8, 2026
diff --git a/.changeset/final-cleanup-drop-test-history-and-last-test-columns.md b/.changeset/final-cleanup-drop-test-history-and-last-test-columns.md
@@ -0,0 +1,59 @@
+---
+---
+
+Final cleanup of the #4247 compliance-state unification stack. Drops
+`agent_test_history` and the `agent_contexts.last_test_*` columns now
+that owner test runs persist canonically via
+`agent_compliance_runs.triggered_by='owner_test'` (PR #4250) and
+read-side derivation goes through `agent_context_with_latest_test`
+(PR #4268).
+
+**Pre-merge gate (load-bearing — destructive migration):**
+
+1. PR #4250 ≥ 14 days live in prod with zero canonical-write incidents.
+2. PR #4263 ≥ 7 days live in prod with the dashboard rendering identical
+   verdicts via the view-derived path.
+3. PR #4264's migration 472 has run; row-count delta on staging is ±0
+   (every owner-triggered `agent_test_history` row backfilled into
+   `agent_compliance_runs`).
+4. Third-party (`user_id IS NULL`) rows from `agent_test_history`
+   exported to S3 cold storage. Export evidence committed to the ops
+   runbook before the migration runs. Reversibility path is the export,
+   not pg_dump.
+5. PR #4268's view + reader migration confirmed working in prod.
+
+**What this PR does.**
+
+- **Migration 474.** Redefines `agent_context_summary` view without
+  references to the dropped table/columns; drops
+  `agent_contexts.last_test_*` columns; drops `agent_test_history`
+  table; refreshes `agent_context_with_latest_test` so the view's
+  `ac.*` projection no longer carries the removed columns.
+- **`agent-context-db.ts`.** Removes `recordTest`, `getTestHistory`,
+  `getLatestTestForUser`, the `AgentTestHistory` interface, and the
+  `RecordTestInput` interface. The `last_test_*` SET branches in
+  `update()` go away; the method now refetches via `getById()` after
+  the UPDATE so derived view fields stay populated.
+- **`evaluate_agent_quality`.** The third-party `recordTest()` write
+  path is removed. Non-owner runs are now session-scoped — they return
+  results in the response and do not persist.
+- **`run_storyboard`.** The `recordTest()` write path is removed, so
+  single-storyboard runs are now session-scoped. They deliberately do
+  not write canonical state: recording one storyboard as a compliance
+  run would overstate the agent's test coverage. A future
+  `triggered_by = 'storyboard_test'` enum value could add canonical
+  writes here, but that's a separate design discussion.
+
+**Behavior change.**
+
+- Third-party / non-owner `evaluate_agent_quality` runs against
+  someone else's agent no longer leave any persistent state in the
+  registry. Matches the "owner-only canonical writes" policy from
+  #4247. Stranger-runs return results to the caller in the same
+  shape; they just don't persist.
+- `run_storyboard` runs (any caller) no longer leave persistent state
+  in the registry. The dashboard's "tested at" timestamps for an org
+  reflect only owner `evaluate_agent_quality` runs (which exercise the
+  full comply suite); single-storyboard runs are exploratory tooling.
+
+**Stacked on** #4268 (PR 4) → #4264 (PR 3) → #4263 (PR 2) → #4250 (PR 1).
diff --git a/server/src/addie/mcp/member-tools.ts b/server/src/addie/mcp/member-tools.ts
@@ -3618,38 +3618,12 @@ export function createMemberToolHandlers(
           }
         }
 
-        // Legacy write to agent_contexts + agent_test_history. Retained ONLY
-        // for non-owner runs so a third-party who runs evaluate_agent_quality
-        // against someone else's agent still has a session-scoped audit trail
-        // (their own org's agent_test_history). Owner runs already wrote
-        // canonical state above (PR #4250); writing twice would split the
-        // audit and re-introduce the dual-write bug PR #4247 is closing.
-        //
-        // PR 4 of #4247 collapses agent_contexts.last_test_* into a derived
-        // view, after which this legacy block (and recordTest itself) drop
-        // entirely.
-        if (!isAgentOwner) {
-          try {
-            const context = await agentContextDb.getByOrgAndUrl(organizationId, resolved.resolvedUrl);
-            if (context) {
-              await agentContextDb.recordTest({
-                agent_context_id: context.id,
-                scenario: 'quality_evaluation',
-                overall_passed: result.overall_status === 'passing',
-                steps_passed: result.summary.tracks_passed,
-                steps_failed: result.summary.tracks_failed,
-                total_duration_ms: result.total_duration_ms,
-                summary: result.summary.headline,
-                dry_run: true,
-                triggered_by: 'user',
-                user_id: memberContext?.workos_user?.workos_user_id,
-                agent_profile_json: result.agent_profile,
-              });
-            }
-          } catch (error) {
-            logger.debug({ error }, 'Could not record quality evaluation result');
-          }
-        }
+        // Non-owner runs are now session-scoped — they return results to
+        // the caller in the response and do not persist anywhere. The legacy
+        // recordTest path was removed because migration 474 (#4247 final
+        // cleanup) drops agent_test_history, the table it wrote to. Strangers
+        // testing someone else's agent no longer leave persistent state in
+        // the registry, matching #4247's "owner-only canonical writes" policy.
       }
 
       // Build structured output for Addie to interpret
@@ -4123,33 +4097,18 @@ export function createMemberToolHandlers(
         return `Agent at ${resolved.resolvedUrl} requires authentication. Use \`save_agent\` to store credentials first, then try again.`;
       }
 
-      // Record the run in agent_test_history when we have a saved
-      // agent_context for this org+url. Mirrors evaluate_agent_quality's
-      // pattern; powers the "agent not tested in 14d" prompt rule.
-      // Storyboard runs don't carry a structured agent_profile (only
-      // evaluate_agent_quality probes get_adcp_capabilities), so we
-      // omit agent_profile_json — readers tolerate null.
-      if (organizationId) {
-        try {
-          const context = await agentContextDb.getByOrgAndUrl(organizationId, resolved.resolvedUrl);
-          if (context) {
-            await agentContextDb.recordTest({
-              agent_context_id: context.id,
-              scenario: `storyboard:${sb.id}`,
-              overall_passed: result.overall_passed,
-              steps_passed: result.passed_count,
-              steps_failed: result.failed_count,
-              total_duration_ms: result.total_duration_ms,
-              summary: result.storyboard_title,
-              dry_run: dryRun,
-              triggered_by: 'user',
-              user_id: memberContext?.workos_user?.workos_user_id,
-            });
-          }
-        } catch (error) {
-          logger.debug({ error }, 'Could not record storyboard run');
-        }
-      }
+      // run_storyboard runs a single storyboard (vs evaluate_agent_quality's
+      // full comply suite). Single-storyboard runs do not currently write
+      // canonical state — that would require synthesizing a comply-shaped
+      // result with one track, which overstates the test coverage. The
+      // legacy recordTest call that wrote to agent_test_history was removed
+      // alongside migration 474 (#4247 final cleanup), so single-storyboard
+      // runs are exploratory and now session-scoped: nothing is persisted.
+      //
+      // If a future track wants to surface storyboard runs in
+      // agent_compliance_runs as a distinct triggered_by value (e.g.
+      // 'storyboard_test'), open a follow-up — that's a schema change with
+      // its own design discussion.
 
       let output = '';
       if (resolved.source === 'saved') output += '_Using saved credentials._\n\n';