adcontextprotocol · EmmaLouise2018 · May 8, 2026 · May 9, 2026
diff --git a/.changeset/derive-agent-context-last-test-from-canonical.md b/.changeset/derive-agent-context-last-test-from-canonical.md
@@ -0,0 +1,45 @@
+---
+---
+
+PR 4 of the #4247 unification stack. Replaces direct reads of
+`agent_contexts.last_test_*` with a view that derives them from
+`agent_compliance_runs` — the canonical source PR #4250 unified onto.
+
+**What changes.**
+
+- New column `agent_compliance_runs.triggered_org_id` (nullable). Populated
+  by the owner-test write path in `evaluate_agent_quality` using the
+  caller's `organizationId`. Heartbeat / manual / webhook writes leave it
+  NULL — they don't have an org dimension. Without this column, two orgs
+  that own the same agent URL (e.g. staging and prod orgs of one publisher)
+  would conflate their test history through a join on `agent_url` alone.
+- New view `agent_context_with_latest_test`: `agent_contexts.*` joined to
+  the latest non-dry-run `agent_compliance_runs` row scoped by
+  `(triggered_org_id, agent_url)` via `LEFT JOIN LATERAL`, plus a second
+  lateral COUNT subquery for `total_tests_run`. Surfaces the derived fields
+  as `canonical_last_test_*` so the column-rename in the SELECT is explicit.
+- `AgentContextDatabase.getByOrganization`, `getById`, `getByOrgAndUrl`
+  now SELECT from the view and alias `canonical_last_test_*` →
+  `last_test_*` so callers see no shape change.
+
+**Backward compat.** The legacy `agent_contexts.last_test_*` columns stay.
+Third-party (non-owner) `recordTest()` writes still update them — that's
+the session-scoped audit trail PR 3 of #4247 retained for non-owner runs.
+The columns become dead-letter once `agent_test_history` is dropped (gated
+on the soak windows in #4247) and `recordTest()` retires in the follow-up
+"final cleanup" PR.
+
+**Semantic shift (last_test_scenario).** For owner test runs,
+`last_test_scenario` now returns `tracks_json[0].track` (e.g.
+`'quality_evaluation'`) rather than the literal string the old
+`recordTest()` write path stored directly. No existing callers branch on
+this value, but downstream consumers that read `last_test_scenario` should
+expect a track name sourced from the canonical run record rather than the
+legacy scenario string.
+
+**Index.** `idx_agent_compliance_runs_triggered_org_url_at` on
+`(triggered_org_id, agent_url, tested_at DESC)` (partial, only where
+`triggered_org_id IS NOT NULL`) supports the view's per-org
+`ORDER BY tested_at DESC LIMIT 1` lateral lookup as a single index scan.
+
+**Stacked on** #4264 (PR 3) → #4263 (PR 2) → #4250 (PR 1).
diff --git a/server/src/addie/mcp/member-tools.ts b/server/src/addie/mcp/member-tools.ts
@@ -3602,6 +3602,11 @@ export function createMemberToolHandlers(
                 // Owner test runs are not dry runs — they update the live public record.
                 // (complianceResultToDbInput hard-codes dry_run: true; override here.)
                 dry_run: false,
+                // Org scope for the per-org `agent_context_with_latest_test` view.
+                // Without this, two orgs that own the same agent URL (staging vs
+                // prod orgs of one publisher) would conflate their test history.
+                // See migration 473.
+                triggered_org_id: organizationId,
               };
               await complianceDb.recordComplianceRun(dbInput);
               // notifyComplianceChange intentionally omitted: owner test runs are

diff --git a/server/src/db/agent-context-db.ts b/server/src/db/agent-context-db.ts
@@ -182,15 +182,20 @@ export class AgentContextDatabase {
           AND oauth_cc_client_secret_encrypted IS NOT NULL) as has_oauth_client_credentials,
         tools_discovered,
         last_discovered_at,
-        last_test_scenario,
-        last_test_passed,
-        last_test_summary,
-        last_tested_at,
-        total_tests_run,
+        -- Derived from agent_compliance_runs via the view (canonical source).
+        -- The legacy agent_contexts.last_test_* columns stay for backward
+        -- compat with non-owner third-party writes through recordTest, but
+        -- reads come from the view so the unification is consistent. See
+        -- migration 473.
+        canonical_last_test_scenario AS last_test_scenario,
+        canonical_last_test_passed AS last_test_passed,
+        canonical_last_test_summary AS last_test_summary,
+        canonical_last_tested_at AS last_tested_at,
+        canonical_total_tests_run AS total_tests_run,
         created_at,
         updated_at,
         created_by
-      FROM agent_contexts
+      FROM agent_context_with_latest_test
       WHERE organization_id = $1
       ORDER BY updated_at DESC`,
       [organizationId]
@@ -222,15 +227,20 @@ export class AgentContextDatabase {
           AND oauth_cc_client_secret_encrypted IS NOT NULL) as has_oauth_client_credentials,
         tools_discovered,
         last_discovered_at,
-        last_test_scenario,
-        last_test_passed,
-        last_test_summary,
-        last_tested_at,
-        total_tests_run,
+        -- Derived from agent_compliance_runs via the view (canonical source).
+        -- The legacy agent_contexts.last_test_* columns stay for backward
+        -- compat with non-owner third-party writes through recordTest, but
+        -- reads come from the view so the unification is consistent. See
+        -- migration 473.
+        canonical_last_test_scenario AS last_test_scenario,
+        canonical_last_test_passed AS last_test_passed,
+        canonical_last_test_summary AS last_test_summary,
+        canonical_last_tested_at AS last_tested_at,
+        canonical_total_tests_run AS total_tests_run,
         created_at,
         updated_at,
         created_by
-      FROM agent_contexts
+      FROM agent_context_with_latest_test
       WHERE id = $1`,
       [id]
     );
@@ -261,15 +271,20 @@ export class AgentContextDatabase {
           AND oauth_cc_client_secret_encrypted IS NOT NULL) as has_oauth_client_credentials,
         tools_discovered,
         last_discovered_at,
-        last_test_scenario,
-        last_test_passed,
-        last_test_summary,
-        last_tested_at,
-        total_tests_run,
+        -- Derived from agent_compliance_runs via the view (canonical source).
+        -- The legacy agent_contexts.last_test_* columns stay for backward
+        -- compat with non-owner third-party writes through recordTest, but
+        -- reads come from the view so the unification is consistent. See
+        -- migration 473.
+        canonical_last_test_scenario AS last_test_scenario,
+        canonical_last_test_passed AS last_test_passed,
+        canonical_last_test_summary AS last_test_summary,
+        canonical_last_tested_at AS last_tested_at,
+        canonical_total_tests_run AS total_tests_run,
         created_at,
         updated_at,
         created_by
-      FROM agent_contexts
+      FROM agent_context_with_latest_test
       WHERE organization_id = $1 AND agent_url = $2`,
       [organizationId, agentUrl]
     );
@@ -305,11 +320,15 @@ export class AgentContextDatabase {
         FALSE as has_oauth_client,
         tools_discovered,
         last_discovered_at,
+        -- NOTE(review): this column list ends the statement with no FROM, so it
+        -- looks like a RETURNING clause on agent_contexts. The canonical_*
+        -- columns exist only on agent_context_with_latest_test and cannot be
+        -- referenced here; keep returning the base-table columns.
         last_test_scenario,
         last_test_passed,
         last_test_summary,
         last_tested_at,
         total_tests_run,
         created_at,
         updated_at,
         created_by`,

diff --git a/server/src/db/compliance-db.ts b/server/src/db/compliance-db.ts
@@ -184,6 +184,14 @@ export interface RecordComplianceRunInput {
   agent_profile_json?: any;
   observations_json?: any;
   triggered_by?: TriggeredBy;
+  /**
+   * WorkOS organization id of the org that triggered the run. Populated only
+   * for triggered_by='owner_test'; heartbeat / manual / webhook leave it NULL.
+   * Required for the per-org scoping of `agent_context_with_latest_test` so
+   * two orgs that own the same agent (e.g. staging vs prod orgs of one
+   * publisher) don't conflate their test history. See migration 473.
+   */
+  triggered_org_id?: string | null;
   dry_run?: boolean;
   storyboard_statuses?: StoryboardStatusEntry[];
 }
@@ -265,8 +273,8 @@ export class ComplianceDatabase {
           agent_url, lifecycle_stage, overall_status, headline,
           total_duration_ms, tracks_json, tracks_passed, tracks_failed,
           tracks_skipped, tracks_partial, agent_profile_json,
-          observations_json, triggered_by, dry_run
-        ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
+          observations_json, triggered_by, triggered_org_id, dry_run
+        ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15)
         RETURNING *`,
         [
           input.agent_url,
@@ -282,6 +290,7 @@ export class ComplianceDatabase {
           input.agent_profile_json ? JSON.stringify(input.agent_profile_json) : null,
           input.observations_json ? JSON.stringify(input.observations_json) : null,
           input.triggered_by ?? 'heartbeat',
+          input.triggered_org_id ?? null,
           input.dry_run ?? true,
         ],
       );

diff --git a/server/src/db/migrations/473_agent_compliance_runs_triggered_org_id.sql b/server/src/db/migrations/473_agent_compliance_runs_triggered_org_id.sql
@@ -0,0 +1,80 @@
+-- Migration 473: add triggered_org_id to agent_compliance_runs and create
+-- agent_context_with_latest_test view.
+--
+-- Part of the #4247 compliance-state unification (PR 4 of 4). Today,
+-- agent_contexts.last_test_* columns carry the most-recent test verdict
+-- per (organization_id, agent_url). After PR #4250 owner runs write
+-- canonical state via agent_compliance_runs, but that table tracks only
+-- agent_url — there's no org dimension, so a derived "latest owner test"
+-- can't be accurately scoped per org. Two orgs that own the same agent
+-- (rare but possible — staging vs prod org of one publisher, for example)
+-- would conflate.
+--
+-- Adding triggered_org_id closes the gap. Populated by the owner-test
+-- write path in evaluate_agent_quality (this PR). Heartbeat / manual /
+-- webhook writes leave it NULL.
+
+ALTER TABLE agent_compliance_runs
+  ADD COLUMN IF NOT EXISTS triggered_org_id TEXT;
+
+COMMENT ON COLUMN agent_compliance_runs.triggered_org_id IS
+  'WorkOS organization ID of the org that triggered the run. Stored as TEXT (no FK) because WorkOS IDs are foreign-system keys — referential integrity against organizations.workos_organization_id is not enforced at the DB layer. Populated only for triggered_by=''owner_test''; heartbeat / manual / webhook rows leave it NULL.';
+
+-- Index supports the derived `agent_context_with_latest_test` view's
+-- per-(org, url) LATERAL "ORDER BY tested_at DESC LIMIT 1" lookup.
+-- tested_at DESC keeps the latest-row pull as a single index scan.
+CREATE INDEX IF NOT EXISTS idx_agent_compliance_runs_triggered_org_url_at
+  ON agent_compliance_runs (triggered_org_id, agent_url, tested_at DESC)
+  WHERE triggered_org_id IS NOT NULL;
+
+-- View: agent_context joined with the latest agent_compliance_runs row
+-- scoped to that org via triggered_org_id. Replaces direct reads of
+-- agent_contexts.last_test_* columns.
+--
+-- The columns on agent_contexts stay for backward compat — recordTest()
+-- still writes them for third-party (non-owner) runs, and a follow-up
+-- migration drops them once recordTest() retires (gated on the
+-- agent_test_history drop, which is itself gated on the soak windows
+-- documented in #4247).
+--
+-- last_test_passed: derived from overall_status='passing'.
+-- last_test_scenario: tracks_json[0].track when present, else 'compliance'
+--   (heartbeat/manual writes don't carry the legacy 'quality_evaluation'
+--   scenario string). NULL when the org has no non-dry-run row for the
+--   URL, so an untested agent does not report a 'compliance' scenario.
+-- last_test_summary: agent_compliance_runs.headline.
+-- last_tested_at: agent_compliance_runs.tested_at.
+-- total_tests_run: COUNT(*) of non-dry-run agent_compliance_runs rows
+--   scoped to the org+url. Was a per-context counter on the old column;
+--   the COUNT-based derivation matches the new canonical semantics.
+CREATE OR REPLACE VIEW agent_context_with_latest_test AS
+SELECT
+  ac.*,
+  latest.tested_at AS canonical_last_tested_at,
+  latest.overall_status = 'passing' AS canonical_last_test_passed,
+  CASE
+    WHEN latest.has_run
+      THEN COALESCE((latest.tracks_json -> 0 ->> 'track'), 'compliance')
+  END AS canonical_last_test_scenario,
+  latest.headline AS canonical_last_test_summary,
+  COALESCE(run_counts.total, 0) AS canonical_total_tests_run
+FROM agent_contexts ac
+LEFT JOIN LATERAL (
+  SELECT TRUE AS has_run, tested_at, overall_status, tracks_json, headline
+  FROM agent_compliance_runs acr
+  WHERE acr.triggered_org_id = ac.organization_id
+    AND acr.agent_url = ac.agent_url
+    AND acr.dry_run = FALSE
+  ORDER BY tested_at DESC
+  LIMIT 1
+) AS latest ON TRUE
+LEFT JOIN LATERAL (
+  SELECT COUNT(*)::INT AS total
+  FROM agent_compliance_runs acr
+  WHERE acr.triggered_org_id = ac.organization_id
+    AND acr.agent_url = ac.agent_url
+    AND acr.dry_run = FALSE
+) AS run_counts ON TRUE;
+
+COMMENT ON VIEW agent_context_with_latest_test IS
+  'Derives last_test_* fields from agent_compliance_runs (triggered_org_id-scoped). Replaces direct reads of agent_contexts.last_test_*. The legacy columns stay for backward compat until recordTest() retires (#4247 PR-after-drop).';