From aa17175750411ea46d65a8a70976c4c687ed0826 Mon Sep 17 00:00:00 2001 From: Emma Mulitz Date: Fri, 8 May 2026 16:28:49 -0400 Subject: [PATCH 1/2] feat(compliance): derive agent_context.last_test_* from canonical runs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR 4 of the #4247 unification stack. Adds triggered_org_id to agent_compliance_runs so per-org scoping of the new agent_context_with_latest_test view is accurate. Without it, two orgs that own the same agent URL would conflate test history. Owner-test write path in evaluate_agent_quality populates it from the caller's organizationId; heartbeat/manual/webhook leave it NULL. agent_context_with_latest_test view: agent_contexts.* joined LATERAL to the latest non-dry-run agent_compliance_runs row scoped by (triggered_org_id, agent_url), plus COUNT for total_tests_run. agent-context-db.ts readers (getByOrganization, getById, getByOrgAndUrl) SELECT from the view and alias canonical_last_test_* → last_test_* so callers see no shape change. Legacy columns stay for backward compat — third-party recordTest() writes still hit them (session-scoped audit retained per PR 3). The columns + recordTest retire in the follow-up "final cleanup" PR that drops agent_test_history. Stacked on #4264 → #4263 → #4250. 
--- ...-agent-context-last-test-from-canonical.md | 37 +++++++++ server/src/addie/mcp/member-tools.ts | 5 ++ server/src/db/agent-context-db.ts | 66 ++++++++++------ server/src/db/compliance-db.ts | 13 +++- ...agent_compliance_runs_triggered_org_id.sql | 77 +++++++++++++++++++ 5 files changed, 173 insertions(+), 25 deletions(-) create mode 100644 .changeset/derive-agent-context-last-test-from-canonical.md create mode 100644 server/src/db/migrations/473_agent_compliance_runs_triggered_org_id.sql diff --git a/.changeset/derive-agent-context-last-test-from-canonical.md b/.changeset/derive-agent-context-last-test-from-canonical.md new file mode 100644 index 0000000000..68668a6c43 --- /dev/null +++ b/.changeset/derive-agent-context-last-test-from-canonical.md @@ -0,0 +1,37 @@ +--- +--- + +PR 4 of the #4247 unification stack. Replaces direct reads of +`agent_contexts.last_test_*` with a view that derives them from +`agent_compliance_runs` — the canonical source PR #4250 unified onto. + +**What changes.** + +- New column `agent_compliance_runs.triggered_org_id` (nullable). Populated + by the owner-test write path in `evaluate_agent_quality` using the + caller's `organizationId`. Heartbeat / manual / webhook writes leave it + NULL — they don't have an org dimension. Without this column, two orgs + that own the same agent URL (e.g. staging and prod orgs of one publisher) + would conflate their test history through a join on `agent_url` alone. +- New view `agent_context_with_latest_test`: `agent_contexts.*` joined to + the latest non-dry-run `agent_compliance_runs` row scoped by + `(triggered_org_id, agent_url)` via `LEFT JOIN LATERAL`, plus a COUNT + scalar subquery for `total_tests_run`. Surfaces the derived fields as + `canonical_last_test_*` so the column-rename in the SELECT is explicit. +- `AgentContextDatabase.getByOrganization`, `getById`, `getByOrgAndUrl` + now SELECT from the view and alias `canonical_last_test_*` → + `last_test_*` so callers see no shape change. 
+ +**Backward compat.** The legacy `agent_contexts.last_test_*` columns stay. +Third-party (non-owner) `recordTest()` writes still update them — that's +the session-scoped audit trail PR 3 of #4247 retained for non-owner runs. +The columns become dead-letter once `agent_test_history` is dropped (gated +on the soak windows in #4247) and `recordTest()` retires in the follow-up +"final cleanup" PR. + +**Index.** `idx_agent_compliance_runs_triggered_org_url_at` on +`(triggered_org_id, agent_url, tested_at DESC)` (partial, only where +`triggered_org_id IS NOT NULL`) supports the view's per-org `DISTINCT ON` +lookup as a single index scan. + +**Stacked on** #4264 (PR 3) → #4263 (PR 2) → #4250 (PR 1). diff --git a/server/src/addie/mcp/member-tools.ts b/server/src/addie/mcp/member-tools.ts index e3b4733cf8..8d9ff72bce 100644 --- a/server/src/addie/mcp/member-tools.ts +++ b/server/src/addie/mcp/member-tools.ts @@ -3602,6 +3602,11 @@ export function createMemberToolHandlers( // Owner test runs are not dry runs — they update the live public record. // (complianceResultToDbInput hard-codes dry_run: true; override here.) dry_run: false, + // Org scope for the per-org `agent_context_with_latest_test` view. + // Without this, two orgs that own the same agent URL (staging vs + // prod orgs of one publisher) would conflate their test history. + // See migration 473. 
+ triggered_org_id: organizationId, }; await complianceDb.recordComplianceRun(dbInput); // notifyComplianceChange intentionally omitted: owner test runs are diff --git a/server/src/db/agent-context-db.ts b/server/src/db/agent-context-db.ts index 164fcda049..3e72998d84 100644 --- a/server/src/db/agent-context-db.ts +++ b/server/src/db/agent-context-db.ts @@ -182,15 +182,20 @@ export class AgentContextDatabase { AND oauth_cc_client_secret_encrypted IS NOT NULL) as has_oauth_client_credentials, tools_discovered, last_discovered_at, - last_test_scenario, - last_test_passed, - last_test_summary, - last_tested_at, - total_tests_run, + -- Derived from agent_compliance_runs via the view (canonical source). + -- The legacy agent_contexts.last_test_* columns stay for backward + -- compat with non-owner third-party writes through recordTest, but + -- reads come from the view so the unification is consistent. See + -- migration 473. + canonical_last_test_scenario AS last_test_scenario, + canonical_last_test_passed AS last_test_passed, + canonical_last_test_summary AS last_test_summary, + canonical_last_tested_at AS last_tested_at, + canonical_total_tests_run AS total_tests_run, created_at, updated_at, created_by - FROM agent_contexts + FROM agent_context_with_latest_test WHERE organization_id = $1 ORDER BY updated_at DESC`, [organizationId] @@ -222,15 +227,20 @@ export class AgentContextDatabase { AND oauth_cc_client_secret_encrypted IS NOT NULL) as has_oauth_client_credentials, tools_discovered, last_discovered_at, - last_test_scenario, - last_test_passed, - last_test_summary, - last_tested_at, - total_tests_run, + -- Derived from agent_compliance_runs via the view (canonical source). + -- The legacy agent_contexts.last_test_* columns stay for backward + -- compat with non-owner third-party writes through recordTest, but + -- reads come from the view so the unification is consistent. See + -- migration 473. 
+ canonical_last_test_scenario AS last_test_scenario, + canonical_last_test_passed AS last_test_passed, + canonical_last_test_summary AS last_test_summary, + canonical_last_tested_at AS last_tested_at, + canonical_total_tests_run AS total_tests_run, created_at, updated_at, created_by - FROM agent_contexts + FROM agent_context_with_latest_test WHERE id = $1`, [id] ); @@ -261,15 +271,20 @@ export class AgentContextDatabase { AND oauth_cc_client_secret_encrypted IS NOT NULL) as has_oauth_client_credentials, tools_discovered, last_discovered_at, - last_test_scenario, - last_test_passed, - last_test_summary, - last_tested_at, - total_tests_run, + -- Derived from agent_compliance_runs via the view (canonical source). + -- The legacy agent_contexts.last_test_* columns stay for backward + -- compat with non-owner third-party writes through recordTest, but + -- reads come from the view so the unification is consistent. See + -- migration 473. + canonical_last_test_scenario AS last_test_scenario, + canonical_last_test_passed AS last_test_passed, + canonical_last_test_summary AS last_test_summary, + canonical_last_tested_at AS last_tested_at, + canonical_total_tests_run AS total_tests_run, created_at, updated_at, created_by - FROM agent_contexts + FROM agent_context_with_latest_test WHERE organization_id = $1 AND agent_url = $2`, [organizationId, agentUrl] );
@@ -305,11 +320,16 @@ export class AgentContextDatabase {
           FALSE as has_oauth_client,
           tools_discovered,
           last_discovered_at,
+          -- This column list closes a write statement (it has no FROM), so
+          -- it can only name agent_contexts columns. The canonical_* columns
+          -- exist solely on the agent_context_with_latest_test view and
+          -- would raise "column does not exist" here, so keep returning the
+          -- legacy last_test_* columns. See migration 473.
           last_test_scenario,
           last_test_passed,
           last_test_summary,
           last_tested_at,
           total_tests_run,
           created_at,
           updated_at,
           created_by`,
diff --git a/server/src/db/compliance-db.ts b/server/src/db/compliance-db.ts index d3770fb889..94c857f9d2 100644 --- a/server/src/db/compliance-db.ts +++ b/server/src/db/compliance-db.ts @@ -184,6 +184,14 @@ export interface RecordComplianceRunInput { agent_profile_json?: any; observations_json?: any; triggered_by?: TriggeredBy; + /** + * WorkOS organization id of the org that triggered the run. Populated only + * for triggered_by='owner_test'; heartbeat / manual / webhook leave it NULL. + * Required for the per-org scoping of `agent_context_with_latest_test` so + * two orgs that own the same agent (e.g. staging vs prod orgs of one + * publisher) don't conflate their test history. See migration 473. + */ + triggered_org_id?: string | null; dry_run?: boolean; storyboard_statuses?: StoryboardStatusEntry[]; } @@ -265,8 +273,8 @@ export class ComplianceDatabase { agent_url, lifecycle_stage, overall_status, headline, total_duration_ms, tracks_json, tracks_passed, tracks_failed, tracks_skipped, tracks_partial, agent_profile_json, - observations_json, triggered_by, dry_run - ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14) + observations_json, triggered_by, triggered_org_id, dry_run + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15) RETURNING *`, [ input.agent_url, @@ -282,6 +290,7 @@ export class ComplianceDatabase { input.agent_profile_json ? JSON.stringify(input.agent_profile_json) : null, input.observations_json ? JSON.stringify(input.observations_json) : null, input.triggered_by ?? 'heartbeat', + input.triggered_org_id ?? null, input.dry_run ??
true, ], ); diff --git a/server/src/db/migrations/473_agent_compliance_runs_triggered_org_id.sql b/server/src/db/migrations/473_agent_compliance_runs_triggered_org_id.sql new file mode 100644 index 0000000000..7c07a2435a --- /dev/null +++ b/server/src/db/migrations/473_agent_compliance_runs_triggered_org_id.sql @@ -0,0 +1,77 @@ +-- Migration 473: add triggered_org_id to agent_compliance_runs and create +-- agent_context_with_latest_test view. +-- +-- Part of the #4247 compliance-state unification (PR 4 of 4). Today, +-- agent_contexts.last_test_* columns carry the most-recent test verdict +-- per (organization_id, agent_url). After PR #4250 owner runs write +-- canonical state via agent_compliance_runs, but that table tracks only +-- agent_url — there's no org dimension, so a derived "latest owner test" +-- can't be accurately scoped per org. Two orgs that own the same agent +-- (rare but possible — staging vs prod org of one publisher, for example) +-- would conflate. +-- +-- Adding triggered_org_id closes the gap. Populated by the owner-test +-- write path in evaluate_agent_quality (this PR). Heartbeat / manual / +-- webhook writes leave it NULL. + +ALTER TABLE agent_compliance_runs + ADD COLUMN IF NOT EXISTS triggered_org_id TEXT; + +-- Index supports the derived `agent_context_with_latest_test` view's +-- per-(org, url) DISTINCT ON lookup. tested_at DESC keeps the latest-row +-- pull as a single index scan. +CREATE INDEX IF NOT EXISTS idx_agent_compliance_runs_triggered_org_url_at + ON agent_compliance_runs (triggered_org_id, agent_url, tested_at DESC) + WHERE triggered_org_id IS NOT NULL; + +-- View: agent_context joined with the latest agent_compliance_runs row +-- scoped to that org via triggered_org_id. Replaces direct reads of +-- agent_contexts.last_test_* columns. 
+--
+-- The columns on agent_contexts stay for backward compat — recordTest()
+-- still writes them for third-party (non-owner) runs, and a follow-up
+-- migration drops them once recordTest() retires (gated on the
+-- agent_test_history drop, which is itself gated on the soak windows
+-- documented in #4247).
+--
+-- last_test_passed: derived from overall_status='passing' (NULL without a run).
+-- last_test_scenario: NULL when no run matched (latest.tested_at IS NULL;
+-- assumes tested_at is NOT NULL on real rows — TODO confirm), otherwise
+-- tracks_json[0].track, else 'compliance' (canonical runs don't carry the
+-- legacy 'quality_evaluation' scenario string; first track is the closest).
+-- last_test_summary: agent_compliance_runs.headline.
+-- last_tested_at: agent_compliance_runs.tested_at.
+-- total_tests_run: COUNT(*) of non-dry-run agent_compliance_runs rows
+-- scoped to the org+url. Was a per-context counter on the old column; the
+-- COUNT-based derivation matches the new canonical semantics.
+CREATE OR REPLACE VIEW agent_context_with_latest_test AS
+SELECT
+  ac.*,
+  latest.tested_at AS canonical_last_tested_at,
+  latest.overall_status = 'passing' AS canonical_last_test_passed,
+  CASE
+    WHEN latest.tested_at IS NULL THEN NULL
+    ELSE COALESCE(latest.tracks_json -> 0 ->> 'track', 'compliance')
+  END AS canonical_last_test_scenario,
+  latest.headline AS canonical_last_test_summary,
+  COALESCE(run_counts.total, 0) AS canonical_total_tests_run
+FROM agent_contexts ac
+LEFT JOIN LATERAL (
+  SELECT tested_at, overall_status, tracks_json, headline
+  FROM agent_compliance_runs acr
+  WHERE acr.triggered_org_id = ac.organization_id
+    AND acr.agent_url = ac.agent_url
+    AND acr.dry_run = FALSE
+  ORDER BY tested_at DESC
+  LIMIT 1
+) AS latest ON TRUE
+LEFT JOIN LATERAL (
+  SELECT COUNT(*)::INT AS total
+  FROM agent_compliance_runs acr
+  WHERE acr.triggered_org_id = ac.organization_id
+    AND acr.agent_url = ac.agent_url
+    AND acr.dry_run = FALSE
+) AS run_counts ON TRUE;
+
+COMMENT ON VIEW agent_context_with_latest_test IS
+  'Derives last_test_* fields from agent_compliance_runs
(triggered_org_id-scoped). Replaces direct reads of agent_contexts.last_test_*. The legacy columns stay for backward compat until recordTest() retires (#4247 PR-after-drop).'; From 1103d56e8208f80de3ec85fa821be308ca6c6ce9 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Fri, 8 May 2026 20:05:32 -0400 Subject: [PATCH 2/2] nit: comment on triggered_org_id TEXT rationale; call out last_test_scenario semantic shift in changeset --- .../derive-agent-context-last-test-from-canonical.md | 8 ++++++++ .../473_agent_compliance_runs_triggered_org_id.sql | 3 +++ 2 files changed, 11 insertions(+) diff --git a/.changeset/derive-agent-context-last-test-from-canonical.md b/.changeset/derive-agent-context-last-test-from-canonical.md index 68668a6c43..6e0ca3e7b4 100644 --- a/.changeset/derive-agent-context-last-test-from-canonical.md +++ b/.changeset/derive-agent-context-last-test-from-canonical.md @@ -29,6 +29,14 @@ The columns become dead-letter once `agent_test_history` is dropped (gated on the soak windows in #4247) and `recordTest()` retires in the follow-up "final cleanup" PR. +**Semantic shift (last_test_scenario).** For owner test runs, +`last_test_scenario` now returns `tracks_json[0].track` (falling back to +`'compliance'` when the run has no tracks) rather than the literal +`'quality_evaluation'` string the old `recordTest()` write path stored. No +existing callers branch on this value, but downstream consumers that read +`last_test_scenario` should expect a track name sourced from the canonical +run record rather than the legacy scenario string.
+ **Index.** `idx_agent_compliance_runs_triggered_org_url_at` on `(triggered_org_id, agent_url, tested_at DESC)` (partial, only where `triggered_org_id IS NOT NULL`) supports the view's per-org `DISTINCT ON` diff --git a/server/src/db/migrations/473_agent_compliance_runs_triggered_org_id.sql b/server/src/db/migrations/473_agent_compliance_runs_triggered_org_id.sql index 7c07a2435a..6344a41478 100644 --- a/server/src/db/migrations/473_agent_compliance_runs_triggered_org_id.sql +++ b/server/src/db/migrations/473_agent_compliance_runs_triggered_org_id.sql @@ -17,6 +17,9 @@ ALTER TABLE agent_compliance_runs ADD COLUMN IF NOT EXISTS triggered_org_id TEXT; +COMMENT ON COLUMN agent_compliance_runs.triggered_org_id IS + 'WorkOS organization ID of the org that triggered the run. Stored as TEXT (no FK) because WorkOS IDs are foreign-system keys — referential integrity against organizations.workos_organization_id is not enforced at the DB layer. Populated only for triggered_by=''owner_test''; heartbeat / manual / webhook rows leave it NULL.'; + -- Index supports the derived `agent_context_with_latest_test` view's -- per-(org, url) DISTINCT ON lookup. tested_at DESC keeps the latest-row -- pull as a single index scan.