From 91435cac45b7be29c80aee71c5fd5bb9202c63f9 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 8 May 2026 19:00:50 +0000
Subject: [PATCH 1/5] fix(addie): owner evaluate_agent_quality writes to
 canonical compliance state

Closes the 12-hour gap between owner-triggered storyboard runs and the public
/api/registry/agents/:url/compliance endpoint (issue #4247, PR 1 of 4).

When evaluate_agent_quality is triggered by the agent owner, the result is now
written to agent_compliance_status + agent_compliance_runs + agent_storyboard_status
with triggered_by = 'owner_test' (skipped when the agent has compliance_opt_out set).
Non-owner runs skip the canonical write. The legacy agent_test_history path is
still reached by every run, owner or not, until it is deprecated in PR 3.
Migration 471 adds 'owner_test' to both triggered_by CHECK constraints.
notifyComplianceChange is intentionally suppressed for owner runs to prevent
iteration-loop Slack spam.

https://claude.ai/code/session_01UNHkGhBXk9XD2dpzvSLdhb
---
 .changeset/unify-owner-compliance-writes.md   | 10 ++++
 server/src/addie/config-version.ts            |  2 +-
 server/src/addie/mcp/member-tools.ts          | 56 ++++++++++++++++++-
 server/src/db/compliance-db.ts                |  2 +-
 .../471_owner_test_triggered_by.sql           | 16 ++++++
 5 files changed, 83 insertions(+), 3 deletions(-)
 create mode 100644 .changeset/unify-owner-compliance-writes.md
 create mode 100644 server/src/db/migrations/471_owner_test_triggered_by.sql

diff --git a/.changeset/unify-owner-compliance-writes.md b/.changeset/unify-owner-compliance-writes.md
new file mode 100644
index 0000000000..1bdb5c7f9e
--- /dev/null
+++ b/.changeset/unify-owner-compliance-writes.md
@@ -0,0 +1,10 @@
+---
+---
+
+PR 1 of 4 in the compliance-state unification initiative (issue #4247): owner-triggered
+`evaluate_agent_quality` runs now write to canonical compliance tables
+(`agent_compliance_status`, `agent_compliance_runs`, `agent_storyboard_status`) with
+`triggered_by = 'owner_test'`, closing the 12-hour gap between owner tests and the
+public `/api/registry/agents/:url/compliance` endpoint. The legacy `agent_test_history`
+write is kept for all runs, owner or not, until PR 3 deprecates it. Adds `'owner_test'`
+to both `triggered_by` CHECK constraints via migration 471.
diff --git a/server/src/addie/config-version.ts b/server/src/addie/config-version.ts
index f184b82fcd..bf00b2c4d6 100644
--- a/server/src/addie/config-version.ts
+++ b/server/src/addie/config-version.ts
@@ -30,7 +30,7 @@ import { loadRules, loadResponseStyle } from './rules/index.js';
  * Format: YYYY.MM.N where N is incremented for multiple changes in a month
  * Example: 2025.01.1, 2025.01.2, 2025.02.1
  */
-export const CODE_VERSION = '2026.04.6';
+export const CODE_VERSION = '2026.05.1';
 
 // Types
 export interface ConfigVersion {
diff --git a/server/src/addie/mcp/member-tools.ts b/server/src/addie/mcp/member-tools.ts
index a37b108c1d..d2046c5e7c 100644
--- a/server/src/addie/mcp/member-tools.ts
+++ b/server/src/addie/mcp/member-tools.ts
@@ -34,6 +34,7 @@ import {
   SAMPLE_BRIEFS,
   classifyCapabilityResolutionError,
   presentCapabilityResolutionError,
+  complianceResultToDbInput,
   type ComplyOptions,
   type ComplianceTrack,
 } from '../services/compliance-testing.js';
@@ -3559,8 +3560,61 @@ export function createMemberToolHandlers(
         );
       }
 
-      // Record result if the user has an org with this agent saved
+      // Record result when the user has an org context for this agent.
       if (organizationId) {
+        // Write to canonical compliance tables when the calling org owns this agent.
+        // Mirrors resolveAgentOwnerOrg (registry-api.ts:4733) — joins organization_memberships
+        // to verify the acting user still has a membership row in the owning org.
+        // Non-owner runs skip the canonical write; owner and non-owner runs alike then
+        // continue to the legacy agent_test_history path below.
+        const workosUserId = memberContext?.workos_user?.workos_user_id;
+        let isAgentOwner = false;
+        if (workosUserId) {
+          try {
+            const ownerCheck = await query(
+              `SELECT 1 FROM member_profiles mp
+               JOIN organization_memberships om
+                 ON om.workos_organization_id = mp.workos_organization_id
+               WHERE mp.workos_organization_id = $1
+                 AND mp.agents @> $2::jsonb
+                 AND om.workos_user_id = $3
+               LIMIT 1`,
+              [organizationId, JSON.stringify([{ url: resolved.resolvedUrl }]), workosUserId],
+            );
+            isAgentOwner = ownerCheck.rows.length > 0;
+          } catch (ownerCheckError) {
+            logger.warn({ ownerCheckError }, 'evaluate_agent_quality: owner check failed, skipping canonical write');
+          }
+        }
+
+        if (isAgentOwner) {
+          try {
+            const metadata = await complianceDb.getRegistryMetadata(resolved.resolvedUrl);
+            // Skip canonical write if the owner has opted out of compliance monitoring.
+            if (!metadata?.compliance_opt_out) {
+              const dbInput = {
+                ...complianceResultToDbInput(
+                  result,
+                  resolved.resolvedUrl,
+                  metadata?.lifecycle_stage ?? 'production',
+                  'owner_test',
+                ),
+                // Owner test runs are not dry runs — they update the live public record.
+                // (complianceResultToDbInput hard-codes dry_run: true; override here.)
+                dry_run: false,
+              };
+              await complianceDb.recordComplianceRun(dbInput);
+              // notifyComplianceChange intentionally omitted: owner test runs are
+              // exploratory; compliance-change notifications fire on heartbeat
+              // transitions only to prevent iteration-loop spam.
+            }
+          } catch (error) {
+            logger.warn({ error, agentUrl: resolved.resolvedUrl }, 'Could not write owner test result to canonical compliance state');
+          }
+        }
+
+        // Legacy write to agent_contexts + agent_test_history. Retained for
+        // backward compatibility until PR 3 migrates callers and drops the table.
         try {
           const context = await agentContextDb.getByOrgAndUrl(organizationId, resolved.resolvedUrl);
           if (context) {
diff --git a/server/src/db/compliance-db.ts b/server/src/db/compliance-db.ts
index 236abff905..0ff9180249 100644
--- a/server/src/db/compliance-db.ts
+++ b/server/src/db/compliance-db.ts
@@ -11,7 +11,7 @@ const logger = baseLogger.child({ module: 'compliance-db' });
 export type LifecycleStage = 'development' | 'testing' | 'production' | 'deprecated';
 export type ComplianceStatus = 'passing' | 'degraded' | 'failing' | 'unknown';
 export type OverallRunStatus = 'passing' | 'failing' | 'partial';
-export type TriggeredBy = 'heartbeat' | 'manual' | 'webhook';
+export type TriggeredBy = 'heartbeat' | 'manual' | 'webhook' | 'owner_test';
 export type TrackStatus = 'pass' | 'fail' | 'partial' | 'skip' | 'silent';
 
 /**
diff --git a/server/src/db/migrations/471_owner_test_triggered_by.sql b/server/src/db/migrations/471_owner_test_triggered_by.sql
new file mode 100644
index 0000000000..0e450bb1ad
--- /dev/null
+++ b/server/src/db/migrations/471_owner_test_triggered_by.sql
@@ -0,0 +1,16 @@
+-- Add 'owner_test' to triggered_by CHECK constraints in compliance tables.
+-- Owner-triggered storyboard runs (via evaluate_agent_quality) now write to
+-- canonical compliance state, distinguished from heartbeat and dashboard-manual
+-- runs by triggered_by = 'owner_test'. See issue #4247.
+
+ALTER TABLE agent_compliance_runs
+  DROP CONSTRAINT IF EXISTS valid_triggered_by,
+  ADD CONSTRAINT valid_triggered_by CHECK (
+    triggered_by IN ('heartbeat', 'manual', 'webhook', 'owner_test')
+  );
+
+ALTER TABLE agent_storyboard_status
+  DROP CONSTRAINT IF EXISTS valid_storyboard_triggered_by,
+  ADD CONSTRAINT valid_storyboard_triggered_by CHECK (
+    triggered_by IS NULL OR triggered_by IN ('heartbeat', 'manual', 'webhook', 'owner_test')
+  );

From 705914717d58c04c795fad6512c6a4b78defe8d9 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 8 May 2026 19:02:21 +0000
Subject: [PATCH 2/5] chore(dist): add generated onboarding-openapi schema
 files

Build-generated output produced by npm run build; matches the tracking
pattern of member-agents-openapi.js and registry.js already in dist/schemas/.

https://claude.ai/code/session_01UNHkGhBXk9XD2dpzvSLdhb
---
 dist/schemas/onboarding-openapi.d.ts     |  27 +++++
 dist/schemas/onboarding-openapi.d.ts.map |   1 +
 dist/schemas/onboarding-openapi.js       | 135 +++++++++++++++++++++++
 dist/schemas/onboarding-openapi.js.map   |   1 +
 4 files changed, 164 insertions(+)
 create mode 100644 dist/schemas/onboarding-openapi.d.ts
 create mode 100644 dist/schemas/onboarding-openapi.d.ts.map
 create mode 100644 dist/schemas/onboarding-openapi.js
 create mode 100644 dist/schemas/onboarding-openapi.js.map

diff --git a/dist/schemas/onboarding-openapi.d.ts b/dist/schemas/onboarding-openapi.d.ts
new file mode 100644
index 0000000000..ed29eaf6de
--- /dev/null
+++ b/dist/schemas/onboarding-openapi.d.ts
@@ -0,0 +1,27 @@
+/**
+ * OpenAPI registrations for the onboarding REST surface.
+ *
+ * `POST /api/organizations` has existed in production for a long time but
+ * has only ever been documented as a private endpoint exercised by the AAO
+ * dashboard's `/onboarding` form. Surfacing it in the public spec is the
+ * minimum-surface answer to the storefront-bootstrap question: a
+ * third-party app holding only a user's OAuth token needs *one* documented
+ * call to materialize the org, then `POST /api/me/agents` to land an agent
+ * (which auto-creates the member profile on first call).
+ *
+ * Two fields the handler accepts but the public schema deliberately omits:
+ *
+ * - `membership_tier` — owned exclusively by the Stripe webhook. Accepting
+ *   it from the caller would let any user stamp tier intent on their org
+ *   row, leaking tier-gated UI state until/unless a real subscription
+ *   overwrites the column.
+ * - `corporate_domain` — server derives the value from the authenticated
+ *   user's email. Accepting it as a field invited 400s when a caller's
+ *   value disagreed with their email and gave nothing back when it agreed.
+ *
+ * Kept in its own module so the spec generator's import graph stays free
+ * of route handlers (each route file's transitive imports pull in WorkOS
+ * init, which fails at module load without env vars).
+ */
+export {};
+//# sourceMappingURL=onboarding-openapi.d.ts.map
\ No newline at end of file
diff --git a/dist/schemas/onboarding-openapi.d.ts.map b/dist/schemas/onboarding-openapi.d.ts.map
new file mode 100644
index 0000000000..9e449fbf0f
--- /dev/null
+++ b/dist/schemas/onboarding-openapi.d.ts.map
@@ -0,0 +1 @@
+{"version":3,"file":"onboarding-openapi.d.ts","sourceRoot":"","sources":["../../server/src/schemas/onboarding-openapi.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG"}
\ No newline at end of file
diff --git a/dist/schemas/onboarding-openapi.js b/dist/schemas/onboarding-openapi.js
new file mode 100644
index 0000000000..d3e3074486
--- /dev/null
+++ b/dist/schemas/onboarding-openapi.js
@@ -0,0 +1,135 @@
+/**
+ * OpenAPI registrations for the onboarding REST surface.
+ *
+ * `POST /api/organizations` has existed in production for a long time but
+ * has only ever been documented as a private endpoint exercised by the AAO
+ * dashboard's `/onboarding` form. Surfacing it in the public spec is the
+ * minimum-surface answer to the storefront-bootstrap question: a
+ * third-party app holding only a user's OAuth token needs *one* documented
+ * call to materialize the org, then `POST /api/me/agents` to land an agent
+ * (which auto-creates the member profile on first call).
+ *
+ * Two fields the handler accepts but the public schema deliberately omits:
+ *
+ * - `membership_tier` — owned exclusively by the Stripe webhook. Accepting
+ *   it from the caller would let any user stamp tier intent on their org
+ *   row, leaking tier-gated UI state until/unless a real subscription
+ *   overwrites the column.
+ * - `corporate_domain` — server derives the value from the authenticated
+ *   user's email. Accepting it as a field invited 400s when a caller's
+ *   value disagreed with their email and gave nothing back when it agreed.
+ *
+ * Kept in its own module so the spec generator's import graph stays free
+ * of route handlers (each route file's transitive imports pull in WorkOS
+ * init, which fails at module load without env vars).
+ */
+import { z } from 'zod';
+import { registry, ErrorSchema } from './registry.js';
+const OrganizationCompanyTypeSchema = z
+    .enum(['adtech', 'agency', 'brand', 'publisher', 'data', 'ai', 'other'])
+    .openapi('OrganizationCompanyType', {
+    description: "Coarse classification of the organization's role in the open ad ecosystem. Drives default verification badges and the member profile's display category.",
+});
+const OrganizationRevenueTierSchema = z
+    .enum(['under_1m', '1m_5m', '5m_50m', '50m_250m', '250m_1b', '1b_plus'])
+    .openapi('OrganizationRevenueTier', {
+    description: 'Annual revenue band, USD. Drives membership-tier eligibility for company-tier seats.',
+});
+const CreateOrganizationInputSchema = z
+    .object({
+    organization_name: z.string().min(1).max(200).openapi({
+        description: "Display name for the organization. Used both as the org row name and (when auto-bootstrapping a member profile via the first agent registration) as the profile's `display_name`.",
+        example: 'Acme Media',
+    }),
+    is_personal: z.boolean().optional().openapi({
+        description: 'Set to `true` to create a personal workspace instead of a corporate organization. Personal workspaces skip corporate-domain verification, are limited to one per user, and cannot host the `company_*` membership tiers.',
+        default: false,
+    }),
+    company_type: OrganizationCompanyTypeSchema.optional(),
+    revenue_tier: OrganizationRevenueTierSchema.optional(),
+    marketing_opt_in: z.boolean().optional().openapi({
+        description: 'Whether the caller opted in to AAO marketing communications. Recorded once per user (not overwritten on subsequent calls). Independent of Terms-of-Service consent, which is recorded server-side from the request context.',
+        default: false,
+    }),
+})
+    .openapi('CreateOrganizationInput', {
+    description: [
+        'Request body for `POST /api/organizations`.',
+        "Bootstraps a WorkOS organization, mirrors the caller as `owner`, records the caller's ToS / privacy-policy acceptance, and (for non-personal orgs) inserts an email-verified record into `organization_domains` so subsequent registry calls can skip explicit domain-verification.",
+        "Membership tier and corporate domain are *not* caller-supplied: the tier is set by the Stripe webhook on subscription events, and the corporate domain is derived from the authenticated user's email.",
+    ].join('\n\n'),
+});
+const CreateOrganizationResponseSchema = z
+    .object({
+    success: z.boolean().optional(),
+    organization: z
+        .object({
+        id: z.string().openapi({ example: 'org_01HXZAB123' }),
+        name: z.string().openapi({ example: 'Acme Media' }),
+    })
+        .optional(),
+    id: z.string().optional().openapi({
+        description: "Set on the **prospect-adoption** path: when an org with the user's email domain already exists in a `prospect` state (i.e. the registry pre-recorded it from a brand crawl but no human had claimed it yet), this call adopts that org for the caller instead of creating a new one.",
+    }),
+    name: z.string().optional(),
+    adopted: z.boolean().optional().openapi({
+        description: '`true` when the response is the prospect-adoption path. When `true`, no new WorkOS organization was created — the caller is now the owner of an existing prospect record.',
+    }),
+})
+    .openapi('CreateOrganizationResponse', {
+    description: 'Response from `POST /api/organizations`. The body shape varies by path: a fresh creation returns `{ success: true, organization: { id, name } }`; a prospect adoption returns `{ id, name, adopted: true }` directly. Both paths are 2xx; downstream callers should treat any `2xx` as "the org now exists and you are an owner of it" and read whichever id is present.',
+});
+registry.registerPath({
+    method: 'post',
+    path: '/api/organizations',
+    operationId: 'createOrganization',
+    summary: 'Create or adopt my organization',
+    description: [
+        "Bootstrap the caller's organization explicitly. Use this when the caller wants to control the organization name, `company_type`, `revenue_tier`, or `is_personal` flag before any agents are registered.",
+        "**Most storefront-style integrations don't need this call** — `POST /api/me/agents` will auto-create an org for a fresh OAuth user (corporate or personal workspace based on the email domain) and surface `org_auto_created: true` in the response. Reach for `POST /api/organizations` only when the auto-derived defaults aren't acceptable.",
+        'Three outcomes depending on the caller\'s state:',
+        "- **Fresh create** (most common): a new WorkOS organization is created, the caller is added as `owner`, the corporate domain is recorded as email-verified, and ToS / privacy-policy acceptance is logged from the request context. Returns `{ success: true, organization: { id, name } }`.",
+        "- **Prospect adoption**: an organization with the caller's email domain already exists as a `prospect` (the registry pre-recorded it from a brand crawl but no human had claimed it yet). The caller is promoted to `owner` of the existing record instead of forking a duplicate. Returns `{ id, name, adopted: true }`.",
+        '- **Already-active conflict**: the org exists and is already claimed by another paying member or a previously joined user. Returns `409` with the existing org id so the caller can switch to a join-request flow (`POST /api/organizations/:orgId/join-requests`) instead of trying to register a duplicate.',
+        'Tier transitions happen via the billing flow only — there is no `membership_tier` field on this endpoint. After org creation, send the user to `POST /api/checkout-session` (or the dashboard `/membership` page) to start a subscription; the Stripe webhook is the sole writer of `organizations.membership_tier`.',
+        'Rate-limited per user: `15` failed attempts per hour; successful calls do not count against the limit so a legitimate registration is never penalized by earlier validation errors.',
+    ].join('\n\n'),
+    tags: ['Onboarding'],
+    security: [{ bearerAuth: [] }, { oauth2: [] }],
+    request: {
+        body: { content: { 'application/json': { schema: CreateOrganizationInputSchema } } },
+    },
+    responses: {
+        200: {
+            description: 'Prospect adoption — an existing prospect organization for this domain was claimed by the caller. Body is `{ id, name, adopted: true }`.',
+            content: { 'application/json': { schema: CreateOrganizationResponseSchema } },
+        },
+        201: {
+            description: 'New organization created. Body is `{ success: true, organization: { id, name } }`. The caller is the `owner`; the corporate domain is recorded as email-verified for downstream registry calls.',
+            content: { 'application/json': { schema: CreateOrganizationResponseSchema } },
+        },
+        400: {
+            description: [
+                'One of:',
+                '- `organization_name` missing or invalid',
+                '- `company_type` / `revenue_tier` value not in the documented enum',
+                "- caller is on a personal-email domain (gmail.com, yahoo.com, …) and is trying to register a corporate org — register `is_personal: true` instead",
+                '- per-user organization cap reached (10 orgs per user)',
+            ].join('\n'),
+            content: { 'application/json': { schema: ErrorSchema } },
+        },
+        401: {
+            description: 'Authentication required',
+            content: { 'application/json': { schema: ErrorSchema } },
+        },
+        409: {
+            description: "An active organization already exists for this caller's email domain. The body includes `existing_org_id` and `existing_org_name`; the caller should switch to the join-request flow rather than retrying.",
+            content: { 'application/json': { schema: ErrorSchema } },
+        },
+        429: {
+            description: 'Rate limit exceeded — 15 failed attempts per hour per user. Successful calls do not count against the limit.',
+            content: { 'application/json': { schema: ErrorSchema } },
+        },
+    },
+});
+//# sourceMappingURL=onboarding-openapi.js.map
\ No newline at end of file
diff --git a/dist/schemas/onboarding-openapi.js.map b/dist/schemas/onboarding-openapi.js.map
new file mode 100644
index 0000000000..9fb894c315
--- /dev/null
+++ b/dist/schemas/onboarding-openapi.js.map
@@ -0,0 +1 @@
+{"version":3,"file":"onboarding-openapi.js","sourceRoot":"","sources":["../../server/src/schemas/onboarding-openapi.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAEtD,MAAM,6BAA6B,GAAG,CAAC;KACpC,IAAI,CAAC,CAAC,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;KACvE,OAAO,CAAC,yBAAyB,EAAE;IAClC,WAAW,EACT,0JAA0J;CAC7J,CAAC,CAAC;AAEL,MAAM,6BAA6B,GAAG,CAAC;KACpC,IAAI,CAAC,CAAC,UAAU,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;KACvE,OAAO,CAAC,yBAAyB,EAAE;IAClC,WAAW,EACT,sFAAsF;CACzF,CAAC,CAAC;AAEL,MAAM,6BAA6B,GAAG,CAAC;KACpC,MAAM,CAAC;IACN,iBAAiB,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC;QACpD,WAAW,EACT,mLAAmL;QACrL,OAAO,EAAE,YAAY;KACtB,CAAC;IACF,WAAW,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC;QAC1C,WAAW,EACT,0NAA0N;QAC5N,OAAO,EAAE,KAAK;KACf,CAAC;IACF,YAAY,EAAE,6BAA6B,CAAC,QAAQ,EAAE;IACtD,YAAY,EAAE,6BAA6B,CAAC,QAAQ,EAAE;IACtD,gBAAgB,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC;QAC/C,WAAW,EACT,6NAA6N;QAC/N,OAAO,EAAE,KAAK;KACf,CAAC;CACH,CAAC;KACD,OAAO,CAAC,yBAAyB,EAAE;IAClC,WAAW,EAAE;QACX,6CAA6C;QAC7C,qRAAqR;QACrR,wMAAwM;KACzM,CAAC,IAAI,CAAC,MAAM,CAAC;CACf,CAAC,CAAC;AAEL,MAAM,gCAAgC,GAAG,CAAC;KACvC,MAAM,CAAC;IACN,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE;IAC/B,YAAY,EAAE,CAAC;SACZ,MAAM,CAAC;QACN,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,EAAE,OAAO,EAAE,gBAAgB,EAAE,CAAC;QACrD,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC;KACpD,CAAC;SACD,QAAQ,EAAE;IACb,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC;QAChC,WAAW,EACT,sRAAsR;KACzR,CAAC;IACF,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC3B,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC;QACtC,WAAW,EACT,2KAA2K;KAC9K,CAAC;CACH,CAAC;KACD,OAAO,CAAC,4BAA4B,EAAE;IACrC,WAAW,EACT,0WAA0W;CAC7W,CAAC,CAAC;AAEL,QAAQ,CAAC,YAAY,CAAC;IACpB,MAAM,EAAE,MAAM;IACd,IAAI,EAAE,oB
AAoB;IAC1B,WAAW,EAAE,oBAAoB;IACjC,OAAO,EAAE,iCAAiC;IAC1C,WAAW,EAAE;QACX,0MAA0M;QAC1M,iVAAiV;QACjV,kDAAkD;QAClD,8RAA8R;QAC9R,2TAA2T;QAC3T,+SAA+S;QAC/S,sTAAsT;QACtT,qLAAqL;KACtL,CAAC,IAAI,CAAC,MAAM,CAAC;IACd,IAAI,EAAE,CAAC,YAAY,CAAC;IACpB,QAAQ,EAAE,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;IAC9C,OAAO,EAAE;QACP,IAAI,EAAE,EAAE,OAAO,EAAE,EAAE,kBAAkB,EAAE,EAAE,MAAM,EAAE,6BAA6B,EAAE,EAAE,EAAE;KACrF;IACD,SAAS,EAAE;QACT,GAAG,EAAE;YACH,WAAW,EACT,yIAAyI;YAC3I,OAAO,EAAE,EAAE,kBAAkB,EAAE,EAAE,MAAM,EAAE,gCAAgC,EAAE,EAAE;SAC9E;QACD,GAAG,EAAE;YACH,WAAW,EACT,iMAAiM;YACnM,OAAO,EAAE,EAAE,kBAAkB,EAAE,EAAE,MAAM,EAAE,gCAAgC,EAAE,EAAE;SAC9E;QACD,GAAG,EAAE;YACH,WAAW,EAAE;gBACX,SAAS;gBACT,0CAA0C;gBAC1C,oEAAoE;gBACpE,mJAAmJ;gBACnJ,wDAAwD;aACzD,CAAC,IAAI,CAAC,IAAI,CAAC;YACZ,OAAO,EAAE,EAAE,kBAAkB,EAAE,EAAE,MAAM,EAAE,WAAW,EAAE,EAAE;SACzD;QACD,GAAG,EAAE;YACH,WAAW,EAAE,yBAAyB;YACtC,OAAO,EAAE,EAAE,kBAAkB,EAAE,EAAE,MAAM,EAAE,WAAW,EAAE,EAAE;SACzD;QACD,GAAG,EAAE;YACH,WAAW,EACT,4MAA4M;YAC9M,OAAO,EAAE,EAAE,kBAAkB,EAAE,EAAE,MAAM,EAAE,WAAW,EAAE,EAAE;SACzD;QACD,GAAG,EAAE;YACH,WAAW,EACT,8GAA8G;YAChH,OAAO,EAAE,EAAE,kBAAkB,EAAE,EAAE,MAAM,EAAE,WAAW,EAAE,EAAE;SACzD;KACF;CACF,CAAC,CAAC"}
\ No newline at end of file

From 0c996a6cae6ff76af4fbf04203dd653a8ccfad31 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 8 May 2026 19:25:56 +0000
Subject: [PATCH 3/5] fix(addie): add verdict_source to compliance response +
 last-write-wins test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address review feedback from @EmmaLouise2018 on PR #4250:

1. `verdict_source` field on /api/registry/agents/:url/compliance
   — `AgentComplianceDetailSchema` gains optional `verdict_source`:
     'heartbeat' | 'owner_test' | 'manual' | 'webhook' | null
   — `getComplianceStatus` and `bulkGetComplianceStatus` join
     `agent_compliance_runs` via LATERAL subquery (dry_run=false,
     ORDER BY tested_at DESC LIMIT 1) to surface the triggered_by
     of the most recent run.  No migration needed.
   — Endpoint response emits `verdict_source: status.last_triggered_by`.
   — `AgentComplianceStatus` interface gets `last_triggered_by` field.

2. Last-write-wins contract test
   — New `compliance-db-last-write-wins.test.ts` pins the ON CONFLICT
     DO UPDATE semantics: every recordComplianceRun call overwrites
     agent_compliance_status regardless of triggered_by source.  A
     future change to first-write-wins or priority ordering would
     break these tests.

https://claude.ai/code/session_01NVVqgeSGevUGXgDbMw1XKZ
---
 server/src/db/compliance-db.ts                |  18 +-
 server/src/routes/registry-api.ts             |   1 +
 server/src/schemas/registry.ts                |   2 +
 .../compliance-db-last-write-wins.test.ts     | 204 ++++++++++++++++++
 4 files changed, 223 insertions(+), 2 deletions(-)
 create mode 100644 server/tests/unit/compliance-db-last-write-wins.test.ts

diff --git a/server/src/db/compliance-db.ts b/server/src/db/compliance-db.ts
index 0ff9180249..d3770fb889 100644
--- a/server/src/db/compliance-db.ts
+++ b/server/src/db/compliance-db.ts
@@ -118,6 +118,8 @@ export interface AgentComplianceStatus {
   previous_status: string | null;
   status_changed_at: Date | null;
   updated_at: Date;
+  /** triggered_by of the latest (by tested_at) non-dry-run row in agent_compliance_runs; null when none exists */
+  last_triggered_by: TriggeredBy | null;
 }
 
 export type StoryboardStatus = 'passing' | 'failing' | 'partial' | 'untested';
@@ -427,9 +429,15 @@ export class ComplianceDatabase {
 
   async getComplianceStatus(agentUrl: string): Promise<AgentComplianceStatus | null> {
     const result = await query(
-      `SELECT s.*, COALESCE(m.lifecycle_stage, 'production') AS lifecycle_stage
+      `SELECT s.*, COALESCE(m.lifecycle_stage, 'production') AS lifecycle_stage,
+              r.triggered_by AS last_triggered_by
        FROM agent_compliance_status s
        LEFT JOIN agent_registry_metadata m ON m.agent_url = s.agent_url
+       LEFT JOIN LATERAL (
+         SELECT triggered_by FROM agent_compliance_runs
+         WHERE agent_url = s.agent_url AND dry_run = false
+         ORDER BY tested_at DESC LIMIT 1
+       ) r ON true
        WHERE s.agent_url = $1`,
       [agentUrl],
     );
@@ -455,9 +463,15 @@ export class ComplianceDatabase {
     if (agentUrls.length === 0) return new Map();
 
     const result = await query(
-      `SELECT s.*, COALESCE(m.lifecycle_stage, 'production') AS lifecycle_stage
+      `SELECT s.*, COALESCE(m.lifecycle_stage, 'production') AS lifecycle_stage,
+              r.triggered_by AS last_triggered_by
        FROM agent_compliance_status s
        LEFT JOIN agent_registry_metadata m ON m.agent_url = s.agent_url
+       LEFT JOIN LATERAL (
+         SELECT triggered_by FROM agent_compliance_runs
+         WHERE agent_url = s.agent_url AND dry_run = false
+         ORDER BY tested_at DESC LIMIT 1
+       ) r ON true
        WHERE s.agent_url = ANY($1)`,
       [agentUrls],
     );
diff --git a/server/src/routes/registry-api.ts b/server/src/routes/registry-api.ts
index 925aec6cda..936b7b5dc0 100644
--- a/server/src/routes/registry-api.ts
+++ b/server/src/routes/registry-api.ts
@@ -4253,6 +4253,7 @@ export function createRegistryApiRouter(config: RegistryApiConfig): Router {
         membership_tier_label: ownerMembership.membership_tier_label,
         subscription_status: ownerMembership.subscription_status,
         is_api_access_tier: ownerMembership.is_api_access_tier,
+        verdict_source: status.last_triggered_by ?? null,
         verified: badges.length > 0,
         verified_badges: badges.map(b => ({
           role: b.role,
diff --git a/server/src/schemas/registry.ts b/server/src/schemas/registry.ts
index 4a59bdd2be..f29a961f11 100644
--- a/server/src/schemas/registry.ts
+++ b/server/src/schemas/registry.ts
@@ -341,6 +341,8 @@ export const AgentComplianceDetailSchema = z
     membership_tier_label: z.string().nullable().optional().openapi({ description: "Owner-scoped: human-readable label for membership_tier (e.g. 'Builder'). Null for non-owners." }),
     subscription_status: z.string().nullable().optional().openapi({ description: "Owner-scoped: the agent owner's subscription status (active, past_due, trialing, etc.). Null for non-owners." }),
     is_api_access_tier: z.boolean().optional().openapi({ description: "Owner-scoped: true when the owner's tier and subscription status grant badge eligibility. False for non-owners. Single source of truth — UI should not re-derive." }),
+    verdict_source: z.enum(["heartbeat", "owner_test", "manual", "webhook"]).nullable().optional()
+      .openapi({ description: "triggered_by value of the most recent non-dry-run compliance check. 'heartbeat' = scheduled run; 'owner_test' = agent owner triggered via evaluate_agent_quality. Null when no run has been recorded yet." }),
     verified: z.boolean().optional(),
     verified_badges: z.array(VerificationBadgeSchema).optional(),
   })
diff --git a/server/tests/unit/compliance-db-last-write-wins.test.ts b/server/tests/unit/compliance-db-last-write-wins.test.ts
new file mode 100644
index 0000000000..bec028e4bc
--- /dev/null
+++ b/server/tests/unit/compliance-db-last-write-wins.test.ts
@@ -0,0 +1,204 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+
+vi.mock('../../src/db/client.js', () => ({
+  query: vi.fn(),
+  getClient: vi.fn(),
+}));
+
+vi.mock('../../src/db/encryption.js', () => ({
+  decrypt: vi.fn(),
+  encrypt: vi.fn(),
+  deriveKey: vi.fn(),
+}));
+
+import { ComplianceDatabase } from '../../src/db/compliance-db.js';
+import { query, getClient } from '../../src/db/client.js';
+
+const mockedQuery = vi.mocked(query);
+const mockedGetClient = vi.mocked(getClient);
+
+const EMPTY = { rows: [], rowCount: 0, command: '', oid: 0, fields: [] };
+
+function makeTransactionClient(queryResponses: Array<{ rows: any[] }>) {
+  const calls: string[] = [];
+  let idx = 0;
+  const client = {
+    query: vi.fn(async (sql: string) => {
+      calls.push(typeof sql === 'string' ? sql.trim().split(/\s+/)[0] : sql);
+      const resp = queryResponses[idx] ?? EMPTY;
+      idx++;
+      return { ...EMPTY, ...resp };
+    }),
+    release: vi.fn(),
+    _calls: calls,
+  };
+  return client;
+}
+
+const AGENT_URL = 'https://agent.example.com';
+
+function makeRunRow(triggeredBy: string) {
+  return {
+    id: 'run-001',
+    agent_url: AGENT_URL,
+    lifecycle_stage: 'production',
+    overall_status: 'passing',
+    headline: null,
+    total_duration_ms: 100,
+    tested_at: new Date(),
+    tracks_json: [],
+    tracks_passed: 1,
+    tracks_failed: 0,
+    tracks_skipped: 0,
+    tracks_partial: 0,
+    agent_profile_json: null,
+    observations_json: null,
+    triggered_by: triggeredBy,
+    dry_run: false,
+  };
+}
+
+const minimalInput = (triggeredBy: 'heartbeat' | 'owner_test') => ({
+  agent_url: AGENT_URL,
+  lifecycle_stage: 'production' as const,
+  overall_status: 'passing' as const,
+  tracks_json: [{ track: 'core', status: 'pass' as const, scenario_count: 1, passed_count: 1, duration_ms: 100 }],
+  tracks_passed: 1,
+  tracks_failed: 0,
+  tracks_skipped: 0,
+  tracks_partial: 0,
+  triggered_by: triggeredBy,
+  dry_run: false,
+});
+
+describe('ComplianceDatabase — last-write-wins on agent_compliance_status', () => {
+  let db: ComplianceDatabase;
+
+  beforeEach(() => {
+    db = new ComplianceDatabase();
+    vi.clearAllMocks();
+  });
+
+  /**
+   * Contract: agent_compliance_status uses ON CONFLICT DO UPDATE (not DO NOTHING).
+   * Every recordComplianceRun call — regardless of triggered_by — overwrites the
+   * materialized status row. A future change to "pick highest-priority source" or
+   * "first-write-wins" would break this test.
+   */
+  it('always upserts status regardless of triggered_by — last-write-wins', async () => {
+    const statusRow = { rows: [{ status: 'passing', previous_status: null }] };
+
+    const client = makeTransactionClient([
+      EMPTY,            // BEGIN
+      { rows: [makeRunRow('heartbeat')] },  // INSERT agent_compliance_runs
+      statusRow,        // UPSERT agent_compliance_status
+      EMPTY,            // COMMIT
+    ]);
+    mockedGetClient.mockResolvedValueOnce(client as any);
+
+    await db.recordComplianceRun(minimalInput('heartbeat'));
+
+    const upsertCall = client.query.mock.calls.find(
+      ([sql]: [string]) => typeof sql === 'string' && sql.includes('ON CONFLICT (agent_url) DO UPDATE'),
+    );
+    expect(upsertCall).toBeDefined();
+  });
+
+  it('owner_test write at T+1 wins over prior heartbeat — triggered_by is forwarded verbatim', async () => {
+    const statusRow = { rows: [{ status: 'passing', previous_status: 'passing' }] };
+
+    const client1 = makeTransactionClient([
+      EMPTY,
+      { rows: [makeRunRow('heartbeat')] },
+      statusRow,
+      EMPTY,
+    ]);
+    mockedGetClient.mockResolvedValueOnce(client1 as any);
+    await db.recordComplianceRun(minimalInput('heartbeat'));
+
+    const heartbeatRunInsert = client1.query.mock.calls.find(
+      ([sql]: [string]) => typeof sql === 'string' && sql.includes('INSERT INTO agent_compliance_runs'),
+    );
+    expect(heartbeatRunInsert).toBeDefined();
+    expect(heartbeatRunInsert![1]).toContain('heartbeat');
+
+    const client2 = makeTransactionClient([
+      EMPTY,
+      { rows: [makeRunRow('owner_test')] },
+      statusRow,
+      EMPTY,
+    ]);
+    mockedGetClient.mockResolvedValueOnce(client2 as any);
+    await db.recordComplianceRun(minimalInput('owner_test'));
+
+    const ownerTestRunInsert = client2.query.mock.calls.find(
+      ([sql]: [string]) => typeof sql === 'string' && sql.includes('INSERT INTO agent_compliance_runs'),
+    );
+    expect(ownerTestRunInsert).toBeDefined();
+    expect(ownerTestRunInsert![1]).toContain('owner_test');
+
+    const ownerTestUpsert = client2.query.mock.calls.find(
+      ([sql]: [string]) => typeof sql === 'string' && sql.includes('ON CONFLICT (agent_url) DO UPDATE'),
+    );
+    expect(ownerTestUpsert).toBeDefined();
+  });
+
+  it('heartbeat at T+3 wins over prior owner_test at T+2 — no source-priority filtering', async () => {
+    const statusRow = { rows: [{ status: 'passing', previous_status: 'passing' }] };
+
+    const client = makeTransactionClient([
+      EMPTY,
+      { rows: [makeRunRow('heartbeat')] },
+      statusRow,
+      EMPTY,
+    ]);
+    mockedGetClient.mockResolvedValueOnce(client as any);
+    await db.recordComplianceRun(minimalInput('heartbeat'));
+
+    const runInsert = client.query.mock.calls.find(
+      ([sql]: [string]) => typeof sql === 'string' && sql.includes('INSERT INTO agent_compliance_runs'),
+    );
+    expect(runInsert![1]).toContain('heartbeat');
+
+    const upsert = client.query.mock.calls.find(
+      ([sql]: [string]) => typeof sql === 'string' && sql.includes('ON CONFLICT (agent_url) DO UPDATE'),
+    );
+    expect(upsert).toBeDefined();
+  });
+
+  it('getComplianceStatus LATERAL join returns last_triggered_by from most recent non-dry run', async () => {
+    const now = new Date();
+    mockedQuery.mockResolvedValueOnce({
+      rows: [{
+        agent_url: AGENT_URL,
+        status: 'passing',
+        lifecycle_stage: 'production',
+        last_checked_at: now,
+        last_passed_at: now,
+        last_failed_at: null,
+        streak_days: 1,
+        streak_started_at: now,
+        tracks_summary_json: { core: 'pass' },
+        headline: null,
+        previous_status: null,
+        status_changed_at: null,
+        updated_at: now,
+        last_triggered_by: 'owner_test',
+      }],
+      rowCount: 1,
+      command: '',
+      oid: 0,
+      fields: [],
+    });
+
+    const status = await db.getComplianceStatus(AGENT_URL);
+
+    expect(status).not.toBeNull();
+    expect(status!.last_triggered_by).toBe('owner_test');
+
+    const [sql] = mockedQuery.mock.calls[0];
+    expect(sql).toContain('dry_run = false');
+    expect(sql).toContain('ORDER BY tested_at DESC');
+    expect(sql).toContain('LIMIT 1');
+  });
+});

From 47b26d43f60cebd1adaf189f05441072d707a822 Mon Sep 17 00:00:00 2001
From: Emma Mulitz <emulitz@scope3.com>
Date: Fri, 8 May 2026 16:18:57 -0400
Subject: [PATCH 4/5] feat(dashboard): surface verdict_source + per-run
 triggered_by badge
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR 2 of the #4247 unification stack. Reads two fields PR #4250 added
to the compliance API but the dashboard wasn't yet rendering:

- compliance tile: appends "(your test)" / "(heartbeat)" / "(manual)"
  / "(webhook)" after Last checked, so operators see whether the
  current verdict came from their own evaluate_agent_quality run or
  the scheduled heartbeat.
- history panel: per-run badge with the same source label, info-blue
  for owner_test and neutral for the rest. Pre-PR-1 rows render with
  neutral — no regression.

No backend changes; pure UI surfacing of fields already in the API.
Stacked on PR #4250.
---
 .../dashboard-surfaces-verdict-source.md      | 31 +++++++++++++++++++
 server/public/dashboard-agents.html           | 31 ++++++++++++++++++-
 2 files changed, 61 insertions(+), 1 deletion(-)
 create mode 100644 .changeset/dashboard-surfaces-verdict-source.md

diff --git a/.changeset/dashboard-surfaces-verdict-source.md b/.changeset/dashboard-surfaces-verdict-source.md
new file mode 100644
index 0000000000..e7fa44366d
--- /dev/null
+++ b/.changeset/dashboard-surfaces-verdict-source.md
@@ -0,0 +1,31 @@
+---
+---
+
+Dashboard `/dashboard/agents` surfaces the new `verdict_source` field on the
+compliance tile and a per-run "Your test / Heartbeat / Manual / Webhook"
+badge in the History panel. PR 2 of the #4247 unification stack —
+read-side cleanup that lets owners distinguish their own on-demand
+runs from scheduled heartbeat verdicts at a glance.
+
+**Context.** PR #4250 added `verdict_source` to
+`/api/registry/agents/:url/compliance` and `triggered_by` to each row
+returned by `/api/registry/agents/:url/compliance/history`. Both fields
+were unrendered in the dashboard until this PR.
+
+**What changes.**
+
+- Compliance tile shows `Last checked: 3m ago (your test)` /
+  `(heartbeat)` / `(manual)` / `(webhook)` after the timestamp. Empty
+  string when `verdict_source` is null (never run).
+- History panel renders a colored badge per run row:
+  - `Your test` (info-blue) for `triggered_by = 'owner_test'`
+  - `Heartbeat` (neutral) for `triggered_by = 'heartbeat'`
+  - `Manual` / `Webhook` (neutral) for the other enum values
+
+No backend changes; this is pure UI surfacing of fields the API already
+emits. Pre-PR-1 rows (which only have `'heartbeat'` / `'manual'` /
+`'webhook'`) render with the neutral badge — no regression.
+
+**Out of scope** (PR 3 of #4247): dropping `agent_test_history` and
+backfilling owner-triggered rows. Tracked separately so the destructive
+migration soaks behind the read-only UI change.
diff --git a/server/public/dashboard-agents.html b/server/public/dashboard-agents.html
index ae39c1d0a8..f8b9521219 100644
--- a/server/public/dashboard-agents.html
+++ b/server/public/dashboard-agents.html
@@ -1501,6 +1501,19 @@ <h1>Agents</h1>
           ? timeAgo(new Date(cs.last_checked_at))
           : 'never';
 
+        // Surface the verdict source so the operator knows whether the
+        // current status came from the scheduled heartbeat or their own
+        // owner-triggered test run. PR #4250 populates cs.verdict_source
+        // ('heartbeat' | 'owner_test' | 'manual' | 'webhook' | null when
+        // never run). Displayed inline with "Last checked" so the
+        // semantic shift on the public compliance contract is visible
+        // to the operator without having to read the changelog.
+        const verdictSourceLabel = cs.verdict_source === 'owner_test' ? ' (your test)'
+          : cs.verdict_source === 'heartbeat' ? ' (heartbeat)'
+          : cs.verdict_source === 'manual' ? ' (manual)'
+          : cs.verdict_source === 'webhook' ? ' (webhook)'
+          : '';
+
         const isPublic = cs.status !== 'opted_out';
 
         return `
@@ -1527,7 +1540,7 @@ <h1>Agents</h1>
             ${visibilitySelectorHtml}
             <div class="agent-meta-row">
               <div class="agent-meta-row-left">
-                <span>Last checked: ${escapeHtml(lastChecked)}</span>
+                <span>Last checked: ${escapeHtml(lastChecked)}${escapeHtml(verdictSourceLabel)}</span>
                 <span class="agent-meta-sep" aria-hidden="true">·</span>
                 <label class="agent-toggle" title="${isPublic ? 'Pause automated compliance and health checks' : 'Re-enable monitoring (Show on registry) before pausing'}">
                   <input type="checkbox" class="monitoring-pause-toggle" data-agent-url="${escapeHtml(agent.url)}" ${cs.monitoring_paused ? 'checked' : ''} ${isPublic ? '' : 'disabled'}>
@@ -2813,6 +2826,21 @@ <h1>Agents</h1>
               : 'var(--color-warning-500)';
             const date = new Date(run.tested_at).toLocaleString();
 
+            // Distinguish owner-triggered tests from scheduled heartbeat runs
+            // so the operator sees their own on-demand evaluations interleaved
+            // with the cron-driven verdicts. `triggered_by` enum is populated
+            // by the unification PR 1 (#4250); pre-PR-1 rows have
+            // 'heartbeat'/'manual'/'webhook' only — surface those neutrally.
+            const triggeredBy = run.triggered_by || 'heartbeat';
+            const sourceLabel = triggeredBy === 'owner_test' ? 'Your test'
+              : triggeredBy === 'heartbeat' ? 'Heartbeat'
+              : triggeredBy === 'manual' ? 'Manual'
+              : triggeredBy === 'webhook' ? 'Webhook'
+              : escapeHtml(String(triggeredBy));
+            const sourceBg = triggeredBy === 'owner_test' ? 'var(--color-info-50)' : 'var(--color-neutral-100)';
+            const sourceBorder = triggeredBy === 'owner_test' ? 'var(--color-info-200)' : 'var(--color-neutral-200)';
+            const sourceFg = triggeredBy === 'owner_test' ? 'var(--color-info-700)' : 'var(--color-text-secondary)';
+
             let runTracks = '';
             if (run.tracks_json) {
               for (const t of run.tracks_json) {
@@ -2829,6 +2857,7 @@ <h1>Agents</h1>
             html += '<span>' + escapeHtml(date) + '</span>';
             html += '<span>' + escapeHtml(run.overall_status) + '</span>';
             html += '<span>' + parseInt(run.tracks_passed, 10) + '/' + (parseInt(run.tracks_passed, 10) + parseInt(run.tracks_failed, 10) + parseInt(run.tracks_partial, 10)) + ' tracks</span>';
+            html += '<span class="history-run-source" style="font-size:var(--text-xs);padding:2px 8px;border-radius:var(--radius-full);background:' + sourceBg + ';border:1px solid ' + sourceBorder + ';color:' + sourceFg + ';">' + sourceLabel + '</span>';
             html += '</div>';
             if (runTracks) {
               html += '<div style="padding:var(--space-1) 0 var(--space-2) var(--space-5);display:flex;gap:3px;flex-wrap:wrap;">' + runTracks + '</div>';

From 0f0104c0de5b77ccfb5f6073150750f39ee4a771 Mon Sep 17 00:00:00 2001
From: Emma Mulitz <emulitz@scope3.com>
Date: Fri, 8 May 2026 16:23:01 -0400
Subject: [PATCH 5/5] fix(addie): backfill owner test history + stop dual-write
 for owner runs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR 3 of the #4247 unification stack.

Migration 472 backfills every agent_test_history row with a user_id
into agent_compliance_runs as triggered_by='owner_test', carrying the
source id in observations_json.backfill_source_id for idempotent re-runs.
Each agent's latest backfilled row upserts into agent_compliance_status
so the dashboard immediately reflects a real verdict for agents tested
through Addie pre-PR-#4250.

evaluate_agent_quality stops calling recordTest() when the caller owns
the agent — that was the dual-write that #4247 is closing. recordTest
is retained ONLY for third-party runs so strangers testing someone
else's agent still have a session-scoped audit trail.

Drop of agent_test_history table is deferred behind the 14-day soak
from #4250 + 7-day soak from #4263 + S3 cold-storage export of
non-owner rows. Migration 472 documents this in its trailing comment.

Stacked on #4263 → #4250.
---
 ...-owner-evaluate-agent-stop-legacy-write.md |  33 ++++
 server/src/addie/mcp/member-tools.ts          |  50 +++---
 ..._agent_test_history_to_compliance_runs.sql | 149 ++++++++++++++++++
 3 files changed, 212 insertions(+), 20 deletions(-)
 create mode 100644 .changeset/unify-owner-evaluate-agent-stop-legacy-write.md
 create mode 100644 server/src/db/migrations/472_backfill_agent_test_history_to_compliance_runs.sql

diff --git a/.changeset/unify-owner-evaluate-agent-stop-legacy-write.md b/.changeset/unify-owner-evaluate-agent-stop-legacy-write.md
new file mode 100644
index 0000000000..c917d80bd2
--- /dev/null
+++ b/.changeset/unify-owner-evaluate-agent-stop-legacy-write.md
@@ -0,0 +1,33 @@
+---
+---
+
+PR 3 of the #4247 unification stack. Two coupled changes:
+
+**Backfill historical owner-triggered tests into the canonical compliance
+tables.** Migration `472_backfill_agent_test_history_to_compliance_runs.sql`
+copies every `agent_test_history` row with a `user_id` into
+`agent_compliance_runs` as `triggered_by = 'owner_test'` (carrying the
+source row id in `observations_json.backfill_source_id` so a re-run is a
+no-op via `WHERE NOT EXISTS`). Each agent's most-recent backfilled row
+also upserts into `agent_compliance_status` so the dashboard's compliance
+tile immediately reflects a real verdict for any agent that was tested
+through Addie pre-PR-#4250 and never ran the heartbeat.
+
+**Stop the dual write for owner runs.** `evaluate_agent_quality` no longer
+calls `agentContextDb.recordTest()` when the caller owns the agent — that
+path was the dual-write bug #4247 is closing. The legacy `recordTest` call
+is retained ONLY for third-party runs so a stranger who tests someone
+else's agent still has a session-scoped audit trail in their own
+`agent_test_history`. Owner-triggered runs persist exclusively to
+canonical state going forward.
+
+**Out of scope** (deferred to a follow-up after the soak gates):
+
+- Drop `agent_test_history` table — gated on the 14-day soak from #4250
+  deploy + 7-day soak from #4263 + S3 cold-storage export of the
+  remaining (`user_id IS NULL`) third-party rows. Migration 472 documents
+  this in its trailing comment.
+- Collapse `agent_contexts.last_test_*` into a derived view — PR 4 of
+  the #4247 stack.
+
+**Stacked on** #4263 (PR 2 of #4247) → #4250 (PR 1 of #4247).
diff --git a/server/src/addie/mcp/member-tools.ts b/server/src/addie/mcp/member-tools.ts
index d2046c5e7c..e3b4733cf8 100644
--- a/server/src/addie/mcp/member-tools.ts
+++ b/server/src/addie/mcp/member-tools.ts
@@ -3613,27 +3613,37 @@ export function createMemberToolHandlers(
           }
         }
 
-        // Legacy write to agent_contexts + agent_test_history. Retained for
-        // backward compatibility until PR 3 migrates callers and drops the table.
-        try {
-          const context = await agentContextDb.getByOrgAndUrl(organizationId, resolved.resolvedUrl);
-          if (context) {
-            await agentContextDb.recordTest({
-              agent_context_id: context.id,
-              scenario: 'quality_evaluation',
-              overall_passed: result.overall_status === 'passing',
-              steps_passed: result.summary.tracks_passed,
-              steps_failed: result.summary.tracks_failed,
-              total_duration_ms: result.total_duration_ms,
-              summary: result.summary.headline,
-              dry_run: true,
-              triggered_by: 'user',
-              user_id: memberContext?.workos_user?.workos_user_id,
-              agent_profile_json: result.agent_profile,
-            });
+        // Legacy write to agent_contexts + agent_test_history. Retained ONLY
+        // for non-owner runs so a third-party who runs evaluate_agent_quality
+        // against someone else's agent still has a session-scoped audit trail
+        // (their own org's agent_test_history). Owner runs already wrote
+        // canonical state above (PR #4250); writing twice would split the
+        // audit and re-introduce the dual-write bug tracked in issue #4247.
+        //
+        // PR 4 of #4247 collapses agent_contexts.last_test_* into a derived
+        // view, after which this legacy block (and recordTest itself) drop
+        // entirely.
+        if (!isAgentOwner) {
+          try {
+            const context = await agentContextDb.getByOrgAndUrl(organizationId, resolved.resolvedUrl);
+            if (context) {
+              await agentContextDb.recordTest({
+                agent_context_id: context.id,
+                scenario: 'quality_evaluation',
+                overall_passed: result.overall_status === 'passing',
+                steps_passed: result.summary.tracks_passed,
+                steps_failed: result.summary.tracks_failed,
+                total_duration_ms: result.total_duration_ms,
+                summary: result.summary.headline,
+                dry_run: true,
+                triggered_by: 'user',
+                user_id: memberContext?.workos_user?.workos_user_id,
+                agent_profile_json: result.agent_profile,
+              });
+            }
+          } catch (error) {
+            logger.debug({ error }, 'Could not record quality evaluation result');
           }
-        } catch (error) {
-          logger.debug({ error }, 'Could not record quality evaluation result');
         }
       }
 
diff --git a/server/src/db/migrations/472_backfill_agent_test_history_to_compliance_runs.sql b/server/src/db/migrations/472_backfill_agent_test_history_to_compliance_runs.sql
new file mode 100644
index 0000000000..7f53b2f376
--- /dev/null
+++ b/server/src/db/migrations/472_backfill_agent_test_history_to_compliance_runs.sql
@@ -0,0 +1,149 @@
+-- Migration 472: backfill owner-triggered agent_test_history rows into
+-- agent_compliance_runs as triggered_by='owner_test' rows.
+--
+-- Part of the #4247 compliance-state unification (PR 3 of 4). PR #4250
+-- (PR 1) made evaluate_agent_quality write canonical for owner runs going
+-- forward; this migration backfills the historical rows so the compliance
+-- API and dashboard reflect the full test history, not just runs from the
+-- PR #4250 deploy onward.
+--
+-- Scope: backfill ONLY rows with user_id IS NOT NULL (treated as owner-triggered
+-- tests). NOTE(review): recordTest() also stores user_id for non-owner callers,
+-- so confirm this filter cannot relabel third-party runs as 'owner_test'. Other
+-- rows are NOT touched here — the table drop is a separate follow-up that includes
+-- an S3 cold-storage export so audit history isn't silently lost (see #4247 Acceptance Criteria).
+--
+-- Mapping:
+--   agent_test_history.agent_context_id → agent_contexts.agent_url
+--   agent_test_history.overall_passed   → overall_status ('passing' | 'failing')
+--   agent_test_history.steps_passed     → tracks_passed
+--   agent_test_history.steps_failed     → tracks_failed
+--   agent_test_history.total_duration_ms→ total_duration_ms
+--   agent_test_history.summary          → headline
+--   agent_test_history.agent_profile_json → agent_profile_json
+--   agent_test_history.started_at       → tested_at
+--   triggered_by                        → 'owner_test' (constant)
+--   dry_run                             → false (PR #4250's owner path uses dry_run=false)
+--
+-- Idempotency: backfilled rows carry the source agent_test_history.id in
+-- observations_json.backfill_source_id so a re-run is a no-op via the
+-- WHERE NOT EXISTS guard.
+
+INSERT INTO agent_compliance_runs (
+  agent_url,
+  lifecycle_stage,
+  overall_status,
+  headline,
+  total_duration_ms,
+  tracks_json,
+  tracks_passed,
+  tracks_failed,
+  tracks_skipped,
+  tracks_partial,
+  agent_profile_json,
+  observations_json,
+  triggered_by,
+  dry_run,
+  tested_at
+)
+SELECT
+  ac.agent_url,
+  COALESCE(arm.lifecycle_stage, 'production') AS lifecycle_stage,
+  CASE WHEN ath.overall_passed THEN 'passing' ELSE 'failing' END AS overall_status,
+  ath.summary AS headline,
+  ath.total_duration_ms,
+  '[]'::jsonb AS tracks_json,
+  COALESCE(ath.steps_passed, 0) AS tracks_passed,
+  COALESCE(ath.steps_failed, 0) AS tracks_failed,
+  0 AS tracks_skipped,
+  0 AS tracks_partial,
+  ath.agent_profile_json,
+  jsonb_build_object(
+    'backfill_source', 'agent_test_history',
+    'backfill_source_id', ath.id::text,
+    'backfill_migration', '472',
+    'original_scenario', ath.scenario
+  ) AS observations_json,
+  'owner_test' AS triggered_by,
+  FALSE AS dry_run,
+  ath.started_at AS tested_at
+FROM agent_test_history ath
+JOIN agent_contexts ac ON ac.id = ath.agent_context_id
+LEFT JOIN agent_registry_metadata arm ON arm.agent_url = ac.agent_url
+WHERE ath.user_id IS NOT NULL
+  AND NOT EXISTS (
+    SELECT 1 FROM agent_compliance_runs acr
+    WHERE acr.observations_json->>'backfill_source_id' = ath.id::text
+  );
+
+-- Update agent_compliance_status from the latest backfilled row per agent
+-- so the dashboard immediately reflects the most recent owner-triggered
+-- verdict for any agent that didn't yet have a heartbeat row. When a status
+-- row already exists, status and last_triggered_by are overwritten only if
+-- the backfilled run is newer than last_checked_at: the most recent check
+-- wins regardless of source (the last-write-wins contract from PR #4250).
+INSERT INTO agent_compliance_status (
+  agent_url,
+  status,
+  lifecycle_stage,
+  last_checked_at,
+  last_passed_at,
+  last_failed_at,
+  tracks_summary_json,
+  headline,
+  status_changed_at,
+  last_triggered_by
+)
+SELECT DISTINCT ON (acr.agent_url)
+  acr.agent_url,
+  CASE WHEN acr.overall_status = 'passing' THEN 'passing' ELSE 'failing' END,
+  acr.lifecycle_stage,
+  acr.tested_at,
+  CASE WHEN acr.overall_status = 'passing' THEN acr.tested_at ELSE NULL END,
+  CASE WHEN acr.overall_status = 'failing' THEN acr.tested_at ELSE NULL END,
+  '{}'::jsonb,
+  acr.headline,
+  acr.tested_at,
+  'owner_test'
+FROM agent_compliance_runs acr
+WHERE acr.observations_json->>'backfill_migration' = '472'
+ORDER BY acr.agent_url, acr.tested_at DESC
+ON CONFLICT (agent_url) DO UPDATE SET
+  status = CASE
+    WHEN agent_compliance_status.last_checked_at IS NULL
+      OR agent_compliance_status.last_checked_at < EXCLUDED.last_checked_at
+    THEN EXCLUDED.status
+    ELSE agent_compliance_status.status
+  END,
+  last_checked_at = GREATEST(
+    COALESCE(agent_compliance_status.last_checked_at, EXCLUDED.last_checked_at),
+    EXCLUDED.last_checked_at
+  ),
+  last_passed_at = CASE
+    WHEN EXCLUDED.last_passed_at IS NOT NULL
+      AND (agent_compliance_status.last_passed_at IS NULL
+        OR agent_compliance_status.last_passed_at < EXCLUDED.last_passed_at)
+    THEN EXCLUDED.last_passed_at
+    ELSE agent_compliance_status.last_passed_at
+  END,
+  last_failed_at = CASE
+    WHEN EXCLUDED.last_failed_at IS NOT NULL
+      AND (agent_compliance_status.last_failed_at IS NULL
+        OR agent_compliance_status.last_failed_at < EXCLUDED.last_failed_at)
+    THEN EXCLUDED.last_failed_at
+    ELSE agent_compliance_status.last_failed_at
+  END,
+  last_triggered_by = CASE
+    WHEN agent_compliance_status.last_checked_at IS NULL
+      OR agent_compliance_status.last_checked_at < EXCLUDED.last_checked_at
+    THEN EXCLUDED.last_triggered_by
+    ELSE agent_compliance_status.last_triggered_by
+  END;
+
+-- NOTE: this migration does NOT drop agent_test_history. The drop is
+-- deferred to a follow-up migration that runs after:
+--   (a) the 14-day soak window from PR #4250 deploy,
+--   (b) the 7-day soak window from PR #4263 deploy,
+--   (c) S3 cold-storage export of third-party rows (user_id IS NULL),
+--   (d) row-count delta verification on staging.
+-- See #4247 acceptance criteria.