diff --git a/.changeset/dashboard-surfaces-verdict-source.md b/.changeset/dashboard-surfaces-verdict-source.md new file mode 100644 index 0000000000..e7fa44366d --- /dev/null +++ b/.changeset/dashboard-surfaces-verdict-source.md @@ -0,0 +1,31 @@ +--- +--- + +Dashboard `/dashboard/agents` surfaces the new `verdict_source` field on the +compliance tile and a per-run "Your test / Heartbeat / Manual / Webhook" +badge in the History panel. PR 2 of the #4247 unification stack — +read-side cleanup that lets owners distinguish their own on-demand +runs from scheduled heartbeat verdicts at a glance. + +**Context.** PR #4250 added `verdict_source` to +`/api/registry/agents/:url/compliance` and `triggered_by` to each row +returned by `/api/registry/agents/:url/compliance/history`. Both fields +were unrendered in the dashboard until this PR. + +**What changes.** + +- Compliance tile shows `Last checked: 3m ago (your test)` / + `(heartbeat)` / `(manual)` / `(webhook)` after the timestamp. Empty + string when `verdict_source` is null (never run). +- History panel renders a colored badge per run row: + - `Your test` (info-blue) for `triggered_by = 'owner_test'` + - `Heartbeat` (neutral) for `triggered_by = 'heartbeat'` + - `Manual` / `Webhook` (neutral) for the other enum values + +No backend changes; this is pure UI surfacing of fields the API already +emits. Pre-PR-1 rows (which only have `'heartbeat'` / `'manual'` / +`'webhook'`) render with the neutral badge — no regression. + +**Out of scope** (PR 3 of #4247): dropping `agent_test_history` and +backfilling owner-triggered rows. Tracked separately so the destructive +migration soaks behind the read-only UI change. 
diff --git a/.changeset/unify-owner-compliance-writes.md b/.changeset/unify-owner-compliance-writes.md new file mode 100644 index 0000000000..1bdb5c7f9e --- /dev/null +++ b/.changeset/unify-owner-compliance-writes.md @@ -0,0 +1,10 @@ +--- +--- + +PR 1 of 4 in the compliance-state unification initiative (issue #4247): owner-triggered +`evaluate_agent_quality` runs now write to canonical compliance tables +(`agent_compliance_status`, `agent_compliance_runs`, `agent_storyboard_status`) with +`triggered_by = 'owner_test'`, closing the 12-hour gap between owner tests and the +public `/api/registry/agents/:url/compliance` endpoint. Non-owner runs continue +writing to `agent_test_history` (deprecated in PR 3). Adds `'owner_test'` to both +`triggered_by` CHECK constraints via migration 471. diff --git a/.changeset/unify-owner-evaluate-agent-stop-legacy-write.md b/.changeset/unify-owner-evaluate-agent-stop-legacy-write.md new file mode 100644 index 0000000000..c917d80bd2 --- /dev/null +++ b/.changeset/unify-owner-evaluate-agent-stop-legacy-write.md @@ -0,0 +1,33 @@ +--- +--- + +PR 3 of the #4247 unification stack. Two coupled changes: + +**Backfill historical owner-triggered tests into the canonical compliance +tables.** Migration `472_backfill_agent_test_history_to_compliance_runs.sql` +copies every `agent_test_history` row with a `user_id` into +`agent_compliance_runs` as `triggered_by = 'owner_test'` (carrying the +source row id in `observations_json.backfill_source_id` so a re-run is a +no-op via `WHERE NOT EXISTS`). Each agent's most-recent backfilled row +also upserts into `agent_compliance_status` so the dashboard's compliance +tile immediately reflects a real verdict for any agent that was tested +through Addie pre-PR-#4250 and never ran the heartbeat. + +**Stop the dual write for owner runs.** `evaluate_agent_quality` no longer +calls `agentContextDb.recordTest()` when the caller owns the agent — that +path was the dual-write bug #4247 is closing. 
The legacy `recordTest` call +is retained ONLY for third-party runs so a stranger who tests someone +else's agent still has a session-scoped audit trail in their own +`agent_test_history`. Owner-triggered runs persist exclusively to +canonical state going forward. + +**Out of scope** (deferred to a follow-up after the soak gates): + +- Drop `agent_test_history` table — gated on the 14-day soak from #4250 + deploy + 7-day soak from #4263 + S3 cold-storage export of the + remaining (`user_id IS NULL`) third-party rows. Migration 472 documents + this in its trailing comment. +- Collapse `agent_contexts.last_test_*` into a derived view — PR 4 of + the #4247 stack. + +**Stacked on** #4263 (PR 2 of #4247) → #4250 (PR 1 of #4247). diff --git a/dist/schemas/onboarding-openapi.d.ts b/dist/schemas/onboarding-openapi.d.ts new file mode 100644 index 0000000000..ed29eaf6de --- /dev/null +++ b/dist/schemas/onboarding-openapi.d.ts @@ -0,0 +1,27 @@ +/** + * OpenAPI registrations for the onboarding REST surface. + * + * `POST /api/organizations` has existed in production for a long time but + * has only ever been documented as a private endpoint exercised by the AAO + * dashboard's `/onboarding` form. Surfacing it in the public spec is the + * minimum-surface answer to the storefront-bootstrap question: a + * third-party app holding only a user's OAuth token needs *one* documented + * call to materialize the org, then `POST /api/me/agents` to land an agent + * (which auto-creates the member profile on first call). + * + * Two fields the handler accepts but the public schema deliberately omits: + * + * - `membership_tier` — owned exclusively by the Stripe webhook. Accepting + * it from the caller would let any user stamp tier intent on their org + * row, leaking tier-gated UI state until/unless a real subscription + * overwrites the column. + * - `corporate_domain` — server derives the value from the authenticated + * user's email. 
Accepting it as a field invited 400s when a caller's + * value disagreed with their email and gave nothing back when it agreed. + * + * Kept in its own module so the spec generator's import graph stays free + * of route handlers (each route file's transitive imports pull in WorkOS + * init, which fails at module load without env vars). + */ +export {}; +//# sourceMappingURL=onboarding-openapi.d.ts.map \ No newline at end of file diff --git a/dist/schemas/onboarding-openapi.d.ts.map b/dist/schemas/onboarding-openapi.d.ts.map new file mode 100644 index 0000000000..9e449fbf0f --- /dev/null +++ b/dist/schemas/onboarding-openapi.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"onboarding-openapi.d.ts","sourceRoot":"","sources":["../../server/src/schemas/onboarding-openapi.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG"} \ No newline at end of file diff --git a/dist/schemas/onboarding-openapi.js b/dist/schemas/onboarding-openapi.js new file mode 100644 index 0000000000..d3e3074486 --- /dev/null +++ b/dist/schemas/onboarding-openapi.js @@ -0,0 +1,135 @@ +/** + * OpenAPI registrations for the onboarding REST surface. + * + * `POST /api/organizations` has existed in production for a long time but + * has only ever been documented as a private endpoint exercised by the AAO + * dashboard's `/onboarding` form. Surfacing it in the public spec is the + * minimum-surface answer to the storefront-bootstrap question: a + * third-party app holding only a user's OAuth token needs *one* documented + * call to materialize the org, then `POST /api/me/agents` to land an agent + * (which auto-creates the member profile on first call). + * + * Two fields the handler accepts but the public schema deliberately omits: + * + * - `membership_tier` — owned exclusively by the Stripe webhook. Accepting + * it from the caller would let any user stamp tier intent on their org + * row, leaking tier-gated UI state until/unless a real subscription + * overwrites the column. 
+ * - `corporate_domain` — server derives the value from the authenticated + * user's email. Accepting it as a field invited 400s when a caller's + * value disagreed with their email and gave nothing back when it agreed. + * + * Kept in its own module so the spec generator's import graph stays free + * of route handlers (each route file's transitive imports pull in WorkOS + * init, which fails at module load without env vars). + */ +import { z } from 'zod'; +import { registry, ErrorSchema } from './registry.js'; +const OrganizationCompanyTypeSchema = z + .enum(['adtech', 'agency', 'brand', 'publisher', 'data', 'ai', 'other']) + .openapi('OrganizationCompanyType', { + description: "Coarse classification of the organization's role in the open ad ecosystem. Drives default verification badges and the member profile's display category.", +}); +const OrganizationRevenueTierSchema = z + .enum(['under_1m', '1m_5m', '5m_50m', '50m_250m', '250m_1b', '1b_plus']) + .openapi('OrganizationRevenueTier', { + description: 'Annual revenue band, USD. Drives membership-tier eligibility for company-tier seats.', +}); +const CreateOrganizationInputSchema = z + .object({ + organization_name: z.string().min(1).max(200).openapi({ + description: "Display name for the organization. Used both as the org row name and (when auto-bootstrapping a member profile via the first agent registration) as the profile's `display_name`.", + example: 'Acme Media', + }), + is_personal: z.boolean().optional().openapi({ + description: 'Set to `true` to create a personal workspace instead of a corporate organization. 
Personal workspaces skip corporate-domain verification, are limited to one per user, and cannot host the `company_*` membership tiers.', + default: false, + }), + company_type: OrganizationCompanyTypeSchema.optional(), + revenue_tier: OrganizationRevenueTierSchema.optional(), + marketing_opt_in: z.boolean().optional().openapi({ + description: 'Whether the caller opted in to AAO marketing communications. Recorded once per user (not overwritten on subsequent calls). Independent of Terms-of-Service consent, which is recorded server-side from the request context.', + default: false, + }), +}) + .openapi('CreateOrganizationInput', { + description: [ + 'Request body for `POST /api/organizations`.', + "Bootstraps a WorkOS organization, mirrors the caller as `owner`, records the caller's ToS / privacy-policy acceptance, and (for non-personal orgs) inserts an email-verified record into `organization_domains` so subsequent registry calls can skip explicit domain-verification.", + "Membership tier and corporate domain are *not* caller-supplied: the tier is set by the Stripe webhook on subscription events, and the corporate domain is derived from the authenticated user's email.", + ].join('\n\n'), +}); +const CreateOrganizationResponseSchema = z + .object({ + success: z.boolean().optional(), + organization: z + .object({ + id: z.string().openapi({ example: 'org_01HXZAB123' }), + name: z.string().openapi({ example: 'Acme Media' }), + }) + .optional(), + id: z.string().optional().openapi({ + description: "Set on the **prospect-adoption** path: when an org with the user's email domain already exists in a `prospect` state (i.e. the registry pre-recorded it from a brand crawl but no human had claimed it yet), this call adopts that org for the caller instead of creating a new one.", + }), + name: z.string().optional(), + adopted: z.boolean().optional().openapi({ + description: '`true` when the response is the prospect-adoption path. 
When `true`, no new WorkOS organization was created — the caller is now the owner of an existing prospect record.', + }), +}) + .openapi('CreateOrganizationResponse', { + description: 'Response from `POST /api/organizations`. The body shape varies by path: a fresh creation returns `{ success: true, organization: { id, name } }`; a prospect adoption returns `{ id, name, adopted: true }` directly. Both paths are 2xx; downstream callers should treat any `2xx` as "the org now exists and you are an owner of it" and read whichever id is present.', +}); +registry.registerPath({ + method: 'post', + path: '/api/organizations', + operationId: 'createOrganization', + summary: 'Create or adopt my organization', + description: [ + "Bootstrap the caller's organization explicitly. Use this when the caller wants to control the organization name, `company_type`, `revenue_tier`, or `is_personal` flag before any agents are registered.", + "**Most storefront-style integrations don't need this call** — `POST /api/me/agents` will auto-create an org for a fresh OAuth user (corporate or personal workspace based on the email domain) and surface `org_auto_created: true` in the response. Reach for `POST /api/organizations` only when the auto-derived defaults aren't acceptable.", + 'Three outcomes depending on the caller\'s state:', + "- **Fresh create** (most common): a new WorkOS organization is created, the caller is added as `owner`, the corporate domain is recorded as email-verified, and ToS / privacy-policy acceptance is logged from the request context. Returns `{ success: true, organization: { id, name } }`.", + "- **Prospect adoption**: an organization with the caller's email domain already exists as a `prospect` (the registry pre-recorded it from a brand crawl but no human had claimed it yet). The caller is promoted to `owner` of the existing record instead of forking a duplicate. 
Returns `{ id, name, adopted: true }`.", + '- **Already-active conflict**: the org exists and is already claimed by another paying member or a previously joined user. Returns `409` with the existing org id so the caller can switch to a join-request flow (`POST /api/organizations/:orgId/join-requests`) instead of trying to register a duplicate.', + 'Tier transitions happen via the billing flow only — there is no `membership_tier` field on this endpoint. After org creation, send the user to `POST /api/checkout-session` (or the dashboard `/membership` page) to start a subscription; the Stripe webhook is the sole writer of `organizations.membership_tier`.', + 'Rate-limited per user: `15` failed attempts per hour; successful calls do not count against the limit so a legitimate registration is never penalized by earlier validation errors.', + ].join('\n\n'), + tags: ['Onboarding'], + security: [{ bearerAuth: [] }, { oauth2: [] }], + request: { + body: { content: { 'application/json': { schema: CreateOrganizationInputSchema } } }, + }, + responses: { + 200: { + description: 'Prospect adoption — an existing prospect organization for this domain was claimed by the caller. Body is `{ id, name, adopted: true }`.', + content: { 'application/json': { schema: CreateOrganizationResponseSchema } }, + }, + 201: { + description: 'New organization created. Body is `{ success: true, organization: { id, name } }`. 
The caller is the `owner`; the corporate domain is recorded as email-verified for downstream registry calls.', + content: { 'application/json': { schema: CreateOrganizationResponseSchema } }, + }, + 400: { + description: [ + 'One of:', + '- `organization_name` missing or invalid', + '- `company_type` / `revenue_tier` value not in the documented enum', + "- caller is on a personal-email domain (gmail.com, yahoo.com, …) and is trying to register a corporate org — register `is_personal: true` instead", + '- per-user organization cap reached (10 orgs per user)', + ].join('\n'), + content: { 'application/json': { schema: ErrorSchema } }, + }, + 401: { + description: 'Authentication required', + content: { 'application/json': { schema: ErrorSchema } }, + }, + 409: { + description: "An active organization already exists for this caller's email domain. The body includes `existing_org_id` and `existing_org_name`; the caller should switch to the join-request flow rather than retrying.", + content: { 'application/json': { schema: ErrorSchema } }, + }, + 429: { + description: 'Rate limit exceeded — 15 failed attempts per hour per user. 
Successful calls do not count against the limit.', + content: { 'application/json': { schema: ErrorSchema } }, + }, + }, +}); +//# sourceMappingURL=onboarding-openapi.js.map \ No newline at end of file diff --git a/dist/schemas/onboarding-openapi.js.map b/dist/schemas/onboarding-openapi.js.map new file mode 100644 index 0000000000..9fb894c315 --- /dev/null +++ b/dist/schemas/onboarding-openapi.js.map @@ -0,0 +1 @@ +{"version":3,"file":"onboarding-openapi.js","sourceRoot":"","sources":["../../server/src/schemas/onboarding-openapi.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAEtD,MAAM,6BAA6B,GAAG,CAAC;KACpC,IAAI,CAAC,CAAC,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;KACvE,OAAO,CAAC,yBAAyB,EAAE;IAClC,WAAW,EACT,0JAA0J;CAC7J,CAAC,CAAC;AAEL,MAAM,6BAA6B,GAAG,CAAC;KACpC,IAAI,CAAC,CAAC,UAAU,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;KACvE,OAAO,CAAC,yBAAyB,EAAE;IAClC,WAAW,EACT,sFAAsF;CACzF,CAAC,CAAC;AAEL,MAAM,6BAA6B,GAAG,CAAC;KACpC,MAAM,CAAC;IACN,iBAAiB,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC;QACpD,WAAW,EACT,mLAAmL;QACrL,OAAO,EAAE,YAAY;KACtB,CAAC;IACF,WAAW,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC;QAC1C,WAAW,EACT,0NAA0N;QAC5N,OAAO,EAAE,KAAK;KACf,CAAC;IACF,YAAY,EAAE,6BAA6B,CAAC,QAAQ,EAAE;IACtD,YAAY,EAAE,6BAA6B,CAAC,QAAQ,EAAE;IACtD,gBAAgB,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC;QAC/C,WAAW,EACT,6NAA6N;QAC/N,OAAO,EAAE,KAAK;KACf,CAAC;CACH,CAAC;KACD,OAAO,CAAC,yBAAyB,EAAE;IAClC,WAAW,EAAE;QACX,6CAA6C;QAC7C,qRAAqR;QACrR,wMAAwM;KACzM,CAAC,IAAI,CAAC,MAAM,CAAC;CACf,CAAC,CAAC;AAEL,MAAM,gCAAgC,GAAG,CAAC;KACvC,MAAM,CAAC;IACN,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE;IAC/B,YAAY,EAAE,CAAC;SACZ,MAAM,CAAC;QACN,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,EAAE,OAAO,EAAE,gBAAgB,EAAE,CAAC;QACrD,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,EAAE,OAAO,EAAE,YAAY,EAAE,C
AAC;KACpD,CAAC;SACD,QAAQ,EAAE;IACb,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC;QAChC,WAAW,EACT,sRAAsR;KACzR,CAAC;IACF,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC3B,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC;QACtC,WAAW,EACT,2KAA2K;KAC9K,CAAC;CACH,CAAC;KACD,OAAO,CAAC,4BAA4B,EAAE;IACrC,WAAW,EACT,0WAA0W;CAC7W,CAAC,CAAC;AAEL,QAAQ,CAAC,YAAY,CAAC;IACpB,MAAM,EAAE,MAAM;IACd,IAAI,EAAE,oBAAoB;IAC1B,WAAW,EAAE,oBAAoB;IACjC,OAAO,EAAE,iCAAiC;IAC1C,WAAW,EAAE;QACX,0MAA0M;QAC1M,iVAAiV;QACjV,kDAAkD;QAClD,8RAA8R;QAC9R,2TAA2T;QAC3T,+SAA+S;QAC/S,sTAAsT;QACtT,qLAAqL;KACtL,CAAC,IAAI,CAAC,MAAM,CAAC;IACd,IAAI,EAAE,CAAC,YAAY,CAAC;IACpB,QAAQ,EAAE,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;IAC9C,OAAO,EAAE;QACP,IAAI,EAAE,EAAE,OAAO,EAAE,EAAE,kBAAkB,EAAE,EAAE,MAAM,EAAE,6BAA6B,EAAE,EAAE,EAAE;KACrF;IACD,SAAS,EAAE;QACT,GAAG,EAAE;YACH,WAAW,EACT,yIAAyI;YAC3I,OAAO,EAAE,EAAE,kBAAkB,EAAE,EAAE,MAAM,EAAE,gCAAgC,EAAE,EAAE;SAC9E;QACD,GAAG,EAAE;YACH,WAAW,EACT,iMAAiM;YACnM,OAAO,EAAE,EAAE,kBAAkB,EAAE,EAAE,MAAM,EAAE,gCAAgC,EAAE,EAAE;SAC9E;QACD,GAAG,EAAE;YACH,WAAW,EAAE;gBACX,SAAS;gBACT,0CAA0C;gBAC1C,oEAAoE;gBACpE,mJAAmJ;gBACnJ,wDAAwD;aACzD,CAAC,IAAI,CAAC,IAAI,CAAC;YACZ,OAAO,EAAE,EAAE,kBAAkB,EAAE,EAAE,MAAM,EAAE,WAAW,EAAE,EAAE;SACzD;QACD,GAAG,EAAE;YACH,WAAW,EAAE,yBAAyB;YACtC,OAAO,EAAE,EAAE,kBAAkB,EAAE,EAAE,MAAM,EAAE,WAAW,EAAE,EAAE;SACzD;QACD,GAAG,EAAE;YACH,WAAW,EACT,4MAA4M;YAC9M,OAAO,EAAE,EAAE,kBAAkB,EAAE,EAAE,MAAM,EAAE,WAAW,EAAE,EAAE;SACzD;QACD,GAAG,EAAE;YACH,WAAW,EACT,8GAA8G;YAChH,OAAO,EAAE,EAAE,kBAAkB,EAAE,EAAE,MAAM,EAAE,WAAW,EAAE,EAAE;SACzD;KACF;CACF,CAAC,CAAC"} \ No newline at end of file diff --git a/server/public/dashboard-agents.html b/server/public/dashboard-agents.html index ae39c1d0a8..f8b9521219 100644 --- a/server/public/dashboard-agents.html +++ b/server/public/dashboard-agents.html @@ -1501,6 +1501,19 @@

Agents

? timeAgo(new Date(cs.last_checked_at)) : 'never'; + // Surface the verdict source so the operator knows whether the + // current status came from the scheduled heartbeat or their own + // owner-triggered test run. PR #4250 populates cs.verdict_source + // ('heartbeat' | 'owner_test' | 'manual' | 'webhook' | null when + // never run). Displayed inline with "Last checked" so the + // semantic shift on the public compliance contract is visible + // to the operator without having to read the changelog. + const verdictSourceLabel = cs.verdict_source === 'owner_test' ? ' (your test)' + : cs.verdict_source === 'heartbeat' ? ' (heartbeat)' + : cs.verdict_source === 'manual' ? ' (manual)' + : cs.verdict_source === 'webhook' ? ' (webhook)' + : ''; + const isPublic = cs.status !== 'opted_out'; return ` @@ -1527,7 +1540,7 @@

Agents

${visibilitySelectorHtml}
- Last checked: ${escapeHtml(lastChecked)} + Last checked: ${escapeHtml(lastChecked)}${escapeHtml(verdictSourceLabel)}
'; if (runTracks) { html += '
' + runTracks + '
'; diff --git a/server/src/addie/config-version.ts b/server/src/addie/config-version.ts index f184b82fcd..bf00b2c4d6 100644 --- a/server/src/addie/config-version.ts +++ b/server/src/addie/config-version.ts @@ -30,7 +30,7 @@ import { loadRules, loadResponseStyle } from './rules/index.js'; * Format: YYYY.MM.N where N is incremented for multiple changes in a month * Example: 2025.01.1, 2025.01.2, 2025.02.1 */ -export const CODE_VERSION = '2026.04.6'; +export const CODE_VERSION = '2026.05.1'; // Types export interface ConfigVersion { diff --git a/server/src/addie/mcp/member-tools.ts b/server/src/addie/mcp/member-tools.ts index a37b108c1d..e3b4733cf8 100644 --- a/server/src/addie/mcp/member-tools.ts +++ b/server/src/addie/mcp/member-tools.ts @@ -34,6 +34,7 @@ import { SAMPLE_BRIEFS, classifyCapabilityResolutionError, presentCapabilityResolutionError, + complianceResultToDbInput, type ComplyOptions, type ComplianceTrack, } from '../services/compliance-testing.js'; @@ -3559,27 +3560,90 @@ export function createMemberToolHandlers( ); } - // Record result if the user has an org with this agent saved + // Record result when the user has an org context for this agent. if (organizationId) { - try { - const context = await agentContextDb.getByOrgAndUrl(organizationId, resolved.resolvedUrl); - if (context) { - await agentContextDb.recordTest({ - agent_context_id: context.id, - scenario: 'quality_evaluation', - overall_passed: result.overall_status === 'passing', - steps_passed: result.summary.tracks_passed, - steps_failed: result.summary.tracks_failed, - total_duration_ms: result.total_duration_ms, - summary: result.summary.headline, - dry_run: true, - triggered_by: 'user', - user_id: memberContext?.workos_user?.workos_user_id, - agent_profile_json: result.agent_profile, - }); + // Write to canonical compliance tables when the calling org owns this agent. 
+ // Mirrors resolveAgentOwnerOrg (registry-api.ts:4733) — joins organization_memberships + // to verify the acting user is still an active member of the owning org. + // Non-owner runs skip the canonical write and fall through to the legacy + // agent_test_history path below. + const workosUserId = memberContext?.workos_user?.workos_user_id; + let isAgentOwner = false; + if (workosUserId) { + try { + const ownerCheck = await query( + `SELECT 1 FROM member_profiles mp + JOIN organization_memberships om + ON om.workos_organization_id = mp.workos_organization_id + WHERE mp.workos_organization_id = $1 + AND mp.agents @> $2::jsonb + AND om.workos_user_id = $3 + LIMIT 1`, + [organizationId, JSON.stringify([{ url: resolved.resolvedUrl }]), workosUserId], + ); + isAgentOwner = ownerCheck.rows.length > 0; + } catch (ownerCheckError) { + logger.warn({ ownerCheckError }, 'evaluate_agent_quality: owner check failed, skipping canonical write'); + } + } + + if (isAgentOwner) { + try { + const metadata = await complianceDb.getRegistryMetadata(resolved.resolvedUrl); + // Skip canonical write if the owner has opted out of compliance monitoring. + if (!metadata?.compliance_opt_out) { + const dbInput = { + ...complianceResultToDbInput( + result, + resolved.resolvedUrl, + metadata?.lifecycle_stage ?? 'production', + 'owner_test', + ), + // Owner test runs are not dry runs — they update the live public record. + // (complianceResultToDbInput hard-codes dry_run: true; override here.) + dry_run: false, + }; + await complianceDb.recordComplianceRun(dbInput); + // notifyComplianceChange intentionally omitted: owner test runs are + // exploratory; compliance-change notifications fire on heartbeat + // transitions only to prevent iteration-loop spam. + } + } catch (error) { + logger.warn({ error, agentUrl: resolved.resolvedUrl }, 'Could not write owner test result to canonical compliance state'); + } + } + + // Legacy write to agent_contexts + agent_test_history. 
Retained ONLY + // for non-owner runs so a third-party who runs evaluate_agent_quality + // against someone else's agent still has a session-scoped audit trail + // (their own org's agent_test_history). Owner runs already wrote + // canonical state above (PR #4250); writing twice would split the + // audit and re-introduce the dual-write bug PR #4247 is closing. + // + // PR 4 of #4247 collapses agent_contexts.last_test_* into a derived + // view, after which this legacy block (and recordTest itself) drop + // entirely. + if (!isAgentOwner) { + try { + const context = await agentContextDb.getByOrgAndUrl(organizationId, resolved.resolvedUrl); + if (context) { + await agentContextDb.recordTest({ + agent_context_id: context.id, + scenario: 'quality_evaluation', + overall_passed: result.overall_status === 'passing', + steps_passed: result.summary.tracks_passed, + steps_failed: result.summary.tracks_failed, + total_duration_ms: result.total_duration_ms, + summary: result.summary.headline, + dry_run: true, + triggered_by: 'user', + user_id: memberContext?.workos_user?.workos_user_id, + agent_profile_json: result.agent_profile, + }); + } + } catch (error) { + logger.debug({ error }, 'Could not record quality evaluation result'); } - } catch (error) { - logger.debug({ error }, 'Could not record quality evaluation result'); } } diff --git a/server/src/db/compliance-db.ts b/server/src/db/compliance-db.ts index 236abff905..d3770fb889 100644 --- a/server/src/db/compliance-db.ts +++ b/server/src/db/compliance-db.ts @@ -11,7 +11,7 @@ const logger = baseLogger.child({ module: 'compliance-db' }); export type LifecycleStage = 'development' | 'testing' | 'production' | 'deprecated'; export type ComplianceStatus = 'passing' | 'degraded' | 'failing' | 'unknown'; export type OverallRunStatus = 'passing' | 'failing' | 'partial'; -export type TriggeredBy = 'heartbeat' | 'manual' | 'webhook'; +export type TriggeredBy = 'heartbeat' | 'manual' | 'webhook' | 'owner_test'; export type 
TrackStatus = 'pass' | 'fail' | 'partial' | 'skip' | 'silent'; /** @@ -118,6 +118,8 @@ export interface AgentComplianceStatus { previous_status: string | null; status_changed_at: Date | null; updated_at: Date; + /** triggered_by of the most recent non-dry-run in agent_compliance_runs */ + last_triggered_by: TriggeredBy | null; } export type StoryboardStatus = 'passing' | 'failing' | 'partial' | 'untested'; @@ -427,9 +429,15 @@ export class ComplianceDatabase { async getComplianceStatus(agentUrl: string): Promise { const result = await query( - `SELECT s.*, COALESCE(m.lifecycle_stage, 'production') AS lifecycle_stage + `SELECT s.*, COALESCE(m.lifecycle_stage, 'production') AS lifecycle_stage, + r.triggered_by AS last_triggered_by FROM agent_compliance_status s LEFT JOIN agent_registry_metadata m ON m.agent_url = s.agent_url + LEFT JOIN LATERAL ( + SELECT triggered_by FROM agent_compliance_runs + WHERE agent_url = s.agent_url AND dry_run = false + ORDER BY tested_at DESC LIMIT 1 + ) r ON true WHERE s.agent_url = $1`, [agentUrl], ); @@ -455,9 +463,15 @@ export class ComplianceDatabase { if (agentUrls.length === 0) return new Map(); const result = await query( - `SELECT s.*, COALESCE(m.lifecycle_stage, 'production') AS lifecycle_stage + `SELECT s.*, COALESCE(m.lifecycle_stage, 'production') AS lifecycle_stage, + r.triggered_by AS last_triggered_by FROM agent_compliance_status s LEFT JOIN agent_registry_metadata m ON m.agent_url = s.agent_url + LEFT JOIN LATERAL ( + SELECT triggered_by FROM agent_compliance_runs + WHERE agent_url = s.agent_url AND dry_run = false + ORDER BY tested_at DESC LIMIT 1 + ) r ON true WHERE s.agent_url = ANY($1)`, [agentUrls], ); diff --git a/server/src/db/migrations/471_owner_test_triggered_by.sql b/server/src/db/migrations/471_owner_test_triggered_by.sql new file mode 100644 index 0000000000..0e450bb1ad --- /dev/null +++ b/server/src/db/migrations/471_owner_test_triggered_by.sql @@ -0,0 +1,16 @@ +-- Add 'owner_test' to triggered_by CHECK 
constraints in compliance tables. +-- Owner-triggered storyboard runs (via evaluate_agent_quality) now write to +-- canonical compliance state, distinguished from heartbeat and dashboard-manual +-- runs by triggered_by = 'owner_test'. See issue #4247. + +ALTER TABLE agent_compliance_runs + DROP CONSTRAINT IF EXISTS valid_triggered_by, + ADD CONSTRAINT valid_triggered_by CHECK ( + triggered_by IN ('heartbeat', 'manual', 'webhook', 'owner_test') + ); + +ALTER TABLE agent_storyboard_status + DROP CONSTRAINT IF EXISTS valid_storyboard_triggered_by, + ADD CONSTRAINT valid_storyboard_triggered_by CHECK ( + triggered_by IS NULL OR triggered_by IN ('heartbeat', 'manual', 'webhook', 'owner_test') + ); diff --git a/server/src/db/migrations/472_backfill_agent_test_history_to_compliance_runs.sql b/server/src/db/migrations/472_backfill_agent_test_history_to_compliance_runs.sql new file mode 100644 index 0000000000..7f53b2f376 --- /dev/null +++ b/server/src/db/migrations/472_backfill_agent_test_history_to_compliance_runs.sql @@ -0,0 +1,149 @@ +-- Migration 472: backfill owner-triggered agent_test_history rows into +-- agent_compliance_runs as triggered_by='owner_test' rows. +-- +-- Part of the #4247 compliance-state unification (PR 3 of 4). PR #4250 +-- (PR 1) made evaluate_agent_quality write canonical for owner runs going +-- forward; this migration backfills the historical rows so the compliance +-- API and dashboard reflect the full test history, not just runs from the +-- PR #4250 deploy onward. +-- +-- Scope: backfill ONLY rows with user_id IS NOT NULL (real owner-triggered +-- tests). Third-party / scheduled / unattributed rows are NOT touched here +-- — the table drop is a separate follow-up that includes an S3 cold-storage +-- export of those rows so audit history isn't silently lost (see #4247 +-- Acceptance Criteria). 
+-- +-- Mapping: +-- agent_test_history.agent_context_id → agent_contexts.agent_url +-- agent_test_history.overall_passed → overall_status ('passing' | 'failing') +-- agent_test_history.steps_passed → tracks_passed +-- agent_test_history.steps_failed → tracks_failed +-- agent_test_history.total_duration_ms→ total_duration_ms +-- agent_test_history.summary → headline +-- agent_test_history.agent_profile_json → agent_profile_json +-- agent_test_history.started_at → tested_at +-- triggered_by → 'owner_test' (constant) +-- dry_run → false (PR #4250's owner path uses dry_run=false) +-- +-- Idempotency: backfilled rows carry the source agent_test_history.id in +-- observations_json.backfill_source_id so a re-run is a no-op via the +-- WHERE NOT EXISTS guard. +-- +-- NOTE(review): user_id IS NOT NULL does not by itself prove ownership — +-- evaluate_agent_quality also passes user_id to recordTest on non-owner +-- runs. Confirm no third-party rows get promoted to 'owner_test' here. + +INSERT INTO agent_compliance_runs ( + agent_url, + lifecycle_stage, + overall_status, + headline, + total_duration_ms, + tracks_json, + tracks_passed, + tracks_failed, + tracks_skipped, + tracks_partial, + agent_profile_json, + observations_json, + triggered_by, + dry_run, + tested_at +) +SELECT + ac.agent_url, + COALESCE(arm.lifecycle_stage, 'production') AS lifecycle_stage, + CASE WHEN ath.overall_passed THEN 'passing' ELSE 'failing' END AS overall_status, + ath.summary AS headline, + ath.total_duration_ms, + '[]'::jsonb AS tracks_json, + COALESCE(ath.steps_passed, 0) AS tracks_passed, + COALESCE(ath.steps_failed, 0) AS tracks_failed, + 0 AS tracks_skipped, + 0 AS tracks_partial, + ath.agent_profile_json, + jsonb_build_object( + 'backfill_source', 'agent_test_history', + 'backfill_source_id', ath.id::text, + 'backfill_migration', '472', + 'original_scenario', ath.scenario + ) AS observations_json, + 'owner_test' AS triggered_by, + FALSE AS dry_run, + ath.started_at AS tested_at +FROM agent_test_history ath +JOIN agent_contexts ac ON ac.id = ath.agent_context_id +LEFT JOIN agent_registry_metadata arm ON arm.agent_url = ac.agent_url +WHERE ath.user_id IS NOT NULL + AND NOT EXISTS ( + SELECT 1 FROM 
agent_compliance_runs acr + WHERE acr.observations_json->>'backfill_source_id' = ath.id::text + ); + +-- Upsert agent_compliance_status from the latest backfilled row per agent +-- so the dashboard immediately reflects the most recent owner-triggered +-- verdict for any agent that has no status row yet. For an existing row, +-- status, last_checked_at and last_triggered_by change only when the +-- backfilled run is newer than the stored check (last-write-wins, per PR +-- #4250's tests); pass/fail timestamps only move forward, the rest is kept. +INSERT INTO agent_compliance_status ( + agent_url, + status, + lifecycle_stage, + last_checked_at, + last_passed_at, + last_failed_at, + tracks_summary_json, + headline, + status_changed_at, + last_triggered_by +) +SELECT DISTINCT ON (acr.agent_url) + acr.agent_url, + CASE WHEN acr.overall_status = 'passing' THEN 'passing' ELSE 'failing' END, + acr.lifecycle_stage, + acr.tested_at, + CASE WHEN acr.overall_status = 'passing' THEN acr.tested_at ELSE NULL END, + CASE WHEN acr.overall_status = 'failing' THEN acr.tested_at ELSE NULL END, + '{}'::jsonb, + acr.headline, + acr.tested_at, + 'owner_test' +FROM agent_compliance_runs acr +WHERE acr.observations_json->>'backfill_migration' = '472' +ORDER BY acr.agent_url, acr.tested_at DESC +ON CONFLICT (agent_url) DO UPDATE SET + status = CASE + WHEN agent_compliance_status.last_checked_at IS NULL + OR agent_compliance_status.last_checked_at < EXCLUDED.last_checked_at + THEN EXCLUDED.status + ELSE agent_compliance_status.status + END, + last_checked_at = GREATEST( + COALESCE(agent_compliance_status.last_checked_at, EXCLUDED.last_checked_at), + EXCLUDED.last_checked_at + ), + last_passed_at = CASE + WHEN EXCLUDED.last_passed_at IS NOT NULL + AND (agent_compliance_status.last_passed_at IS NULL + OR agent_compliance_status.last_passed_at < EXCLUDED.last_passed_at) + THEN EXCLUDED.last_passed_at + ELSE agent_compliance_status.last_passed_at + END, + last_failed_at = CASE + WHEN EXCLUDED.last_failed_at IS NOT NULL + AND 
(agent_compliance_status.last_failed_at IS NULL + OR agent_compliance_status.last_failed_at < EXCLUDED.last_failed_at) + THEN EXCLUDED.last_failed_at + ELSE agent_compliance_status.last_failed_at + END, + last_triggered_by = CASE + WHEN agent_compliance_status.last_checked_at IS NULL + OR agent_compliance_status.last_checked_at < EXCLUDED.last_checked_at + THEN EXCLUDED.last_triggered_by + ELSE agent_compliance_status.last_triggered_by + END; + +-- NOTE: this migration does NOT drop agent_test_history. The drop is +-- deferred to a follow-up migration that runs after: +-- (a) the 14-day soak window from PR #4250 deploy, +-- (b) the 7-day soak window from PR #4263 deploy, +-- (c) S3 cold-storage export of third-party rows (user_id IS NULL), +-- (d) row-count delta verification on staging. +-- See #4247 acceptance criteria. diff --git a/server/src/routes/registry-api.ts b/server/src/routes/registry-api.ts index 925aec6cda..936b7b5dc0 100644 --- a/server/src/routes/registry-api.ts +++ b/server/src/routes/registry-api.ts @@ -4253,6 +4253,7 @@ export function createRegistryApiRouter(config: RegistryApiConfig): Router { membership_tier_label: ownerMembership.membership_tier_label, subscription_status: ownerMembership.subscription_status, is_api_access_tier: ownerMembership.is_api_access_tier, + verdict_source: status.last_triggered_by ?? null, verified: badges.length > 0, verified_badges: badges.map(b => ({ role: b.role, diff --git a/server/src/schemas/registry.ts b/server/src/schemas/registry.ts index 4a59bdd2be..f29a961f11 100644 --- a/server/src/schemas/registry.ts +++ b/server/src/schemas/registry.ts @@ -341,6 +341,8 @@ export const AgentComplianceDetailSchema = z membership_tier_label: z.string().nullable().optional().openapi({ description: "Owner-scoped: human-readable label for membership_tier (e.g. 'Builder'). Null for non-owners." 
}), subscription_status: z.string().nullable().optional().openapi({ description: "Owner-scoped: the agent owner's subscription status (active, past_due, trialing, etc.). Null for non-owners." }), is_api_access_tier: z.boolean().optional().openapi({ description: "Owner-scoped: true when the owner's tier and subscription status grant badge eligibility. False for non-owners. Single source of truth — UI should not re-derive." }), + verdict_source: z.enum(["heartbeat", "owner_test", "manual", "webhook"]).nullable().optional() + .openapi({ description: "triggered_by value of the most recent non-dry-run compliance check. 'heartbeat' = scheduled run; 'owner_test' = agent owner triggered via evaluate_agent_quality. Null when no run has been recorded yet." }), verified: z.boolean().optional(), verified_badges: z.array(VerificationBadgeSchema).optional(), }) diff --git a/server/tests/unit/compliance-db-last-write-wins.test.ts b/server/tests/unit/compliance-db-last-write-wins.test.ts new file mode 100644 index 0000000000..bec028e4bc --- /dev/null +++ b/server/tests/unit/compliance-db-last-write-wins.test.ts @@ -0,0 +1,204 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; + +vi.mock('../../src/db/client.js', () => ({ + query: vi.fn(), + getClient: vi.fn(), +})); + +vi.mock('../../src/db/encryption.js', () => ({ + decrypt: vi.fn(), + encrypt: vi.fn(), + deriveKey: vi.fn(), +})); + +import { ComplianceDatabase } from '../../src/db/compliance-db.js'; +import { query, getClient } from '../../src/db/client.js'; + +const mockedQuery = vi.mocked(query); +const mockedGetClient = vi.mocked(getClient); + +const EMPTY = { rows: [], rowCount: 0, command: '', oid: 0, fields: [] }; + +function makeTransactionClient(queryResponses: Array<{ rows: any[] }>) { + const calls: string[] = []; + let idx = 0; + const client = { + query: vi.fn(async (sql: string) => { + calls.push(typeof sql === 'string' ? sql.trim().split(/\s+/)[0] : sql); + const resp = queryResponses[idx] ?? 
EMPTY; + idx++; + return { ...EMPTY, ...resp }; + }), + release: vi.fn(), + _calls: calls, + }; + return client; +} + +const AGENT_URL = 'https://agent.example.com'; + +function makeRunRow(triggeredBy: string) { + return { + id: 'run-001', + agent_url: AGENT_URL, + lifecycle_stage: 'production', + overall_status: 'passing', + headline: null, + total_duration_ms: 100, + tested_at: new Date(), + tracks_json: [], + tracks_passed: 1, + tracks_failed: 0, + tracks_skipped: 0, + tracks_partial: 0, + agent_profile_json: null, + observations_json: null, + triggered_by: triggeredBy, + dry_run: false, + }; +} + +const minimalInput = (triggeredBy: 'heartbeat' | 'owner_test') => ({ + agent_url: AGENT_URL, + lifecycle_stage: 'production' as const, + overall_status: 'passing' as const, + tracks_json: [{ track: 'core', status: 'pass' as const, scenario_count: 1, passed_count: 1, duration_ms: 100 }], + tracks_passed: 1, + tracks_failed: 0, + tracks_skipped: 0, + tracks_partial: 0, + triggered_by: triggeredBy, + dry_run: false, +}); + +describe('ComplianceDatabase — last-write-wins on agent_compliance_status', () => { + let db: ComplianceDatabase; + + beforeEach(() => { + db = new ComplianceDatabase(); + vi.clearAllMocks(); + }); + + /** + * Contract: agent_compliance_status uses ON CONFLICT DO UPDATE (not DO NOTHING). + * Every recordComplianceRun call — regardless of triggered_by — overwrites the + * materialized status row. A future change to "pick highest-priority source" or + * "first-write-wins" would break this test. 
+ */ + it('always upserts status regardless of triggered_by — last-write-wins', async () => { + const statusRow = { rows: [{ status: 'passing', previous_status: null }] }; + + const client = makeTransactionClient([ + EMPTY, // BEGIN + { rows: [makeRunRow('heartbeat')] }, // INSERT agent_compliance_runs + statusRow, // UPSERT agent_compliance_status + EMPTY, // COMMIT + ]); + mockedGetClient.mockResolvedValueOnce(client as any); + + await db.recordComplianceRun(minimalInput('heartbeat')); + + const upsertCall = client.query.mock.calls.find( + ([sql]: [string]) => typeof sql === 'string' && sql.includes('ON CONFLICT (agent_url) DO UPDATE'), + ); + expect(upsertCall).toBeDefined(); + }); + + it('owner_test write at T+1 wins over prior heartbeat — triggered_by is forwarded verbatim', async () => { + const statusRow = { rows: [{ status: 'passing', previous_status: 'passing' }] }; + + const client1 = makeTransactionClient([ + EMPTY, + { rows: [makeRunRow('heartbeat')] }, + statusRow, + EMPTY, + ]); + mockedGetClient.mockResolvedValueOnce(client1 as any); + await db.recordComplianceRun(minimalInput('heartbeat')); + + const heartbeatRunInsert = client1.query.mock.calls.find( + ([sql]: [string]) => typeof sql === 'string' && sql.includes('INSERT INTO agent_compliance_runs'), + ); + expect(heartbeatRunInsert).toBeDefined(); + expect(heartbeatRunInsert![1]).toContain('heartbeat'); + + const client2 = makeTransactionClient([ + EMPTY, + { rows: [makeRunRow('owner_test')] }, + statusRow, + EMPTY, + ]); + mockedGetClient.mockResolvedValueOnce(client2 as any); + await db.recordComplianceRun(minimalInput('owner_test')); + + const ownerTestRunInsert = client2.query.mock.calls.find( + ([sql]: [string]) => typeof sql === 'string' && sql.includes('INSERT INTO agent_compliance_runs'), + ); + expect(ownerTestRunInsert).toBeDefined(); + expect(ownerTestRunInsert![1]).toContain('owner_test'); + + const ownerTestUpsert = client2.query.mock.calls.find( + ([sql]: [string]) => typeof sql === 
'string' && sql.includes('ON CONFLICT (agent_url) DO UPDATE'), + ); + expect(ownerTestUpsert).toBeDefined(); + }); + + it('heartbeat at T+3 wins over prior owner_test at T+2 — no source-priority filtering', async () => { + const statusRow = { rows: [{ status: 'passing', previous_status: 'passing' }] }; + + // T+2: record the owner_test run first so the heartbeat below actually follows one. + const ownerClient = makeTransactionClient([EMPTY, { rows: [makeRunRow('owner_test')] }, statusRow, EMPTY]); + mockedGetClient.mockResolvedValueOnce(ownerClient as any); + await db.recordComplianceRun(minimalInput('owner_test')); + + const client = makeTransactionClient([EMPTY, { rows: [makeRunRow('heartbeat')] }, statusRow, EMPTY]); + mockedGetClient.mockResolvedValueOnce(client as any); + await db.recordComplianceRun(minimalInput('heartbeat')); + + const runInsert = client.query.mock.calls.find( + ([sql]: [string]) => typeof sql === 'string' && sql.includes('INSERT INTO agent_compliance_runs'), + ); + expect(runInsert![1]).toContain('heartbeat'); + + const upsert = client.query.mock.calls.find( + ([sql]: [string]) => typeof sql === 'string' && sql.includes('ON CONFLICT (agent_url) DO UPDATE'), + ); + expect(upsert).toBeDefined(); + }); + + it('getComplianceStatus LATERAL join returns last_triggered_by from most recent non-dry run', async () => { + const now = new Date(); + mockedQuery.mockResolvedValueOnce({ + rows: [{ + agent_url: AGENT_URL, + status: 'passing', + lifecycle_stage: 'production', + last_checked_at: now, + last_passed_at: now, + last_failed_at: null, + streak_days: 1, + streak_started_at: now, + tracks_summary_json: { core: 'pass' }, + headline: null, + previous_status: null, + status_changed_at: null, + updated_at: now, + last_triggered_by: 'owner_test', + }], + rowCount: 1, + command: '', + oid: 0, + fields: [], + }); + + const status = await db.getComplianceStatus(AGENT_URL); + + expect(status).not.toBeNull(); + expect(status!.last_triggered_by).toBe('owner_test'); + + const [sql] = mockedQuery.mock.calls[0]; + expect(sql).toContain('dry_run = false'); + expect(sql).toContain('ORDER BY tested_at DESC'); + expect(sql).toContain('LIMIT 1'); + }); +});