diff --git a/server/src/addie/mcp/member-tools.ts b/server/src/addie/mcp/member-tools.ts index 4635f37573..d311be67e2 100644 --- a/server/src/addie/mcp/member-tools.ts +++ b/server/src/addie/mcp/member-tools.ts @@ -107,6 +107,7 @@ import { issueDomainChallenge, verifyDomainChallenge } from '../../services/bran import { getWorkos } from '../../auth/workos-client.js'; import { resolveUserRole } from '../../utils/resolve-user-role.js'; import { recordAgentTestRun } from '../../db/agent-test-db.js'; +import { canonicalizeAgentUrl } from '../../db/publisher-db.js'; const memberDb = new MemberDatabase(); const agentContextDb = new AgentContextDatabase(); @@ -5653,15 +5654,29 @@ export function createMemberToolHandlers( return 'This feature requires an organization. Visit https://agenticadvertising.org/onboarding to create one (free, takes 2 minutes). You can still use the public test agent directly via `evaluate_agent_quality` without an organization.'; } - const agentUrl = input.agent_url as string; + const rawAgentUrl = input.agent_url as string; try { - const parsed = new URL(agentUrl); + const parsed = new URL(rawAgentUrl); if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') { return 'Agent URL must use https:// or http:// protocol.'; } } catch { return 'Invalid agent URL format. Please provide a full URL like https://your-agent.example.com'; } + // Canonicalize so this writes the same row as the REST POST /api/me/agents + // path (issue #3573). Query strings and fragments are rejected at the + // boundary; canonicalizeAgentUrl itself preserves them. + if (rawAgentUrl.includes('?') || rawAgentUrl.includes('#')) { + return 'Agent URL must not contain query strings or fragments.'; + } + const canonical = canonicalizeAgentUrl(rawAgentUrl); + if (!canonical) { + return 'Invalid agent URL format. Please provide a full URL like https://your-agent.example.com'; + } + // Bind to a typed local so closures (ensureAgentInProfile) see `string` + // rather than `string | null` — TS can't carry the narrowing across the + // function boundary. + const agentUrl: string = canonical; const agentName = input.agent_name as string | undefined; const authToken = input.auth_token as string | undefined; if (authToken !== undefined) { @@ -5737,7 +5752,9 @@ export function createMemberToolHandlers( } const agents = profile.agents || []; - const existing = agents.find((a: any) => a.url === agentUrl); + // Match in canonical form so a legacy non-canonical row gets + // updated in place rather than duplicated (issue #3573). + const existing = agents.find((a: any) => (canonicalizeAgentUrl(a.url) ?? a.url) === agentUrl); if (!existing) { // Default to members_only, not public. The public directory // requires an API-access tier (Professional+); defaulting to @@ -5947,7 +5964,11 @@ export function createMemberToolHandlers( return 'This feature requires an organization. Visit https://agenticadvertising.org/onboarding to create one (free, takes 2 minutes). You can still use the public test agent directly via `evaluate_agent_quality` without an organization.'; } - const agentUrl = input.agent_url as string; + const rawAgentUrl = input.agent_url as string; + // Canonicalize so this matches whatever shape save_agent / POST + // /api/me/agents wrote (issue #3573). A fallback to the raw URL keeps + // legacy non-canonical rows reachable for removal. + const agentUrl = canonicalizeAgentUrl(rawAgentUrl) ?? rawAgentUrl; type ProfileRemoveStatus = | { ok: true; removedFromProfile: boolean; agentName: string | null } @@ -5964,12 +5985,14 @@ export function createMemberToolHandlers( const profile = await memberDb.getProfileByOrgId(removeOrgId); if (!profile) return { ok: true, removedFromProfile: false, agentName: null }; const agents = profile.agents || []; - const existing = agents.find((a: any) => a.url === agentUrl); + // Match existing rows in canonical form so a legacy non-canonical + // entry is reachable for removal (issue #3573). + const existing = agents.find((a: any) => (canonicalizeAgentUrl(a.url) ?? a.url) === agentUrl); if (!existing) return { ok: true, removedFromProfile: false, agentName: null }; if ((existing as any).visibility === 'public') { return { ok: false, reason: 'public' }; } - const next = agents.filter((a: any) => a.url !== agentUrl); + const next = agents.filter((a: any) => (canonicalizeAgentUrl(a.url) ?? a.url) !== agentUrl); await memberDb.updateProfile(profile.id, { agents: next }); return { ok: true, removedFromProfile: true, agentName: (existing as any).name ?? null }; } catch (err) { diff --git a/server/src/federated-index.ts b/server/src/federated-index.ts index 7814f39208..1ec83382b1 100644 --- a/server/src/federated-index.ts +++ b/server/src/federated-index.ts @@ -1,5 +1,6 @@ import { FederatedIndexDatabase, type AgentPublisherAuthorization, type DiscoveredProperty, type PropertyIdentifier, type PublisherPropertySelector } from './db/federated-index-db.js'; import { MemberDatabase } from './db/member-db.js'; +import { canonicalizeAgentUrl } from './db/publisher-db.js'; import type { FederatedAgent, FederatedPublisher, DomainLookupResult, AgentType } from './types.js'; /** @@ -55,8 +56,13 @@ export class FederatedIndexService { const agentType = agentConfig.type || 'unknown'; if (type && agentType !== type) continue; - registeredAgents.set(agentConfig.url, { - url: agentConfig.url, + // Canonicalize the map key so two registrations differing only in + // case / trailing slash collapse to a single entry (issue #3573). + // Fall back to the raw url if canonicalization rejects (legacy + // whitespace etc.) so we never silently drop a stored agent. + const key = canonicalizeAgentUrl(agentConfig.url) ?? agentConfig.url; + registeredAgents.set(key, { + url: key, name: agentConfig.name || profile.display_name, type: agentType as FederatedAgent['type'], protocol: 'mcp', @@ -114,10 +120,15 @@ export class FederatedIndexService { const profiles = await this.memberDb.listProfiles({}); const registeredAgentUrls = new Map(); + // Key the enrichment map on canonical form (issue #3573) so a registered + // `https://Example.com/` matches a discovered `https://example.com`. + // Fall back to the raw url if canonicalization rejects so legacy + // non-canonical rows still enrich. for (const profile of profiles) { for (const agentConfig of profile.agents || []) { if (agentConfig.visibility === 'public') { - registeredAgentUrls.set(agentConfig.url, { + const key = canonicalizeAgentUrl(agentConfig.url) ?? agentConfig.url; + registeredAgentUrls.set(key, { slug: profile.slug, display_name: profile.display_name, }); @@ -130,7 +141,8 @@ export class FederatedIndexService { const authorizedAgents = authorizations .filter(auth => auth.source === 'adagents_json') .map(auth => { - const member = registeredAgentUrls.get(auth.agent_url); + const lookupKey = canonicalizeAgentUrl(auth.agent_url) ?? auth.agent_url; + const member = registeredAgentUrls.get(lookupKey); return { url: auth.agent_url, authorized_for: auth.authorized_for, @@ -141,7 +153,8 @@ export class FederatedIndexService { // Get sales agents claiming this domain const claims = await this.db.getSalesAgentsClaimingDomain(domain); const salesAgentsClaiming = claims.map(claim => { - const member = registeredAgentUrls.get(claim.discovered_by_agent); + const lookupKey = canonicalizeAgentUrl(claim.discovered_by_agent) ?? claim.discovered_by_agent; + const member = registeredAgentUrls.get(lookupKey); return { url: claim.discovered_by_agent, ...(member ? { member } : {}), @@ -190,11 +203,15 @@ export class FederatedIndexService { async getAllAgentDomainPairs(): Promise>> { const pairs = await this.db.getAllAgentDomainPairs(); const result = new Map>(); + // Canonicalize so legacy raw rows in the DB (the write path stores + // verbatim; SQL canonicalizes on read but not on bulk fetch) collapse + // into one key when a caller looks up by canonical form (issue #3573). for (const { agent_url, publisher_domain } of pairs) { - let domains = result.get(agent_url); + const key = canonicalizeAgentUrl(agent_url) ?? agent_url; + let domains = result.get(key); if (!domains) { domains = new Set(); - result.set(agent_url, domains); + result.set(key, domains); } domains.add(publisher_domain); } diff --git a/server/src/routes/member-agents.ts b/server/src/routes/member-agents.ts index 61f051c37b..ca14b3b7f1 100644 --- a/server/src/routes/member-agents.ts +++ b/server/src/routes/member-agents.ts @@ -33,6 +33,7 @@ import { import { resolvePrimaryOrganization } from '../db/users-db.js'; import { resolveUserOrgMembership } from '../utils/resolve-user-org-membership.js'; import { getPool } from '../db/client.js'; +import { canonicalizeAgentUrl } from '../db/publisher-db.js'; import type { AgentConfig } from '../types.js'; import { isValidAgentType } from '../types.js'; import { resolveAgentTypes, logResolvedTypeChanges } from './member-profiles.js'; @@ -319,8 +320,12 @@ export function createMemberAgentsRouter(config: MemberAgentsRouterConfig): Rout // lifecycle_stage / check_interval_hours / opt-out the heartbeat or // dashboard wrote earlier; we only seed the row when it doesn't // exist. Defaults inherit from the column DDL. + // Canonicalize before seeding the metadata table. Handlers above + // already canonicalize, but this keeps any future write site honest + // and matches the canonical-form invariant the rest of the registry + // relies on (issue #3573). const urls = typed - .map(a => (a && typeof a.url === 'string' ? a.url : null)) + .map(a => (a && typeof a.url === 'string' ? canonicalizeAgentUrl(a.url) : null)) .filter((u): u is string => u !== null); if (urls.length > 0) { await client.query( @@ -405,6 +410,16 @@ export function createMemberAgentsRouter(config: MemberAgentsRouterConfig): Rout if (!isParseableUrl(body.url)) { return res.status(400).json({ error: 'url must be a valid URL' }); } + // Query strings and fragments have no place in agent identity (issue + // #3573). Reject at the boundary — `canonicalizeAgentUrl` itself + // preserves them verbatim, so the check belongs here. + if (body.url.includes('?') || body.url.includes('#')) { + return res.status(400).json({ error: 'url must not contain query strings or fragments' }); + } + const canonicalUrl = canonicalizeAgentUrl(body.url); + if (!canonicalUrl) { + return res.status(400).json({ error: 'url is not a valid agent URL' }); + } // `type` is required from the caller — never inferred. 'unknown' is // reserved for server-side smuggle protection (resolveAgentTypes), not // for client input. The caller MUST declare what kind of agent this is. @@ -414,7 +429,10 @@ export function createMemberAgentsRouter(config: MemberAgentsRouterConfig): Rout message: 'Specify one of: brand, rights, measurement, governance, creative, sales, buying, signals.', }); } - const targetUrl = body.url; + // Persist and compare in canonical form so the registered side + // collapses with the discovered side (issue #3573). + body.url = canonicalUrl; + const targetUrl = canonicalUrl; // Auto-bootstrap a private member profile if the caller's org doesn't // have one yet. Reuses `ensureMemberProfileExists` (the same helper @@ -440,7 +458,9 @@ export function createMemberAgentsRouter(config: MemberAgentsRouterConfig): Rout } const result = await applyMemberAgentMutation(orgId, (existing) => { - const idx = existing.findIndex((a) => a.url === targetUrl); + // Match existing rows in canonical form so a legacy non-canonical + // entry (pre-#3573) gets upgraded in place rather than duplicated. + const idx = existing.findIndex((a) => (canonicalizeAgentUrl(a.url) ?? a.url) === targetUrl); const isUpdate = idx !== -1; const next = isUpdate ? existing.map((a, i) => (i === idx ? { ...a, ...body } : a)) @@ -470,16 +490,26 @@ export function createMemberAgentsRouter(config: MemberAgentsRouterConfig): Rout if (!orgId) return; // Express already URL-decodes path params; do not double-decode. - const targetUrl = req.params.url; + // Canonicalize so a member submitting `HTTPS://Example.com/` matches + // the row stored canonically (issue #3573). + const targetUrl = canonicalizeAgentUrl(req.params.url); + if (!targetUrl) { + return res.status(400).json({ error: 'url is not a valid agent URL' }); + } const patch = (req.body ?? {}) as Partial; // Refuse to silently drop a `url` rename. Tell the caller; never guess. - if (typeof patch.url === 'string' && patch.url !== targetUrl) { - return res.status(400).json({ - error: 'url_immutable', - message: - 'url cannot be changed via PATCH. DELETE the old entry and POST the new url.', - }); + // Compare in canonical form so `https://Example.com/` in the path and + // `https://example.com` in the body aren't flagged as a rename. + if (typeof patch.url === 'string') { + const patchCanonical = canonicalizeAgentUrl(patch.url); + if (patchCanonical !== targetUrl) { + return res.status(400).json({ + error: 'url_immutable', + message: + 'url cannot be changed via PATCH. DELETE the old entry and POST the new url.', + }); + } } // If `type` is being patched, it must be a valid declared type. 'unknown' // is server-side-only. Omitting `type` from the patch is fine — the @@ -494,7 +524,8 @@ export function createMemberAgentsRouter(config: MemberAgentsRouterConfig): Rout } const result = await applyMemberAgentMutation(orgId, (existing) => { - const idx = existing.findIndex((a) => a.url === targetUrl); + // Canonical-form match so a legacy non-canonical row is still found. + const idx = existing.findIndex((a) => (canonicalizeAgentUrl(a.url) ?? a.url) === targetUrl); if (idx === -1) { return { kind: 'reject' as const, @@ -521,10 +552,15 @@ export function createMemberAgentsRouter(config: MemberAgentsRouterConfig): Rout if (!orgId) return; // Express already URL-decodes path params; do not double-decode. - const targetUrl = req.params.url; + // Canonicalize so non-canonical url-encoded paths still match the + // canonical row stored on disk (issue #3573). + const targetUrl = canonicalizeAgentUrl(req.params.url); + if (!targetUrl) { + return res.status(400).json({ error: 'url is not a valid agent URL' }); + } const result = await applyMemberAgentMutation(orgId, (existing) => { - const idx = existing.findIndex((a) => a.url === targetUrl); + const idx = existing.findIndex((a) => (canonicalizeAgentUrl(a.url) ?? a.url) === targetUrl); if (idx === -1) { return { kind: 'reject' as const, @@ -551,7 +587,7 @@ export function createMemberAgentsRouter(config: MemberAgentsRouterConfig): Rout } return { kind: 'commit' as const, - next: existing.filter((a) => a.url !== targetUrl), + next: existing.filter((a) => (canonicalizeAgentUrl(a.url) ?? a.url) !== targetUrl), status: 204, }; }); diff --git a/server/src/routes/member-profiles.ts b/server/src/routes/member-profiles.ts index 06e357344e..6039b4c5ef 100644 --- a/server/src/routes/member-profiles.ts +++ b/server/src/routes/member-profiles.ts @@ -19,6 +19,7 @@ import { MemberDatabase } from "../db/member-db.js"; import { BrandDatabase, resolveBrandFromJson } from "../db/brand-db.js"; import { BrandManager } from "../brand-manager.js"; import { OrganizationDatabase, hasApiAccess, readMembershipTierFromClient, resolveMembershipTier, VALID_REVENUE_TIERS, VALID_MEMBERSHIP_TIERS } from "../db/organization-db.js"; +import { canonicalizeAgentUrl } from "../db/publisher-db.js"; import { OrgKnowledgeDatabase } from "../db/org-knowledge-db.js"; import { linkDomain } from "../db/organization-domains-db.js"; import { autoLinkByVerifiedDomain } from "../db/membership-db.js"; @@ -1140,6 +1141,29 @@ export function createMemberProfileRouter(config: MemberProfileRoutesConfig): Ro // the POST create path via gateAgentVisibilityForCaller. let warnings: VisibilityWarning[] = []; if (Array.isArray(updates.agents)) { + // Canonicalize every agent url before any downstream processing + // (issue #3573). The per-agent POST/PATCH path canonicalizes at + // the handler boundary; the bulk path must match so the same write + // applied via two surfaces lands as the same row. + for (let i = 0; i < updates.agents.length; i++) { + const a = updates.agents[i] as AgentConfig & { url?: unknown }; + if (a && typeof a.url === 'string') { + if (a.url.includes('?') || a.url.includes('#')) { + return res.status(400).json({ + error: 'invalid_agent_url', + message: `agents[${i}].url must not contain query strings or fragments`, + }); + } + const canonical = canonicalizeAgentUrl(a.url); + if (!canonical) { + return res.status(400).json({ + error: 'invalid_agent_url', + message: `agents[${i}].url is not a valid agent URL`, + }); + } + a.url = canonical; + } + } const localOrgForTier = await orgDb.getOrganization(targetOrgId); const callerHasApi = hasApiAccess(resolveMembershipTier(localOrgForTier)); const gated = gateAgentVisibilityForCaller(updates.agents, callerHasApi); diff --git a/server/tests/integration/member-agents-api.test.ts b/server/tests/integration/member-agents-api.test.ts index bf6ab6e9dd..eda5a24f6b 100644 --- a/server/tests/integration/member-agents-api.test.ts +++ b/server/tests/integration/member-agents-api.test.ts @@ -698,4 +698,185 @@ describe('Per-agent REST API (/api/me/agents)', () => { expect(profile!.agents).toHaveLength(1); expect(profile!.agents[0].url).toBe('https://pub.example/mcp'); }); + + // ===== URL canonicalization (issue #3573) ===== + // + // The registered side now applies `canonicalizeAgentUrl` at every write + // boundary so the JSONB / `agent_registry_metadata` rows collapse with + // the canonical form the discovered (crawler) path already uses. + // Pins: lowercase + trailing-slash collapse; query/fragment rejected. + + it('POST canonicalizes the url before persisting (lowercase + strip trailing slash)', async () => { + const orgId = `${TEST_PREFIX}_canon_post`; + const userId = `${TEST_PREFIX}_canon_post_user`; + await seedOrg(pool, orgId, 'individual_professional'); + await provisionUser(userId, orgId); + await createProfile(orgId, 'canonpost'); + + (app as any).setCurrentUser(userId); + const res = await request(app) + .post('/api/me/agents') + .send({ url: 'HTTPS://Canon.Example/Agent/', name: 'C', type: 'sales', visibility: 'private' }); + expect(res.status).toBe(201); + expect(res.body.agent.url).toBe('https://canon.example/agent'); + + const profile = await memberDb.getProfileByOrgId(orgId); + const match = profile!.agents.find((a) => a.url === 'https://canon.example/agent'); + expect(match).toBeDefined(); + + const meta = await pool.query<{ agent_url: string }>( + `SELECT agent_url FROM agent_registry_metadata WHERE agent_url = $1`, + ['https://canon.example/agent'], + ); + expect(meta.rowCount).toBe(1); + + await pool.query('DELETE FROM agent_registry_metadata WHERE agent_url = $1', [ + 'https://canon.example/agent', + ]); + }); + + it('POST is idempotent across non-canonical and canonical forms (no duplicate JSONB or metadata rows)', async () => { + const orgId = `${TEST_PREFIX}_canon_idem`; + const userId = `${TEST_PREFIX}_canon_idem_user`; + await seedOrg(pool, orgId, 'individual_professional'); + await provisionUser(userId, orgId); + await createProfile(orgId, 'canonidem'); + + (app as any).setCurrentUser(userId); + const first = await request(app) + .post('/api/me/agents') + .send({ url: 'HTTPS://Idem.Example/MCP/', name: 'First', type: 'sales', visibility: 'private' }); + expect(first.status).toBe(201); + expect(first.body.agent.url).toBe('https://idem.example/mcp'); + + const second = await request(app) + .post('/api/me/agents') + .send({ url: 'https://idem.example/mcp', name: 'Second', type: 'sales', visibility: 'private' }); + expect(second.status).toBe(200); + expect(second.body.agent.name).toBe('Second'); + + const profile = await memberDb.getProfileByOrgId(orgId); + const matching = profile!.agents.filter((a) => a.url === 'https://idem.example/mcp'); + expect(matching).toHaveLength(1); + + const meta = await pool.query<{ agent_url: string }>( + `SELECT agent_url FROM agent_registry_metadata WHERE agent_url IN ($1, $2)`, + ['https://idem.example/mcp', 'HTTPS://Idem.Example/MCP/'], + ); + expect(meta.rowCount).toBe(1); + expect(meta.rows[0].agent_url).toBe('https://idem.example/mcp'); + + await pool.query('DELETE FROM agent_registry_metadata WHERE agent_url = $1', [ + 'https://idem.example/mcp', + ]); + }); + + it('PATCH matches the canonical row when the path is non-canonical', async () => { + const orgId = `${TEST_PREFIX}_canon_patch`; + const userId = `${TEST_PREFIX}_canon_patch_user`; + await seedOrg(pool, orgId, 'individual_professional'); + await provisionUser(userId, orgId); + await createProfile(orgId, 'canonpatch'); + + (app as any).setCurrentUser(userId); + // Seed a canonical row first via POST. + await request(app) + .post('/api/me/agents') + .send({ url: 'https://patch.example/mcp', name: 'P', type: 'sales', visibility: 'private' }); + + // PATCH with a non-canonical url-encoded path — must collapse onto the canonical row. + const noncanonical = encodeURIComponent('HTTPS://Patch.Example/MCP/'); + const res = await request(app) + .patch(`/api/me/agents/${noncanonical}`) + .send({ name: 'Renamed' }); + expect(res.status).toBe(200); + expect(res.body.agent.name).toBe('Renamed'); + expect(res.body.agent.url).toBe('https://patch.example/mcp'); + + await pool.query('DELETE FROM agent_registry_metadata WHERE agent_url = $1', [ + 'https://patch.example/mcp', + ]); + }); + + it('PATCH allows body.url to differ from the path only in case/trailing slash', async () => { + const orgId = `${TEST_PREFIX}_canon_patch_body`; + const userId = `${TEST_PREFIX}_canon_patch_body_user`; + await seedOrg(pool, orgId, 'individual_professional'); + await provisionUser(userId, orgId); + await createProfile(orgId, 'canonpatchbody'); + + (app as any).setCurrentUser(userId); + await request(app) + .post('/api/me/agents') + .send({ url: 'https://patchbody.example/mcp', name: 'P', type: 'sales', visibility: 'private' }); + + const path = encodeURIComponent('https://patchbody.example/mcp'); + const res = await request(app) + .patch(`/api/me/agents/${path}`) + .send({ name: 'OK', url: 'HTTPS://PatchBody.Example/MCP/' }); + expect(res.status).toBe(200); + + await pool.query('DELETE FROM agent_registry_metadata WHERE agent_url = $1', [ + 'https://patchbody.example/mcp', + ]); + }); + + it('DELETE matches the canonical row when the path is non-canonical', async () => { + const orgId = `${TEST_PREFIX}_canon_delete`; + const userId = `${TEST_PREFIX}_canon_delete_user`; + await seedOrg(pool, orgId, 'individual_professional'); + await provisionUser(userId, orgId); + await createProfile(orgId, 'canondelete'); + + (app as any).setCurrentUser(userId); + await request(app) + .post('/api/me/agents') + .send({ url: 'https://del.example/mcp', name: 'D', type: 'sales', visibility: 'private' }); + + const noncanonical = encodeURIComponent('HTTPS://Del.Example/MCP/'); + const res = await request(app).delete(`/api/me/agents/${noncanonical}`); + expect(res.status).toBe(204); + + const profile = await memberDb.getProfileByOrgId(orgId); + expect(profile!.agents.find((a) => a.url === 'https://del.example/mcp')).toBeUndefined(); + + await pool.query('DELETE FROM agent_registry_metadata WHERE agent_url = $1', [ + 'https://del.example/mcp', + ]); + }); + + it('POST returns 400 when url contains a query string or fragment', async () => { + const orgId = `${TEST_PREFIX}_canon_qf`; + const userId = `${TEST_PREFIX}_canon_qf_user`; + await seedOrg(pool, orgId, 'individual_professional'); + await provisionUser(userId, orgId); + await createProfile(orgId, 'canonqf'); + + (app as any).setCurrentUser(userId); + const withQuery = await request(app) + .post('/api/me/agents') + .send({ url: 'https://q.example/mcp?v=1', type: 'sales' }); + expect(withQuery.status).toBe(400); + + const withFrag = await request(app) + .post('/api/me/agents') + .send({ url: 'https://q.example/mcp#frag', type: 'sales' }); + expect(withFrag.status).toBe(400); + }); + + it('POST returns 400 when url contains an embedded wildcard', async () => { + const orgId = `${TEST_PREFIX}_canon_wild`; + const userId = `${TEST_PREFIX}_canon_wild_user`; + await seedOrg(pool, orgId, 'individual_professional'); + await provisionUser(userId, orgId); + await createProfile(orgId, 'canonwild'); + + (app as any).setCurrentUser(userId); + // `*` in the path parses cleanly via `new URL` but is rejected by + // canonicalizeAgentUrl per migration 440's CHECK constraint. + const res = await request(app) + .post('/api/me/agents') + .send({ url: 'https://wild.example/*/mcp', type: 'sales' }); + expect(res.status).toBe(400); + }); }); diff --git a/server/tests/integration/registry-crawler-cache.test.ts b/server/tests/integration/registry-crawler-cache.test.ts index c1ea167097..f61da0f656 100644 --- a/server/tests/integration/registry-crawler-cache.test.ts +++ b/server/tests/integration/registry-crawler-cache.test.ts @@ -820,4 +820,107 @@ describe('Registry crawler cache (PR 2 of #3177)', () => { expect(remaining.rows[0].source).toBe('agent_claim'); }); }); + + // ===== Cross-seam canonicalization pins (issue #3573) ===== + // + // The registered side (member_profiles.agents) and the discovered side + // (agent_publisher_authorizations) must agree on canonical form so a + // member badge enriches a discovered authorization that differs only in + // case / trailing slash. Scheme mismatch intentionally does NOT collapse. + describe('cross-seam canonicalization (issue #3573)', () => { + const XSEAM_DOMAIN = 'xseam.crawler-cache.example.com'; + const XSEAM_ORG = 'org_xseam_3573'; + const XSEAM_SLUG = 'xseam-3573'; + + beforeEach(async () => { + await pool.query( + `DELETE FROM agent_publisher_authorizations WHERE publisher_domain = $1`, + [XSEAM_DOMAIN], + ); + await pool.query( + `DELETE FROM member_profiles WHERE workos_organization_id = $1`, + [XSEAM_ORG], + ); + await pool.query( + `DELETE FROM organizations WHERE workos_organization_id = $1`, + [XSEAM_ORG], + ); + await pool.query( + `INSERT INTO organizations (workos_organization_id, name, is_personal, created_at, updated_at) + VALUES ($1, $2, true, NOW(), NOW())`, + [XSEAM_ORG, 'XSeam Test Org'], + ); + }); + + afterAll(async () => { + await pool.query( + `DELETE FROM agent_publisher_authorizations WHERE publisher_domain = $1`, + [XSEAM_DOMAIN], + ); + await pool.query( + `DELETE FROM member_profiles WHERE workos_organization_id = $1`, + [XSEAM_ORG], + ); + await pool.query( + `DELETE FROM organizations WHERE workos_organization_id = $1`, + [XSEAM_ORG], + ); + }); + + it('Pin 1: registered https://agent.example/ collapses with discovered https://agent.example (member badge enriches)', async () => { + // Member registered the URL with a trailing slash; pretend a legacy + // row landed in JSONB before the write-side canonicalization. + await pool.query( + `INSERT INTO member_profiles (workos_organization_id, display_name, slug, is_public, agents) + VALUES ($1, $2, $3, false, $4::jsonb)`, + [ + XSEAM_ORG, + 'XSeam Display', + XSEAM_SLUG, + JSON.stringify([{ url: 'https://agent.example/', visibility: 'public', type: 'sales' }]), + ], + ); + // Crawler discovered the URL without a trailing slash. + await pool.query( + `INSERT INTO agent_publisher_authorizations (agent_url, publisher_domain, source) + VALUES ($1, $2, 'adagents_json')`, + ['https://agent.example', XSEAM_DOMAIN], + ); + + const result = await federatedIndex.lookupDomain(XSEAM_DOMAIN); + expect(result.authorized_agents).toHaveLength(1); + expect(result.authorized_agents[0].url).toBe('https://agent.example'); + // Member enrichment must collapse onto the discovered authorization + // despite the trailing-slash mismatch. + expect(result.authorized_agents[0].member).toBeDefined(); + expect(result.authorized_agents[0].member?.slug).toBe(XSEAM_SLUG); + }); + + it('Pin 2: scheme mismatch (http vs https) intentionally does NOT collapse', async () => { + // Registered as http; discovered as https. Different security + // posture — the issue explicitly calls this out as non-collapse. + await pool.query( + `INSERT INTO member_profiles (workos_organization_id, display_name, slug, is_public, agents) + VALUES ($1, $2, $3, false, $4::jsonb)`, + [ + XSEAM_ORG, + 'XSeam Display', + XSEAM_SLUG, + JSON.stringify([{ url: 'http://agent.example', visibility: 'public', type: 'sales' }]), + ], + ); + await pool.query( + `INSERT INTO agent_publisher_authorizations (agent_url, publisher_domain, source) + VALUES ($1, $2, 'adagents_json')`, + ['https://agent.example', XSEAM_DOMAIN], + ); + + const result = await federatedIndex.lookupDomain(XSEAM_DOMAIN); + expect(result.authorized_agents).toHaveLength(1); + expect(result.authorized_agents[0].url).toBe('https://agent.example'); + // Member badge MUST NOT enrich — the registered http and discovered + // https are different agents per the issue. + expect(result.authorized_agents[0].member).toBeUndefined(); + }); + }); });