From effe36c28e977cd4c394eb18388ac8a08f37a4dd Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Mon, 27 Apr 2026 07:50:51 -0400 Subject: [PATCH 01/12] fix(billing): bump Stripe API version pin to 2026-04-22.dahlia Drive-by unblock for the precommit typecheck on this branch. Stripe SDK was upgraded; the apiVersion string in stripe-client.ts was missed and the type literal expected the newer date. Unrelated to the IdentityMatch spec work in the rest of this PR. Co-Authored-By: Claude Opus 4.7 (1M context) --- server/src/billing/stripe-client.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/billing/stripe-client.ts b/server/src/billing/stripe-client.ts index 11a99da9f6..3c263019e5 100644 --- a/server/src/billing/stripe-client.ts +++ b/server/src/billing/stripe-client.ts @@ -14,7 +14,7 @@ if (!STRIPE_SECRET_KEY) { export const stripe = STRIPE_SECRET_KEY ? new Stripe(STRIPE_SECRET_KEY, { - apiVersion: '2026-03-25.dahlia', + apiVersion: '2026-04-22.dahlia', }) : null; From d136a7688537c571d7f5b4823692ac0e5a9248fb Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Mon, 27 Apr 2026 07:52:05 -0400 Subject: [PATCH 02/12] spec(tmp): IdentityMatch & frequency capping architecture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Architecture-decision PR for the buyer-side IdentityMatch surface behind TMP. Wire delta is intentionally minimal — one additive field, one deprecation — so review focuses on architecture, not schema breadth. ## Wire-spec changes - identity-match-response.json: add `serve_window_sec` (1-300, default 60). Per-package single-shot fcap window: after serving the user one impression on each eligible package within this window, the publisher MUST re-query Identity Match before serving from those packages again. Not a router response cache TTL. - identity-match-response.json: deprecate `ttl_sec`. 
Documented as a cache TTL but operationally functioned as a serve throttle, conflating two distinct concerns. 6-week deprecation notice in the CHANGELOG; earliest removal 2026-06-07. ## Architecture spec - specs/identitymatch-fcap-architecture.md captures the buyer-side data model: `fcap_keys[]` label model with required tenant prefix + charset constraint; no required identity canonicalization; multi-identity merge_rule semantics with MAX recommended for graph-canonicalizing operators; `sync_audiences` as the audience on-ramp; valkey schema as a convention (Redis primitives, not a database-enforced schema). - Buyer-internal records modeled directly on Redis primitives (HASH/SET/ZSET). No proto, no JSON Schema for these — cross-language interop is at the Redis-operation level, not via serialization. - TMP IdentityMatch service stays a downstream read replica. Writes to the IdentityMatch store happen via the SDK; production management plane is SDK, not a wire surface. - Five conformance scenarios with full Redis-command walkthroughs. - OpenRTB 2.6 User.eids cross-walk for buyer-side codebases bridging protocols. - Six-workstream rollout plan: this PR, doc promotion to docs/trusted-match/, @adcp/client V6 SDK methods (#1005), adcp-go/identitymatch reference impl, training agent integration, conformance harness, TMP graduation. - Eight tracked deferred follow-ups for security/privacy issues surfaced during pre-merge review (TMPX harvest, audience-membership oracle, consent revocation, side-channel via eligibility deltas, hashed_email leak surface, DoS amplification, fcap-policy wire question, identity-graph plug-point). All TMP surfaces remain x-status: experimental. Wire change in this release is purely additive; the ttl_sec removal lands in a later 3.0.x release ≥ 6 weeks after notice. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../identitymatch-fcap-architecture-spec.md | 19 + CHANGELOG.md | 6 + specs/identitymatch-fcap-architecture.md | 552 ++++++++++++++++++ static/schemas/source/index.json | 3 +- .../source/tmp/identity-match-response.json | 11 +- 5 files changed, 588 insertions(+), 3 deletions(-) create mode 100644 .changeset/identitymatch-fcap-architecture-spec.md create mode 100644 specs/identitymatch-fcap-architecture.md diff --git a/.changeset/identitymatch-fcap-architecture-spec.md b/.changeset/identitymatch-fcap-architecture-spec.md new file mode 100644 index 0000000000..360ee7e66e --- /dev/null +++ b/.changeset/identitymatch-fcap-architecture-spec.md @@ -0,0 +1,19 @@ +--- +"adcontextprotocol": patch +--- + +IdentityMatch & frequency capping architecture spec, plus a wire-side fix to the response throttle field. Adds: + +- `specs/identitymatch-fcap-architecture.md` — design spec consolidating the buyer-internal valkey schema, `fcap_keys[]` label model with required tenant-prefixing, identity-handling rules (no required canonicalization), and storyboard conformance scenarios behind TMP IdentityMatch. +- `identity-match-response.json` — adds `serve_window_sec` (integer, 1–300, default 60) and deprecates `ttl_sec`. The original `ttl_sec` field was documented as a router response cache TTL but operationally functioned as a per-package single-shot fcap, conflating two distinct concerns. `serve_window_sec` carries the corrected semantic: at most one impression per eligible package per user during this window. Multi-impression fcap is handled separately by buyer-side exposure records and policies. +- `CHANGELOG.md` — 6-week deprecation notice for `ttl_sec` removal per the experimental-status contract. Earliest landing: 2026-06-07. + +The buyer-internal records (audience, exposure, package, fcap_policy) are documented as a **valkey schema** — Redis key patterns + primitive types (HASH / SET / ZSET) + field names within each. 
Cross-language interop between JS impression-trackers and Go IdentityMatch services is handled by Redis client libraries; agreement is at the operation level (`HINCRBY exposure:... count 1`, `SMEMBERS audience:...`), not via a binary serialization layer. No proto, no JSON Schema for these records — they aren't wire envelopes and they aren't binary blobs. + +The TMPX wire format itself is **unchanged** — already specified in `docs/trusted-match/specification.mdx` as a compact binary layout with version/timestamp/country/8-byte nonce/typed identity entries, with replay defense via master-side nonce dedup. + +JSON Schema continues to govern wire/RPC surfaces. Buyer-internal valkey records live in the spec doc as a Redis schema. Each contract uses the right tool for its job. + +All TMP surfaces remain `x-status: experimental`. Wire change in this release is purely additive (`serve_window_sec`); the `ttl_sec` removal lands in a later 3.0.x release ≥ 6 weeks after notice. Storyboard YAML deferred until TMP graduates from `experimental_features` into `supported_protocols` — buyer SDKs implement the five conformance scenarios as integration tests now. + +Several deferred security and privacy follow-ups are documented in the spec: TMPX harvest → competitor-suppression attack, eligibility-as-audience-membership oracle, consent revocation between IdentityMatch and impression, side-channel via eligibility deltas, hashed_email leak surface, and DoS amplification via large `package_ids[]`. None block this PR; each warrants a focused follow-up. diff --git a/CHANGELOG.md b/CHANGELOG.md index bbc303a47d..36c5aabced 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## Upcoming + +### Deprecation Notices — experimental surfaces + +- **TMP `identity-match-response.ttl_sec` is deprecated; replaced by `serve_window_sec`.** Notice published 2026-04-26. 
The `ttl_sec` field was documented as a router response cache TTL but operationally functioned as a per-package single-shot fcap, conflating two distinct concerns and silently breaking either when tuned. Replacement field `serve_window_sec` (integer, 1–300, default 60) carries the corrected semantic — *after serving the user one impression on each eligible package within this window, the publisher MUST re-query Identity Match before serving from those packages again.* This is **not** a router response cache. Multi-impression frequency capping is a separate concern handled by buyer-side exposure records and policies, updated out-of-band via TMPX impression callbacks regardless of this window. During the deprecation period, senders SHOULD populate both `ttl_sec` and `serve_window_sec` with the same value; receivers SHOULD prefer `serve_window_sec` when both are present. Per the [experimental-status contract](docs/reference/experimental-status.mdx), the `ttl_sec` field MAY be removed no earlier than **2026-06-07** (6 weeks after this notice) in a 3.0.x release. `serve_window_sec` lands additively in 3.0.1 alongside this notice. Tracked in `specs/identitymatch-fcap-architecture.md`. + ## 3.0.0 See [release notes](docs/reference/release-notes.mdx) for migration guidance, or [prerelease upgrade notes](docs/reference/migration/prerelease-upgrades.mdx) for rc.3 adopters. 
diff --git a/specs/identitymatch-fcap-architecture.md b/specs/identitymatch-fcap-architecture.md new file mode 100644 index 0000000000..21eda0d567 --- /dev/null +++ b/specs/identitymatch-fcap-architecture.md @@ -0,0 +1,552 @@ +# IdentityMatch & Frequency Capping — Architecture Spec + +**Status**: draft +**Target release**: 3.0.x (TMP is `x-status: experimental`) +**Related**: `static/schemas/source/tmp/`, `specs/prebid-tmp-proposal.md`, `docs/trusted-match/` +**Branch**: `bokelley/idmatch-design` + +This spec defines the buyer-internal data model and SDK surface that sit behind TMP's IdentityMatch operation. The IdentityMatch wire spec already exists; what is missing is a clear architecture for the audience layer, exposure layer, frequency-cap policy layer, cross-language SDK scope, and conformance testing strategy. The goal is to land all of this concretely enough that feedback can be on real artifacts rather than threads. + +## Problem + +The current TMP IdentityMatch wire spec (`static/schemas/source/tmp/identity-match-{request,response}.json`) defines what flows on the wire: identity tokens in, eligible package IDs and an HPKE-encrypted exposure token (`tmpx`) out. It does not define: + +1. **Buyer-side persistence model** — what records the buyer maintains in valkey to compute eligibility (audiences, exposures, fcap policy), and how those records are keyed. +2. **Frequency-cap semantics** — what dimensions a cap can apply to (campaign, advertiser, group, …) and how multi-identity users are handled. +3. **Cross-language SDK scope** — which SDKs need to read/write valkey, what shape the artifacts that cross language boundaries take, and how HPKE key management slots into existing AdCP key plumbing. +4. **Audience freshness** — how the audience and fcap state stay current relative to router-side response caching. +5. **Conformance** — how a third party validates that an IdentityMatch implementation is correct. 
+ +Without these decisions, the open-source IdentityMatch reference impl risks shipping with Go-shaped assumptions baked into wire-adjacent surfaces. + +## Architectural decisions + +### 1. The wire spec stays minimal; the buyer-internal model is where the design lives + +The existing IdentityMatch request/response is the public protocol surface. Audience, exposure, and fcap-policy records are buyer-internal — defined by AdCP so that cross-language SDKs can interoperate against the same valkey, but **not on the wire**. Sellers, routers, and publishers never see fcap_keys, audience records, or exposure records. + +This keeps the privacy boundary clean (publishers do not learn buyer fcap policy) and lets the buyer-internal model evolve faster than the wire spec. + +**Two contracts, with the right tool for each:** + +- **Wire / RPC** (HTTP JSON request/response) → JSON Schema under `static/schemas/source/tmp/`. Already integrated with the repo's docs and codegen pipeline. +- **Buyer-internal valkey schema** (audience, exposure, package, fcap_policy records) → documented in this spec as Redis key patterns + primitive types (HASH / SET / ZSET) + field names within each. Cross-language interop is handled by Redis client libraries; we don't need our own serialization layer for these records. + +The valkey schema is not a binary blob format. JS impression-trackers and Go IdentityMatch services interoperate by agreeing on the **Redis-level operations** (`HINCRBY exposure:... count 1`, `SMEMBERS audience:...`), not by deserializing each other's bytes. That makes proto / JSON Schema / any custom serialization unnecessary at this layer. + +### 2. `fcap_keys[]` as a label model, not hierarchy + +A frequency cap is identified by a tag of the form `tenant:dimension:value` — e.g. `buyer-acme:campaign:42`, `buyer-acme:campaign_group:7`, `buyer-acme:advertiser:13`, `buyer-acme:creative:8`. 
Packages declare which `fcap_keys` they belong to; exposure records are written per-key; policies (window, max count) are attached per-key. + +``` +package 2342: fcap_keys ["buyer-acme:campaign:42", + "buyer-acme:campaign_group:7", + "buyer-acme:advertiser:13"] +policy "buyer-acme:campaign:42": {window_sec: 60, max_count: 5} +policy "buyer-acme:advertiser:13": {window_sec: 60, max_count: 20} +``` + +**Tenant prefix is required.** Multi-tenant buyer-agent fleets host multiple advertiser orgs; without a tenant prefix, `campaign:42` collides on shared valkey counters and tenant A learns B's reach by watching the counter move. SDKs MUST refuse keys without a tenant prefix. + +**Charset constraint.** Each segment matches `[a-zA-Z0-9_-]+` so the `:` delimiter is unambiguous. URL-bearing or otherwise colon-bearing values must be hashed or shortened before use. + +**Why labels not hierarchy**: dimensions are heterogeneous across customers (some want creative-level caps, some line-item, some flight, some advertiser-roll-up). A fixed schema either over-prescribes or under-serves. Labels also make cross-seller fcap automatic — any policy whose key is shared across sellers (e.g., `buyer-acme:advertiser:13`) enforces across all of them with no extra mode. + +**Cross-cutting policies are explicit**, not implied. A campaign that needs both a per-campaign and a per-advertiser cap declares both keys and gets two policy lookups at check time. There is no implicit roll-up. + +### 3. No required canonicalization of user identity + +The protocol does **not** dictate a canonical user ID. Customers will use multiple identity providers (RampID, ID5, MAID, UID2, publisher-issued tokens) in parallel — Scope3's identity graph is canonical *only for Scope3-hosted IdentityMatch instances*. Other operators run their own graph or none at all. + +Records are keyed by `(uid_type, user_token)`. 
A user with three resolved identities produces three records on each write, and reads merge across all the user's identities at check time: + +``` +exposure:buyer-acme:campaign:42:rampid:abc → {count: 3, …} +exposure:buyer-acme:campaign:42:id5:def → {count: 2, …} +exposure:buyer-acme:campaign:42:maid:ghi → {count: 1, …} +``` + +The `merge_rule` on `FcapPolicy` is **required** — there is no implicit default, because the right rule depends on whether the buyer canonicalizes upstream. The recommendation: + +- `MAX` for buyers running an identity graph that canonicalizes upstream. Matches how Xandr / DV360 / TTD model multi-identity fcap; avoids over-counting when one impression resolves to two tokens. +- `OR` (count distinct identities exposed) only for graphless operators where identity tokens are known not to alias. Over-counts when the same impression carries multiple identities. +- `SUM` is rarely correct (assumes identities never co-occur for the same person). + +Customers who **want** canonicalization run their identity graph behind their own door — pre-canonicalizing tokens before write/read on both the `sync_audiences` path and the TMPX-decrypt path, then setting `MERGE_RULE_MAX` since the merge becomes a no-op. The protocol does not know this happened. + +### 4. Replace `ttl_sec` with `serve_window_sec` — fix a semantic drift, not just a cap + +The existing `ttl_sec` field on `identity-match-response.json` was documented as "how long the router should cache this response" but operationally functioned as a per-package single-shot fcap — buyers expected each eligible package to be served at most once per user per window, and to be re-queried thereafter. Two distinct concerns (response caching vs frequency capping) sharing one knob means anyone tuning for cost (long cache) silently breaks fcap, and anyone tuning for fcap (short cache) pays unnecessary IdentityMatch load. 
+ +Replacement: new field `serve_window_sec` with the corrected semantic — *after serving the user one impression on each eligible package within this window, the publisher MUST re-query Identity Match before serving from those packages again.* Default 60, max 300. Anything longer than 300 makes the per-package cap too coarse for typical campaigns; anything shorter than the IdentityMatch round-trip wastes the throttle. + +`ttl_sec` is deprecated. During the deprecation window: +- Senders SHOULD populate `serve_window_sec` AND `ttl_sec` with the same value. +- Receivers SHOULD prefer `serve_window_sec` when both are present. +- A future 3.0.x release, published ≥ 6 weeks after this notice, drops `ttl_sec` from `required` and removes it from the schema. + +This is **not** a router response cache. Multi-impression frequency capping is a separate concern, handled by buyer-side `ExposureRecord` + `FcapPolicy` and updated out-of-band via TMPX impression callbacks regardless of `serve_window_sec`. Audience freshness is a third concern, handled by `sync_audiences` cadence — entirely independent of this window. + +### 5. Two write paths into valkey, both buyer-side + +| Path | Writer | Frequency | Records produced | +|---|---|---|---| +| `sync_audiences` | Buyer's audience pipeline (already specified in AdCP) | Continuous / batched | `(uid_type, user_token) → audience_ids[]` | +| Impression callback | Buyer's impression-tracking SDK (JS today, others later) | Per impression | `(fcap_key, uid_type, user_token) → exposure counter increment` | + +A third path — package & policy CRUD — is buyer-control-plane (Nastassia's writethrough), populating `package → fcap_keys[]` and `fcap_key → policy` records. Not in the impression hot path. + +## Wire spec changes + +Additive changes to one existing schema, deprecation of one field. Lands additively in 3.0.1; the deprecated field removal lands in a later 3.0.x release ≥ 6 weeks after this notice. 
+ +### `identity-match-response.json`: add `serve_window_sec`, deprecate `ttl_sec` + +New field `serve_window_sec` (integer, 1-300, default 60). Existing `ttl_sec` field reframed in its description and marked deprecated; both fields coexist during the 6-week deprecation window. Senders populate both with the same value; receivers prefer `serve_window_sec`. + +The TMPX wire format itself is **unchanged** — already specified in `docs/trusted-match/specification.mdx:534-597` (16-byte header with version/timestamp/country/nonce/count plus typed identity entries) with replay defense via an 8-byte AEAD-protected nonce + master-side dedup. + +## Buyer-side valkey schema (normative) + +Four record types, each modeled directly on a Redis primitive. Cross-language interop is handled by Redis client libraries; agreement is at the operation level (`HINCRBY`, `SADD`, `SMEMBERS`), not at a serialization layer. + +**This is a convention, not a schema in the database-enforced sense.** Valkey / Redis does not validate writes against a schema definition — the contract documented here is enforced by the SDK on the write side and by the IdentityMatch reader on the read side. A buggy writer can still corrupt the store; the protocol relies on library discipline, not database constraints. SDK conformance tests are how that discipline is verified. + +### Audience record + +``` +type: SET (or ZSET if strength scores are used) +key: audience:{uid_type}:{user_token} +members: audience IDs the user belongs to +``` + +Optional companion HASH at `audience_meta:{uid_type}:{user_token}` for diagnostics: + +``` +type: HASH +fields: + updated_at: unix seconds last written + expires_at: unix seconds after which the SET MUST be ignored (0 = no deadline) + source: origin pipeline (typically "sync_audiences") +``` + +Written by the buyer's `sync_audiences` pipeline. Read at IdentityMatch eligibility time. 
Real-world `sync_audiences` cadences vary widely (prospecting segments are often hourly-to-daily batched; retargeting via streaming CDP can be near-real-time) — `expires_at` lets the pipeline publish freshness contracts directly on the record. + +If using ZSET, the score carries audience strength (0.0–1.0); IdentityMatch can apply a strength floor at eligibility time. + +### Exposure record + +``` +type: HASH +key: exposure:{fcap_key}:{uid_type}:{user_token} +fields: + count: uint, exposures inside the current policy window + first_seen: unix seconds (sliding-window policies) + last_seen: unix seconds, most recent exposure + window_start: unix seconds when the current fixed window opened (0 = sliding) +``` + +Incremented on TMPX decrypt with `HINCRBY exposure:... count 1` plus `HSET ... last_seen <unix_seconds>`. Atomic by Redis primitive; no serialization. Window semantics (sliding vs fixed) are policy-attached, not record-attached. + +### Package record + +``` +type: HASH +key: package:{seller_agent.agent_url}:{package_id} +fields: + media_buy_id: string (optional, for diagnostics) + active: "1" | "0" + updated_at: unix seconds +``` + +Companion SETs for the multi-valued lookups: + +``` +type: SET +key: package_fcap_keys:{seller_agent.agent_url}:{package_id} +members: e.g. "buyer-acme:campaign:42", "buyer-acme:advertiser:13" +``` + +``` +type: SET +key: package_audiences:{seller_agent.agent_url}:{package_id} +members: e.g. "seg_123", "seg_456" +``` + +Written by the buyer's package-CRUD writethrough. Set membership lets eligibility checks compute audience intersection via native `SINTER` rather than client-side iteration. + +### fcap policy record + +``` +type: HASH +key: fcap_policy:{fcap_key} +fields: + window_sec: uint + window_kind: "FIXED" | "SLIDING" + max_count: uint + merge_rule: "MAX" | "OR" | "SUM" (required, no implicit default — see § 3) + active: "1" | "0" + updated_at: unix seconds +``` + +Written by the buyer's policy-CRUD writethrough. 
+ +## Eligibility flow (pseudocode) + +``` +function evaluate_eligibility(identities, candidate_package_ids): + audiences = union(read("audience:{t.uid_type}:{t.user_token}").audience_ids for t in identities) + + eligible = [] + for pkg_id in candidate_package_ids: + pkg = read("package:{pkg.seller_agent_url}:{pkg_id}") + + // Audience match + if pkg.audience_ids and not pkg.audience_ids.intersects(audiences): + continue + + // Frequency cap check across all fcap_keys on the package + capped = false + for fcap_key in pkg.fcap_keys: + policy = read("fcap_policy:{fcap_key}") + counts = [read("exposure:{fcap_key}:{t.uid_type}:{t.user_token}").count for t in identities] + merged = merge(counts, policy.merge_rule) + if merged >= policy.max_count: + capped = true + break + if capped: + continue + + eligible.append(pkg_id) + + return eligible +``` + +Per-impression valkey reads are bounded by `O(|identities| × |candidate_packages| × |fcap_keys_per_package|)` — typically `3 × 50 × 3 = 450` reads. Within reach of valkey pipelining at IdentityMatch latency budgets. + +## Cross-language SDK scope + +### HPKE encrypt/decrypt + +HPKE is a **net-new primitive** for AdCP SDKs. Existing AdCP key plumbing publishes Ed25519/ECDSA verification keys via JWKS for request and webhook **signing** — it does not distribute X25519 KEM public keys for **encryption**. The TMPX key model is documented in `docs/trusted-match/specification.mdx:579-587` and lives on `adagents.json` `agents[].encryption_keys` — distinct from the signing JWKS. + +Each SDK that adds HPKE needs: + +- X25519 KEM keypair generation and `kid` derivation. +- ChaCha20-Poly1305 AEAD with HKDF-SHA256 KDF, per the published TMPX cryptosuite (RFC 9180 `mode_base`). +- Decrypt-side `kid` lookup against `encryption_keys`, with rejection-and-metric on unknown `kid` after refetch. 
+- Per-master nonce dedup window (recommended 7 days, per the published spec) for replay defense; this is the existing TMPX defense and is the buyer's responsibility, not the protocol's. + +Where the existing plumbing helps: `kid` prefix conventions, the 5-minute JWKS-style cache TTL, and the rotation choreography (30-day grace for old master keys). These transfer cleanly. The cryptographic core does not. + +| SDK | Signing today | HPKE needed | Priority | +|---|---|---|---| +| `@adcp/client` (JS) | ✅ | encrypt + decrypt | First — unblocks impression tracker | +| `adcp-go` | ✅ | decrypt (server) | Reference IdentityMatch impl | +| `adcp` (Python) | partial | encrypt + decrypt | Follows JS | + +### Reference implementations + +| Component | Repo / path | Language | Role | +|---|---|---|---| +| IdentityMatch service | `adcp-go/identitymatch` | Go | Open-source reference; processes IdentityMatch requests, applies eligibility, emits TMPX | +| Impression tracker | `@adcp/client/identitymatch` | JS/TS | Decrypts TMPX, increments exposures in valkey | +| Package/policy CRUD | `@adcp/client/identitymatch` | JS/TS | Writethrough on buyer's package & policy mutations | + +### Why JS for the writers and Go for the reader + +The impression tracker runs in the buyer's existing impression-tracking infra, which is overwhelmingly JS today (Baiyu's existing tracker). Wrapping in Go adds a process boundary for no benefit — JS appends directly to valkey. Same for package/policy CRUD: Nastassia's control plane is JS already. + +The IdentityMatch service is hot-path request handling and benefits from Go's concurrency model and the Prebid Server integration story. It reads from the same valkey schemas the JS writers populate. + +## Storyboard conformance scenarios + +The model gives clean invariants that map to runnable AdCP storyboards: + +1. **Per-key cap trips**: 5 impressions on `buyer-acme:campaign:42` → user drops off any package mapped to that key within `serve_window_sec`. +2. 
**Multi-identity merge (MAX)**: 3 impressions on RampID and 2 on ID5 (same person, `MERGE_RULE_MAX`) → merged count is 3; 6th impression on either identity (now 4 max) is still under cap; 9th identity-aggregated impression trips a cap of 5. +3. **Audience drift**: `sync_audiences` removes user from segment → eligibility on packages requiring that segment drops within `sync_lag + serve_window_sec`. +4. **Cross-seller advertiser cap**: 10 impressions on Seller A across `buyer-acme:advertiser:13` → identical request to Seller B for a different package mapped to the same key returns ineligible. +5. **Serve-window throttle**: After `serve_window_sec` expires, the publisher re-queries Identity Match and gets a fresh response; no router-side stale-cache surface. + +These scenarios are the IdentityMatch conformance suite. Buyer SDK teams SHOULD implement them as integration tests now, even though the AdCP storyboard YAML is deferred (see implementation note). + +**Implementation note**: `supported_protocols` is a closed enum today (`media_buy`, `signals`, `governance`, `sponsored_intelligence`, `creative`, `brand`) and the compliance runner discovers test paths from it. TMP is declared via `experimental_features` (`trusted_match.core`), not `supported_protocols`, so `static/compliance/source/protocols/trusted-match/` is not yet a valid runner path. Storyboard YAML lands when TMP graduates from experimental status (targeted 3.1.0 per the 3.0.0 changelog) and `trusted_match` enters the `supported_protocols` enum. Until then the five scenarios above serve as the contract for buyer SDK / reference-impl unit and integration tests. + +## Release plan + +| Change | Type | Vehicle | Notes | +|---|---|---|---| +| Buyer-side valkey schema spec | Additive (doc only) | 3.0.1 | This document. Records are Redis primitives; no new artifact type needed. TMPX plaintext format already specified in `docs/trusted-match/specification.mdx`. 
| +| Add `serve_window_sec` to `identity-match-response.json` | Additive | 3.0.1 | New field; default 60, max 300 | +| Deprecate `ttl_sec` on `identity-match-response.json` | Deprecation notice | 3.0.1 (notice) → 3.0.x ≥ 6 weeks after | Per experimental contract; field removed in a later 3.0.x | +| HPKE encrypt/decrypt in `@adcp/client` (JS) | SDK | Out of band of AdCP release | `@adcp/client` versioning | +| `adcp-go/identitymatch` reference impl | New repo/module | Out of band | Tracks AdCP versions | +| Storyboard scenarios (YAML) | New scenarios | Deferred to TMP graduation (targeted 3.1.0) | Buyer SDKs implement as integration tests now | + +## Open questions + +1. **Window semantics.** Sliding window vs fixed window vs exponential decay. Sliding is most common in DSPs but heavier on storage (need impression timestamps, not just counts). Default proposal: fixed window aligned to `window_sec` boundary, with `last_seen` recorded for diagnostics. +2. **Audience-record TTL inside valkey.** `sync_audiences` writes are continuous. How long do stale audience records linger? Proposal: `expires_at` field on the audience-meta HASH; SDK ignores SET members whose meta-hash has expired. +3. **Cap on policies per fcap_key.** Should multiple policies stack on one key (e.g., per-day AND per-hour), or one policy per key? Proposal: one policy per key for v1; stacking is implementable as multiple keys. +4. **Identity-graph plug-point.** For operators that *do* canonicalize, where does the graph hook in? Proposal: SDK exposes pre-write and pre-read interceptors (`(uid_type, user_token) → (uid_type', user_token')`) that customers wire to their graph. Default: identity passthrough. +5. **`FrequencyStore` interface for DSP coexistence.** Buyers with existing fcap stores (Aerospike/Redis/proprietary) won't migrate to valkey. SDK should expose a `FrequencyStore` interface; valkey is the reference implementation, customers plug their own. 
Symmetric to the canonicalization plug-point above. +6. **OpenRTB cross-walk.** OpenRTB 2.6 `User.eids[]` matches our `identities[]` shape; should the spec note the mapping for buyer-side codebases that bridge between protocols? +7. **Audience strength scores.** ZSET allows audiences to carry a strength/score; eligibility can apply a floor at check time. v1 ships SET; ZSET migration is a buyer-internal choice that doesn't affect the protocol. + +## Deferred security & privacy issues (follow-up) + +These came out of pre-merge review and are real concerns that the current design does not address. Each warrants a focused follow-up rather than a polish pass on this spec: + +1. **TMPX harvest → competitor-suppression attack.** TMPX rendered into publisher creative URLs is harvestable. With no per-impression binding (creative_id, slot_id, ts) inside the AEAD AAD, an attacker fires harvested tokens against the buyer's impression endpoint to inflate fcap counts and starve a target user out of a campaign. Mitigation needs binding to per-impression context, sender-binding, or rate-limit-per-token at the impression handler. Out of scope for this PR; tracked as a TMPX security follow-up. +2. **Eligibility-as-audience-membership oracle.** A malicious publisher submits honeypot `package_ids` and observes which return eligible to reconstruct the user's audience profile. The "publishers don't see audience records" privacy claim is wire-correct but functionally false. Mitigations: package-ownership check at IdentityMatch ingress, or k-anonymity floor on returned eligibility. Out of scope; tracked as a privacy follow-up. +3. **Consent revocation between IdentityMatch and impression.** TMPX has no consent fingerprint; if consent is revoked during the cache window, the impression handler still writes an exposure record. GDPR/TCF problem. 
Either include a consent fingerprint in TMPX plaintext (requires extending the published format) or document that fcap writes survive revocation as non-personal aggregates (legally tenuous). Tracked as a privacy follow-up. +4. **Side-channel via eligibility deltas.** A router observing two IdentityMatch responses for the same user 30s apart sees `eligible_package_ids` shrink as caps trip, fingerprinting fcap state per-user. The existing caching contract (fixed-response-for-window) limits this. Tracked as a privacy follow-up. +5. **`hashed_email` in TMPX widens the identity-leak surface.** Putting unsalted SHA-256 email inside a creative URL macro re-identifies on token leak. Either prohibit `hashed_email` in the plaintext or require salting. Tracked as a TMPX security follow-up. +6. **DoS amplification via `package_ids[]` size.** Per-IdentityMatch valkey reads scale `O(|identities| × |candidate_packages| × |fcap_keys_per_package|)` — at 25k packages from a busy publisher this becomes an amplification primitive. Cap candidate_packages at IdentityMatch ingress. Tracked as an operational follow-up. +7. **§13 work plan ownership gaps.** No named owner for the eligibility-evaluator hot path, observability/SLO, key-rotation drill, or load testing. Address before SDK ships. + +## Boiled-down work plan + +(Replaces the original Slack breakdown.) + +1. **Spec changes (this doc → PRs against AdCP)** + - Add `static/proto/tmp/v1/{exposure_record,audience_record,package_record,fcap_policy}.proto` plus shared `uid_type.proto` + - Add `serve_window_sec` to `identity-match-response.json` and deprecate `ttl_sec` (lands additively in 3.0.1; field removal in a 3.0.x ≥ 6 weeks out) + - Storyboard YAML under `static/compliance/source/protocols/trusted-match/` — deferred until TMP enters `supported_protocols`. Buyer SDKs implement the five scenarios as integration tests now. +2. 
**JS SDK (`@adcp/client`, JS team)** + - HPKE encrypt + decrypt (net-new primitive — see § HPKE) + - Impression-tracking writer (decrypts TMPX per the published binary format, increments exposures) + - Package/policy CRUD writethrough client + - `FrequencyStore` interface (valkey reference impl + plug-point) +3. **Go reference impl (`adcp-go/identitymatch`)** + - HPKE decrypt + - Eligibility evaluator against the buyer-side data model + - Conformance harness running storyboard scenarios as integration tests +4. **Prebid wiring** + - TMP router → IdentityMatch service connection + - Already scoped in `specs/prebid-tmp-proposal.md` + +## Conformance scenario walkthroughs + +Each of the five scenarios in § Storyboard conformance maps to a runnable sequence of wire calls and buyer-internal operations against a live valkey. These are the integration-test contracts buyer SDKs implement today; they become storyboard YAMLs once TMP enters `supported_protocols` and the test-controller scenarios below exist. + +All walkthroughs assume: +- `serve_window_sec = 60` on every IdentityMatch response (default) +- Identity Match service is the **buyer agent**; caller is a publisher / router (or test runner standing in for one) +- "Buyer-internal step" is a step the SDK harness executes against valkey directly, NOT a wire call. These map to `comply_test_controller` scenarios that need to be added (see § Conformance harness scope). +- `tenant = "buyer-acme"`, `package = "pkg-42"`, `seller_agent_url = "https://seller-a.example"` throughout. 
+ +### Scenario 1 — per-key cap trips after N exposures + +**Setup (buyer-internal):** +``` +SADD package_fcap_keys:https://seller-a.example:pkg-42 buyer-acme:campaign:42 +HSET fcap_policy:buyer-acme:campaign:42 window_sec 86400 window_kind FIXED \ + max_count 5 merge_rule MAX active 1 +HSET package:https://seller-a.example:pkg-42 active 1 +SADD package_audiences:https://seller-a.example:pkg-42 seg_test_users +SADD audience:rampid:abc seg_test_users +``` + +**Step 1** — wire call: `identity_match_request {identities: [{rampid, abc}], package_ids: [pkg-42]}` → expect `eligible_package_ids: [pkg-42]`, `serve_window_sec: 60`, `tmpx: <opaque>`. + +**Step 2** — buyer-internal, repeat 5×: decrypt TMPX from response, then for each (uid_type, user_token) inside: +``` +HINCRBY exposure:buyer-acme:campaign:42:rampid:abc count 1 +HSET exposure:buyer-acme:campaign:42:rampid:abc last_seen <now> +``` +After 5 iterations: `HGET exposure:buyer-acme:campaign:42:rampid:abc count` returns `5`. + +**Step 3** — wire call: same `identity_match_request` → expect `eligible_package_ids: []` (cap tripped, package dropped). + +### Scenario 2 — multi-identity merge (MAX rule) + +**Setup:** same as Scenario 1, plus the user has two resolved identities (rampid `abc` and id5 `def`). + +**Step 1** — buyer-internal, simulate prior exposures across identities: +``` +HSET exposure:buyer-acme:campaign:42:rampid:abc count 3 +HSET exposure:buyer-acme:campaign:42:id5:def count 2 +``` + +**Step 2** — wire call: `identity_match_request {identities: [{rampid, abc}, {id5, def}], package_ids: [pkg-42]}`. + +Eligibility check inside the buyer agent reads both records and applies `MERGE_RULE_MAX`: +``` +counts = [HGET exposure:...rampid:abc count, HGET exposure:...id5:def count] + = [3, 2] +merged = MAX(3, 2) = 3 +``` +3 < max_count of 5 → `eligible_package_ids: [pkg-42]`.
+ +**Step 3** — buyer-internal, simulate 2 more impressions on rampid: +``` +HINCRBY exposure:buyer-acme:campaign:42:rampid:abc count 2 → count = 5 +``` + +**Step 4** — wire call: same request → `MAX(5, 2) = 5 ≥ max_count` → `eligible_package_ids: []`. + +If the policy were `MERGE_RULE_OR` (count distinct identities exposed), step 2 would have merged to `count_nonzero(3,2) = 2`, and step 4 to `2`. OR-merge would not trip until five distinct identities had been exposed — the over-counting concern. + +### Scenario 3 — audience drift via sync_audiences + +**Setup:** as Scenario 1, with the user initially in `seg_test_users`. + +**Step 1** — wire call: `identity_match_request` → `eligible_package_ids: [pkg-42]`. + +**Step 2** — buyer-internal, simulate `sync_audiences` removing the user from the segment: +``` +SREM audience:rampid:abc seg_test_users +HSET audience_meta:rampid:abc updated_at <now> +``` + +**Step 3** — wait `serve_window_sec` seconds (60) so the publisher re-queries. + +**Step 4** — wire call: same `identity_match_request`. Buyer agent computes audience intersection: +``` +user_audiences = SMEMBERS audience:rampid:abc → [] +package_audiences = SMEMBERS package_audiences:...:pkg-42 → [seg_test_users] +intersection = ∅ → package dropped +``` +Expect `eligible_package_ids: []`. + +### Scenario 4 — cross-seller advertiser cap + +**Setup:** two packages on different sellers, both mapped to the same `advertiser:13` cap: +``` +SADD package_fcap_keys:https://seller-a.example:pkg-A buyer-acme:advertiser:13 +SADD package_fcap_keys:https://seller-b.example:pkg-B buyer-acme:advertiser:13 +HSET fcap_policy:buyer-acme:advertiser:13 window_sec 86400 max_count 10 \ + merge_rule MAX active 1 +``` + +**Step 1** — wire call to buyer agent (request from Seller A): `package_ids: [pkg-A]` → eligible.
+ +**Step 2** — buyer-internal, simulate 10 impressions on Seller A's package: +``` +HSET exposure:buyer-acme:advertiser:13:rampid:abc count 10 +``` + +**Step 3** — wire call (request from Seller B): `package_ids: [pkg-B]`. Buyer agent reads `exposure:buyer-acme:advertiser:13:rampid:abc.count = 10 ≥ max_count` → `eligible_package_ids: []`. + +The advertiser-level cap enforces across sellers because the `fcap_key` is shared. No cross-seller coordination needed; the buyer agent is the single source of truth. + +### Scenario 5 — serve_window throttle + +**Setup:** as Scenario 1, with audiences and policy in place. + +**Step 1** — wire call at `t=0`: `identity_match_request` → `eligible_package_ids: [pkg-42]`, `serve_window_sec: 60`. + +**Step 2** — publisher serves one impression on pkg-42 within the 60s window. + +**Step 3** — at `t=30s`, publisher receives another ad opportunity for the same user. Per `serve_window_sec` semantic, the publisher MUST NOT re-serve pkg-42 from the cached eligibility — pkg-42 is exhausted in this window. + +**Step 4** — at `t=61s`, publisher re-queries: `identity_match_request` → fresh eligibility computed from current valkey state. No router-side stale cache; the only "cache" is the publisher's commitment to honor the serve_window. + +This is the semantic the wire field encodes. The buyer agent does not need to track per-publisher window state; it just answers freshly when re-queried. + +## Conformance harness scope + +To run these scenarios automatically through the AdCP compliance runner once TMP enters `supported_protocols`, three pieces are needed: + +1. **`comply_test_controller` scenarios for buyer-internal steps.** The runner can already simulate AdCP tasks; it cannot today simulate impression callbacks or audience syncs. 
New scenarios: + - `simulate_impression_callback`: takes `tmpx`, `fcap_keys[]`, `count` — applies `HINCRBY` against the buyer's valkey + - `simulate_audience_membership`: takes `(uid_type, user_token, audience_ids[])` — writes `audience:` SETs + - `simulate_package_record`: takes a full PackageRecord shape — writes the package + companion SETs + - `simulate_fcap_policy`: takes a full FcapPolicy shape — writes the policy HASH + - `inspect_exposure`: returns the current exposure count for a `(fcap_key, uid_type, user_token)` triple — for assertion +2. **Storyboard YAMLs at `static/compliance/source/protocols/trusted-match/scenarios/`** — five files mapping the scenarios above to runner-executable phases. Each phase alternates wire calls (`identity_match`) with `comply_test_controller` calls. +3. **TMP enters `supported_protocols`** so the runner discovers the protocol path. Currently TMP is in `experimental_features` (`trusted_match.core`); graduation is targeted for 3.1.0. + +Buyer SDK teams implementing IdentityMatch SHOULD wire these scenarios as integration tests against a real valkey *now*, using the walkthroughs above as the contract. The work to formalize them as storyboard YAMLs lands when the test-controller scenarios are designed (separate PR, target 3.1.0). + +## OpenRTB cross-walk + +The `identities[]` shape on `identity-match-request.json` maps to OpenRTB 2.6 `User.eids[]` for buyer-side codebases that bridge protocols. 
Mapping: + +| AdCP TMP `identities[].uid_type` | OpenRTB 2.6 `User.eids[].source` | +|---|---| +| `rampid` / `rampid_derived` | `liveramp.com` (`atype: 1` for maintained, `atype: 3` for derived) | +| `id5` | `id5-sync.com` | +| `uid2` | `uidapi.com` (`atype: 3`) | +| `euid` | `euid.eu` | +| `pairid` | `iabtechlab.com/pair` | +| `maid` | `adid` (Android) / `idfa` (iOS) on `Device.ifa` instead of `User.eids` — atypically carried | +| `hashed_email` | `liveintent.com` or buyer-specific (`atype: 3`) | +| `publisher_first_party` | publisher-defined `source` URL | +| `other` | buyer-defined `source` URL | + +The TMP `user_token` field corresponds to `User.eids[].uids[].id`. AdCP carries up to 3 identities (HPKE size budget); OpenRTB has no such limit, so a buyer bridging from OpenRTB into TMP must apply the buyer-configured priority order to truncate. + +## Next steps & rollout plan + +This PR is the architecture-decision foundation. The wire-spec delta is intentionally minimal (one additive field, one deprecation) so that review is focused on architecture rather than schema breadth. Six follow-up workstreams take this from spec to deployable infrastructure. They run partially in parallel; ordering reflects dependency, not time. + +### 1. 
Doc promotion: `specs/` → `docs/trusted-match/` (target: ~1 week after this lands) + +Selected sections of this spec move to authoritative protocol docs: + +| Content | Destination | +|---|---| +| `serve_window_sec` semantic + `ttl_sec` deprecation | `docs/trusted-match/specification.mdx` (already on the wire) | +| `fcap_keys` label model + tenant-prefix + charset | new `docs/trusted-match/buyer-fcap-implementation.mdx` | +| Valkey schema (Redis primitives, key patterns, field names) | same new buyer-fcap page | +| `merge_rule` semantics + per-mode recommendations | same | +| Redis-command walkthroughs for the 5 conformance scenarios | same | +| OpenRTB `User.eids` cross-walk | folded into existing `docs/trusted-match/migration-from-axe.mdx` or sibling page | +| Architecture rationale, thread resolutions, deferred follow-ups | **stays** in this spec doc | + +The split: authoritative implementation guidance moves to `docs/`; design history stays in `specs/`. SDK teams build against `docs/`. + +### 2. JS SDK: `@adcp/client` V6 (tracked: adcp-client#1005) + +New namespace `client.identityMatch.*` with five methods that constitute the buyer-side management plane and the test harness driver: + +``` +client.identityMatch.upsertAudience(audience_id, members, opts) // wraps sync_audiences add/remove deltas +client.identityMatch.upsertPackage(seller_agent_url, package_id, fcap_keys, audience_ids, opts) +client.identityMatch.upsertFcapPolicy(fcap_key, {window_sec, window_kind, max_count, merge_rule}) +client.identityMatch.recordImpression(tmpx, opts) // decodes TMPX, HINCRBY exposure +client.identityMatch.inspectExposure(fcap_key, uid_type, user_token) // test-only assertion helper +``` + +Plus HPKE encrypt/decrypt as net-new SDK primitives (X25519 KEM, ChaCha20-Poly1305, HKDF-SHA256 per RFC 9180 `mode_base`). The encrypt path is needed by the buyer agent emitting TMPX; decrypt by the impression handler. + +### 3. 
Reference TMP server: `adcp-go/identitymatch` + +A read-only TMP provider implementing `POST /identity` (and `POST /context` if scope expands). Reads the valkey schema this PR documents; serves `eligible_package_ids` + TMPX. Cites the buyer-fcap docs page once doc promotion lands. + +No new endpoints — TMP stays a downstream read replica. Deploy this binary, point publishers/routers at it, populate state via the SDK. + +### 4. Training agent integration + +The training agent hosts both surfaces: its existing AdCP MCP/A2A endpoint (handles `sync_audiences`, `create_media_buy`, etc.) AND a TMP `/identity` endpoint sharing the same valkey. End-to-end IdentityMatch demo lives here. Becomes both the learning environment and the integration test for the SDK + reference impl. + +### 5. Conformance harness + +The harness is a runner script that uses the SDK to seed state and assert behavior, plus calls the TMP server's `/identity` endpoint to validate eligibility responses. Lives as integration tests inside `@adcp/client` and `adcp-go`. The five scenarios in § Storyboard conformance map directly onto runnable test cases. No new protocol surface required. + +### 6. TMP graduation (target: 3.1.0) + +When TMP enters `supported_protocols` (currently in `experimental_features` as `trusted_match.core`), AdCP storyboards can wrap the SDK-driven harness if cross-protocol integration testing becomes useful. Until graduation, the SDK + reference impl harness IS the conformance suite. + +### Tracked deferred follow-ups + +These are real concerns from pre-merge review that this PR explicitly does NOT address. Each warrants a focused follow-up issue once the architecture lands: + +- **TMPX harvest → competitor-suppression attack** (security): need per-impression binding (creative_id, slot_id, ts) inside the AEAD AAD, or rate-limit-per-token at impression handler. 
+- **Eligibility-as-audience-membership oracle** (privacy): need k-anonymity floor or package-ownership check at IdentityMatch ingress. +- **Consent revocation between IdentityMatch and impression** (privacy/legal): need consent fingerprint in TMPX plaintext OR documented "fcap writes survive revocation" stance. +- **Side-channel via eligibility deltas** (privacy): router observation of changing eligibility leaks fcap state. +- **`hashed_email` in TMPX widens leak surface** (security): prohibit unsalted `hashed_email` in plaintext or require salting. +- **DoS amplification via large `package_ids[]`** (operational): cap candidate_packages at IdentityMatch ingress. +- **Where do fcap policies live?** Open: SDK-only (current proposal), wire field on `create_media_buy`, or new wire task. Decide before SDK ships. +- **Identity-graph plug-point interface** for buyers running their own canonicalization: SDK pre-write/pre-read interceptors. Decide before SDK ships. + +## Threads consolidated from Slack 2026-04-26 + +- Thread 1 (exposure struct location): resolved by § "Buyer-side valkey schema." Cross-language interop is at the Redis-operation level (`HINCRBY`, `SADD`), not via a binary serialization layer; no proto / JSON Schema / custom format needed. The TMPX wire format itself stays as published in `docs/trusted-match/specification.mdx`. +- Thread 2 (campaign isn't AdCP): resolved by § fcap_keys[] label model. No fixed dimensions; customers choose. Tenant prefix required. Seller agent + package_id remains the seller-side identifier per `core/seller-agent-ref.json`. +- Thread 3 (campaign logic in IdentityMatch): resolved by § Eligibility flow. +- Thread 4 (campaign sync via Cerberus): resolved by § Two write paths. Direct CRUD writethrough; no Cerberus. 
diff --git a/static/schemas/source/index.json b/static/schemas/source/index.json index 646acbb773..778848cdaf 100644 --- a/static/schemas/source/index.json +++ b/static/schemas/source/index.json @@ -1555,7 +1555,8 @@ "description": "Per-package eligibility — boolean eligible plus optional intent score" } } - } + }, + "buyer-internal-valkey-schema": "Buyer-internal records (audience, exposure, package, fcap_policy) are documented in specs/identitymatch-fcap-architecture.md as a valkey schema (Redis key patterns + primitive types). Not a wire artifact and not on the JSON Schema registry." }, "brand-protocol": { "description": "Brand protocol for identity retrieval, rights discovery, acquisition, and lifecycle management", diff --git a/static/schemas/source/tmp/identity-match-response.json b/static/schemas/source/tmp/identity-match-response.json index 39e83c6946..eb1825d7c1 100644 --- a/static/schemas/source/tmp/identity-match-response.json +++ b/static/schemas/source/tmp/identity-match-response.json @@ -2,7 +2,7 @@ "$schema": "http://json-schema.org/draft-07/schema#", "$id": "/schemas/tmp/identity-match-response.json", "title": "Identity Match Response", - "description": "Response indicating which packages the user is eligible for. The ttl_sec field defines a caching contract: the router caches this response and returns cached eligibility without re-querying the buyer during the TTL window. Extension fields (ext, context) are intentionally omitted to prevent data leakage across the identity privacy boundary.", + "description": "Response indicating which packages the user is eligible for. The serve_window_sec field defines a per-package single-shot fcap: after serving the user one impression on each eligible package, the publisher MUST re-query Identity Match before serving from those packages again. 
Extension fields (ext, context) are intentionally omitted to prevent data leakage across the identity privacy boundary.", "x-status": "experimental", "type": "object", "properties": { @@ -22,9 +22,16 @@ "type": "string" } }, + "serve_window_sec": { + "type": "integer", + "description": "Per-package single-shot fcap window, in seconds. After serving the user one impression on each eligible package within this window, the publisher MUST re-query Identity Match before serving from those packages again. This is NOT a router response cache TTL — it is a buyer-asserted serve throttle. Multi-impression frequency caps are handled separately by buyer-side exposure records and policies, updated out-of-band via TMPX impression callbacks. Default 60. Maximum 300 — longer windows reduce IdentityMatch load but coarsen fcap granularity below what most campaigns require.", + "minimum": 1, + "maximum": 300, + "default": 60 + }, "ttl_sec": { "type": "integer", - "description": "How long the router should cache this response, in seconds. The router returns cached eligibility without re-querying the buyer during this window. A value of 0 means do not cache.", + "description": "DEPRECATED — use serve_window_sec. Originally documented as a router response cache TTL but operationally functioned as a per-package serve throttle, conflating two distinct concerns (response caching vs frequency capping). Senders during the deprecation window SHOULD populate ttl_sec and serve_window_sec with the same value; receivers SHOULD prefer serve_window_sec when both are present. 
Removed in a future 3.0.x release per the experimental-status contract — see CHANGELOG and specs/identitymatch-fcap-architecture.md.", "minimum": 0, "maximum": 86400 }, From 81cc744ce7f3b5c3864a617c1169cd1d2b5d4e11 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Mon, 27 Apr 2026 13:32:11 -0400 Subject: [PATCH 03/12] spec(tmp): clarify normative vs reference layering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses Oleksandr's feedback on PR #3359: the spec called the buyer-side valkey schema "normative" while also leaving an open question for a pluggable FrequencyStore interface. Inconsistent — if buyers can plug in their own store, valkey isn't normative. Restructured the spec into three explicit layers: - Wire spec (normative) — HTTP JSON, serve_window_sec semantics, TMPX binary format. Anything crossing an agent boundary. - Conformance invariants (normative) — backend-agnostic eligibility logic. Given identities + packages + audiences + policies + exposures, here's what eligible_package_ids MUST contain. Storage choice is implementation. - Reference data model (non-normative) — Scope3's valkey-backed layout. A recipe for organizing the data the invariants reference. Other buyers may use Aerospike, DynamoDB, PostgreSQL, anything. 
Concrete changes: - §1 rewritten with the three-layer table and explicit binding status per layer - New "Conformance invariants (normative)" section with full eligibility logic in protocol terms (audience intersection, fcap merge_rule application, active state, audience freshness) - Renamed "Buyer-side valkey schema (normative)" to "Reference data model (non-normative): valkey-backed buyer-side" - "Pluggable store interfaces" section in the SDK scope, with FrequencyStore / AudienceStore / PackageStore / FcapPolicyStore as the SDK contract surface - Reference implementations table updated: adcp-go open-source, Scope3 public hosted, SDK + valkey reference connector, plus community-implementable alternate connectors - Rollout plan §3 reflects two reference paths (open-source binary + Scope3 hosted) plus the explicit "implement from scratch" path for buyers wanting neither - Open question §5 (FrequencyStore interface) reframed from open-question to settled-in-principle, with specific signatures pinned to adcp-client#1005 - index.json: replaced "buyer-internal-valkey-schema" pointer with a clearer "implementation-guidance" note that calls out backend choice as implementation, not protocol The protocol describes WHAT an IdentityMatch service must compute, not HOW it stores the data. Co-Authored-By: Claude Opus 4.7 (1M context) --- specs/identitymatch-fcap-architecture.md | 88 ++++++++++++++++++------ static/schemas/source/index.json | 2 +- 2 files changed, 67 insertions(+), 23 deletions(-) diff --git a/specs/identitymatch-fcap-architecture.md b/specs/identitymatch-fcap-architecture.md index 21eda0d567..a9408881c5 100644 --- a/specs/identitymatch-fcap-architecture.md +++ b/specs/identitymatch-fcap-architecture.md @@ -21,18 +21,19 @@ Without these decisions, the open-source IdentityMatch reference impl risks ship ## Architectural decisions -### 1. The wire spec stays minimal; the buyer-internal model is where the design lives +### 1. 
Three layers, with explicit normative status -The existing IdentityMatch request/response is the public protocol surface. Audience, exposure, and fcap-policy records are buyer-internal — defined by AdCP so that cross-language SDKs can interoperate against the same valkey, but **not on the wire**. Sellers, routers, and publishers never see fcap_keys, audience records, or exposure records. +This spec is layered. Each layer has a different binding strength: -This keeps the privacy boundary clean (publishers do not learn buyer fcap policy) and lets the buyer-internal model evolve faster than the wire spec. +| Layer | Status | What it covers | +|---|---|---| +| **Wire spec** | Normative | The HTTP JSON request/response on `POST /identity`, the `serve_window_sec` semantic, the TMPX binary format. Anything that crosses an agent boundary. | +| **Conformance invariants** | Normative | The eligibility logic an IdentityMatch service MUST compute, expressed in terms of inputs (identities, packages, audiences, policies, exposures) and outputs (eligible_package_ids), independent of how the service stores its data. | +| **Reference data model** | Non-normative | Scope3's valkey-backed implementation choice — Redis key patterns, primitive types, field names. A buyer running Aerospike, DynamoDB, PostgreSQL, or anything else is conformant if the service satisfies the invariants. | -**Two contracts, with the right tool for each:** +A correctness-equivalent IdentityMatch service can use any backing store. The protocol describes **what** the service must compute, not **how** it stores the data. -- **Wire / RPC** (HTTP JSON request/response) → JSON Schema under `static/schemas/source/tmp/`. Already integrated with the repo's docs and codegen pipeline. -- **Buyer-internal valkey schema** (audience, exposure, package, fcap_policy records) → documented in this spec as Redis key patterns + primitive types (HASH / SET / ZSET) + field names within each. 
Cross-language interop is handled by Redis client libraries; we don't need our own serialization layer for these records. - -The valkey schema is not a binary blob format. JS impression-trackers and Go IdentityMatch services interoperate by agreeing on the **Redis-level operations** (`HINCRBY exposure:... count 1`, `SMEMBERS audience:...`), not by deserializing each other's bytes. That makes proto / JSON Schema / any custom serialization unnecessary at this layer. +The privacy boundary stays clean across all three layers: publishers and routers never see audience records, exposures, or fcap_keys regardless of backend choice. ### 2. `fcap_keys[]` as a label model, not hierarchy @@ -106,11 +107,37 @@ New field `serve_window_sec` (integer, 1-300, default 60). Existing `ttl_sec` fi The TMPX wire format itself is **unchanged** — already specified in `docs/trusted-match/specification.mdx:534-597` (16-byte header with version/timestamp/country/nonce/count plus typed identity entries) with replay defense via an 8-byte AEAD-protected nonce + master-side dedup. -## Buyer-side valkey schema (normative) +## Conformance invariants (normative) + +Backend-agnostic. A conformant IdentityMatch service MUST compute `eligible_package_ids` such that, for each `package_id ∈ request.package_ids`, the package is included in `eligible_package_ids` if and only if conditions 1 and 2 below **both** hold, evaluated subject to rules 3 and 4: + +**1. Audience eligibility.** Either the package has no audience requirement, OR there exists at least one audience identifier `a` such that: + - `a` is in the package's required audience set, AND + - `a` is in the audience-membership set of at least one identity `i ∈ request.identities` (i.e., the union of audience memberships across the user's resolved identities intersects the package's required audiences). + +**2.
Frequency cap eligibility.** For every fcap_key `k` declared on the package, the merged exposure count for `request.identities` against `k` is strictly less than the policy's `max_count`. Specifically: + - Read each `(k, i.uid_type, i.user_token).count` for each `i ∈ request.identities` within the policy's window. + - Apply the policy's `merge_rule`: + - **MAX**: merged = max of all per-identity counts. + - **OR**: merged = count of identities with count > 0. + - **SUM**: merged = sum of all per-identity counts. + - If merged ≥ max_count for ANY of the package's fcap_keys, the package is ineligible. + +**3. Active state.** Packages and policies marked `active: false` are treated as if they were not present. + +**4. Audience-record freshness.** If the audience pipeline publishes an `expires_at` and the current time is past that timestamp, the audience-membership entry MUST NOT contribute to the union in (1). + +The TMPX returned with the response MUST encode the resolved identities so that an out-of-band impression handler can update exposures atomically — see the published TMPX format at `docs/trusted-match/specification.mdx:534-597`. + +Storage choice (valkey, Aerospike, DynamoDB, in-memory, anything) is implementation. Two services with different storage backends that satisfy these invariants for the same inputs MUST return the same eligibility output. + +## Reference data model (non-normative): valkey-backed buyer-side -Four record types, each modeled directly on a Redis primitive. Cross-language interop is handled by Redis client libraries; agreement is at the operation level (`HINCRBY`, `SADD`, `SMEMBERS`), not at a serialization layer. +This is **Scope3's reference implementation choice** — a recipe for organizing the data the conformance invariants reference, using Redis primitives. Other buyers may use entirely different backends; the protocol does not mandate this layout.
-**This is a convention, not a schema in the database-enforced sense.** Valkey / Redis does not validate writes against a schema definition — the contract documented here is enforced by the SDK on the write side and by the IdentityMatch reader on the read side. A buggy writer can still corrupt the store; the protocol relies on library discipline, not database constraints. SDK conformance tests are how that discipline is verified. +Four record types, each modeled directly on a Redis primitive. Cross-language interop within this reference impl is handled by Redis client libraries; agreement is at the operation level (`HINCRBY`, `SADD`, `SMEMBERS`), not at a serialization layer. + +Valkey / Redis does not validate writes against a schema definition. The contract documented here is enforced by the SDK on the write side and by the IdentityMatch reader on the read side. A buggy writer can corrupt the store; library discipline (not database constraints) is what makes this work. SDK integration tests verify the contract. ### Audience record @@ -245,19 +272,33 @@ Where the existing plumbing helps: `kid` prefix conventions, the 5-minute JWKS-s | `adcp-go` | ✅ | decrypt (server) | Reference IdentityMatch impl | | `adcp` (Python) | partial | encrypt + decrypt | Follows JS | +### Pluggable store interfaces + +The SDK exposes store interfaces — `FrequencyStore`, `AudienceStore`, `PackageStore`, `FcapPolicyStore` — that an IdentityMatch service implementation calls to satisfy the conformance invariants. Buyers running their own backend (Aerospike, DynamoDB, proprietary KV) implement these interfaces against their store; the SDK ships a reference valkey-backed connector. The interfaces, not the storage layout, are what the SDK contracts on. 
+ +``` +interface FrequencyStore { + increment(fcap_key, uid_type, user_token, by) -> count + read(fcap_key, uid_type, user_token) -> { count, first_seen, last_seen, window_start } + reset_window(fcap_key, uid_type, user_token, new_window_start) +} +// Equivalent shapes for AudienceStore, PackageStore, FcapPolicyStore. +``` + +Specific interface signatures are an SDK-design concern, tracked under `adcp-client#1005`. The point at protocol level: the SDK is store-agnostic by design. + ### Reference implementations | Component | Repo / path | Language | Role | |---|---|---|---| -| IdentityMatch service | `adcp-go/identitymatch` | Go | Open-source reference; processes IdentityMatch requests, applies eligibility, emits TMPX | -| Impression tracker | `@adcp/client/identitymatch` | JS/TS | Decrypts TMPX, increments exposures in valkey | -| Package/policy CRUD | `@adcp/client/identitymatch` | JS/TS | Writethrough on buyer's package & policy mutations | +| IdentityMatch service | `adcp-go/identitymatch` | Go | Open-source reference reader for `POST /identity` | +| Scope3 hosted IdentityMatch | (Scope3 infra) | — | Public deployment for buyers who don't want to host their own | +| SDK + valkey reference connector | `@adcp/client/identitymatch` | JS/TS | Default store implementation behind the SDK interfaces | +| SDK + Aerospike/Dynamo/etc. connectors | community / buyer-implemented | any | Optional alternate stores satisfying the same interfaces | -### Why JS for the writers and Go for the reader +### Why JS for the management plane and Go for the reader -The impression tracker runs in the buyer's existing impression-tracking infra, which is overwhelmingly JS today (Baiyu's existing tracker). Wrapping in Go adds a process boundary for no benefit — JS appends directly to valkey. Same for package/policy CRUD: Nastassia's control plane is JS already. 
- -The IdentityMatch service is hot-path request handling and benefits from Go's concurrency model and the Prebid Server integration story. It reads from the same valkey schemas the JS writers populate. +The impression tracker and CRUD writethrough run in the buyer's existing infra, which is overwhelmingly JS today. Wrapping in Go adds a process boundary for no benefit. The IdentityMatch service is hot-path request handling and benefits from Go's concurrency model + Prebid Server integration. Both consume the same store interfaces; they don't share storage assumptions, only the interface contract. ## Storyboard conformance scenarios @@ -290,7 +331,7 @@ These scenarios are the IdentityMatch conformance suite. Buyer SDK teams SHOULD 2. **Audience-record TTL inside valkey.** `sync_audiences` writes are continuous. How long do stale audience records linger? Proposal: `expires_at` field on the audience-meta HASH; SDK ignores SET members whose meta-hash has expired. 3. **Cap on policies per fcap_key.** Should multiple policies stack on one key (e.g., per-day AND per-hour), or one policy per key? Proposal: one policy per key for v1; stacking is implementable as multiple keys. 4. **Identity-graph plug-point.** For operators that *do* canonicalize, where does the graph hook in? Proposal: SDK exposes pre-write and pre-read interceptors (`(uid_type, user_token) → (uid_type', user_token')`) that customers wire to their graph. Default: identity passthrough. -5. **`FrequencyStore` interface for DSP coexistence.** Buyers with existing fcap stores (Aerospike/Redis/proprietary) won't migrate to valkey. SDK should expose a `FrequencyStore` interface; valkey is the reference implementation, customers plug their own. Symmetric to the canonicalization plug-point above. +5. **Pluggable store interfaces in the SDK.** The SDK exposes `FrequencyStore`, `AudienceStore`, `PackageStore`, and `FcapPolicyStore` interfaces that satisfy the conformance invariants regardless of backend.
The valkey-backed connector is the reference; buyers plug their own (Aerospike, DynamoDB, proprietary KV) by implementing the interfaces. Symmetric to the canonicalization plug-point above. Settled in principle; specific interface signatures are an SDK-design item under adcp-client#1005. 6. **OpenRTB cross-walk.** OpenRTB 2.6 `User.eids[]` matches our `identities[]` shape; should the spec note the mapping for buyer-side codebases that bridge between protocols? 7. **Audience strength scores.** ZSET allows audiences to carry a strength/score; eligibility can apply a floor at check time. v1 ships SET; ZSET migration is a buyer-internal choice that doesn't affect the protocol. @@ -513,11 +554,14 @@ client.identityMatch.inspectExposure(fcap_key, uid_type, user_token) // test-on Plus HPKE encrypt/decrypt as net-new SDK primitives (X25519 KEM, ChaCha20-Poly1305, HKDF-SHA256 per RFC 9180 `mode_base`). The encrypt path is needed by the buyer agent emitting TMPX; decrypt by the impression handler. -### 3. Reference TMP server: `adcp-go/identitymatch` +### 3. Reference TMP server: `adcp-go/identitymatch` (open-source) + Scope3 hosted (public) + +Two reference paths, neither required: -A read-only TMP provider implementing `POST /identity` (and `POST /context` if scope expands). Reads the valkey schema this PR documents; serves `eligible_package_ids` + TMPX. Cites the buyer-fcap docs page once doc promotion lands. +- **`adcp-go/identitymatch`**: an open-source TMP provider implementing `POST /identity` against the SDK's pluggable store interfaces. Drop in your own store connector; deploy the binary; point publishers/routers at it. +- **Scope3 hosted IdentityMatch**: a public deployment buyers can route to without standing up their own service. Useful for buyers with no operational appetite for an extra service. -No new endpoints — TMP stays a downstream read replica. Deploy this binary, point publishers/routers at it, populate state via the SDK. 
+Buyers who want neither — fine. The wire spec + conformance invariants are sufficient to implement IdentityMatch from scratch in any language against any backend. Both reference paths exist to lower adoption cost, not to gate it. ### 4. Training agent integration diff --git a/static/schemas/source/index.json b/static/schemas/source/index.json index 778848cdaf..e96f178bf2 100644 --- a/static/schemas/source/index.json +++ b/static/schemas/source/index.json @@ -1556,7 +1556,7 @@ } } }, - "buyer-internal-valkey-schema": "Buyer-internal records (audience, exposure, package, fcap_policy) are documented in specs/identitymatch-fcap-architecture.md as a valkey schema (Redis key patterns + primitive types). Not a wire artifact and not on the JSON Schema registry." + "implementation-guidance": "Conformance invariants and a reference (non-normative) valkey-backed buyer-side data model are documented in specs/identitymatch-fcap-architecture.md. Storage backend is an implementation choice; conformant services may use any store that satisfies the invariants." }, "brand-protocol": { "description": "Brand protocol for identity retrieval, rights discovery, acquisition, and lifecycle management", From 2fe36ae3e4edbc1c1ca57e7267fc597c486be403 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Mon, 27 Apr 2026 18:38:09 -0400 Subject: [PATCH 04/12] spec(tmp): split impression handling into decode + write primitives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Slack alignment with Baiyu (Scope3 impression-tracker owner) and Brian: the SDK ships impression handling as two composable functions rather than a single bundled call. decodeTmpx(raw_tmpx) -> ExposureLog writeExposure(log, store_context) -> { ok, count } Why two functions, not one: - Topology-neutral. Scope3's production architecture is pixel -> tracking endpoint -> pub/sub topic -> frequency_writer -> Valkey. 
A bundled recordImpression() forces synchronous topology and prevents the buffering pattern. - Re-usable building blocks. Decode without write supports diagnostic tools, replay analysis, test harnesses. - Cleaner boundary. Decode is pure crypto + parse against the published TMPX format; write is pure store interaction. Also drops the "JS for writers, Go for reader" framing from the SDK section. Brian's earlier "JS" was shorthand for "the language the impression tracker is in" — currently Go at Scope3. Spec/SDK is language-neutral; same two primitives ship in adcp-go, adcp-ts, adcp-py. Deployment topology (sync, pub/sub, batch) and language are the implementer's choice. Co-Authored-By: Claude Opus 4.7 (1M context) --- specs/identitymatch-fcap-architecture.md | 63 +++++++++++++++++++----- 1 file changed, 50 insertions(+), 13 deletions(-) diff --git a/specs/identitymatch-fcap-architecture.md b/specs/identitymatch-fcap-architecture.md index a9408881c5..5dea1c52e7 100644 --- a/specs/identitymatch-fcap-architecture.md +++ b/specs/identitymatch-fcap-architecture.md @@ -268,9 +268,36 @@ Where the existing plumbing helps: `kid` prefix conventions, the 5-minute JWKS-s | SDK | Signing today | HPKE needed | Priority | |---|---|---|---| -| `@adcp/client` (JS) | ✅ | encrypt + decrypt | First — unblocks impression tracker | -| `adcp-go` | ✅ | decrypt (server) | Reference IdentityMatch impl | -| `adcp` (Python) | partial | encrypt + decrypt | Follows JS | +| `@adcp/client` (TS/JS) | ✅ | encrypt + decrypt | Same surface as below | +| `adcp-go` | ✅ | encrypt + decrypt | Same surface; current Scope3 impression tracker is in Go | +| `adcp` (Python) | partial | encrypt + decrypt | Same surface | + +All three SDKs ship the same primitive surface. Implementer chooses the language; spec/SDK does not dictate. 
+ +### Impression-handling primitives (composable, two-step) + +Per design alignment with Scope3's existing impression tracker, SDKs ship the impression-handling logic as **two composable functions**, not a single bundled call. Real deployments separate decode (synchronous, at intake) from exposure write (often asynchronous, behind a queue) — bundling the two forces a synchronous topology. + +``` +decodeTmpx(raw_tmpx) -> ExposureLog + // Decrypts HPKE ciphertext, parses the published TMPX binary format + // (specification.mdx:534-597), returns the resolved identity entries + // in a structured form ready for serialization onto a topic or for + // direct write. + +writeExposure(log, store_context) -> { ok, count } + // Writes the exposure increment(s) per the resolved identities and + // declared fcap_keys. store_context wires the FrequencyStore + // implementation (valkey, Aerospike, DynamoDB, etc.). +``` + +Why two functions: + +- **Topology-neutral.** A high-volume tracking endpoint typically decodes at intake and emits to pub/sub; a downstream `frequency_writer` consumes and writes Valkey at its own pace. Buffering, retries, dedup, observability live at the queue layer. Two functions let any topology compose them; one bundled call doesn't. +- **Re-usable building blocks.** Decode without write supports diagnostic tools, replay analysis, and test harnesses that need the structured form without committing state. +- **Cleaner boundary for open-source reuse.** Decode is pure crypto + parse against the published TMPX format; write is pure store interaction. Each is independently testable. + +The same two primitives ship in adcp-go, adcp-ts, adcp-py. Pub/sub buffering, retry, and observability are deployment concerns, not protocol concerns. 
### Pluggable store interfaces @@ -296,9 +323,9 @@ Specific interface signatures are an SDK-design concern, tracked under `adcp-cli | SDK + valkey reference connector | `@adcp/client/identitymatch` | JS/TS | Default store implementation behind the SDK interfaces | | SDK + Aerospike/Dynamo/etc. connectors | community / buyer-implemented | any | Optional alternate stores satisfying the same interfaces | -### Why JS for the management plane and Go for the reader +### Language is an implementer choice, not a protocol choice -The impression tracker and CRUD writethrough run in the buyer's existing infra, which is overwhelmingly JS today. Wrapping in Go adds a process boundary for no benefit. The IdentityMatch service is hot-path request handling and benefits from Go's concurrency model + Prebid Server integration. Both consume the same store interfaces; they don't share storage assumptions, only the interface contract. +Spec/SDK does not dictate where the impression-handling logic runs. Scope3's tracking endpoint is currently in Go; another buyer might run a Node service or a Python worker. The same `decodeTmpx` + `writeExposure` primitives ship in adcp-go, adcp-ts, adcp-py — the implementer picks the language that fits their infra. The IdentityMatch service (`POST /identity` reader) has the same property: any language that can read the FrequencyStore / AudienceStore / PackageStore / FcapPolicyStore interfaces and serve TMP responses is conformant. ## Storyboard conformance scenarios @@ -540,19 +567,29 @@ Selected sections of this spec move to authoritative protocol docs: The split: authoritative implementation guidance moves to `docs/`; design history stays in `specs/`. SDK teams build against `docs/`. -### 2. JS SDK: `@adcp/client` V6 (tracked: adcp-client#1005) +### 2. SDK primitives across `@adcp/client` (TS), `adcp-go`, `adcp` (Python) — tracked: adcp-client#1005 + +Same primitive surface in all three SDKs.
Implementer chooses the language; spec/SDK does not dictate where the logic runs. + +**Impression handling (composable, two-step):** -New namespace `client.identityMatch.*` with five methods that constitute the buyer-side management plane and the test harness driver: +``` +decodeTmpx(raw_tmpx) -> ExposureLog // pure crypto + parse against published TMPX format +writeExposure(log, store_context) -> { ok, count } // pure store interaction; FrequencyStore impl pluggable +``` + +**Buyer-side management plane:** ``` -client.identityMatch.upsertAudience(audience_id, members, opts) // wraps sync_audiences add/remove deltas -client.identityMatch.upsertPackage(seller_agent_url, package_id, fcap_keys, audience_ids, opts) -client.identityMatch.upsertFcapPolicy(fcap_key, {window_sec, window_kind, max_count, merge_rule}) -client.identityMatch.recordImpression(tmpx, opts) // decodes TMPX, HINCRBY exposure -client.identityMatch.inspectExposure(fcap_key, uid_type, user_token) // test-only assertion helper +upsertAudience(audience_id, members, opts) // wraps sync_audiences add/remove deltas +upsertPackage(seller_agent_url, package_id, fcap_keys, audience_ids, opts) +upsertFcapPolicy(fcap_key, {window_sec, window_kind, max_count, merge_rule}) +inspectExposure(fcap_key, uid_type, user_token) // test-only assertion helper ``` -Plus HPKE encrypt/decrypt as net-new SDK primitives (X25519 KEM, ChaCha20-Poly1305, HKDF-SHA256 per RFC 9180 `mode_base`). The encrypt path is needed by the buyer agent emitting TMPX; decrypt by the impression handler. +Plus HPKE encrypt/decrypt as net-new SDK primitives (X25519 KEM, ChaCha20-Poly1305, HKDF-SHA256 per RFC 9180 `mode_base`). The encrypt path is needed by the IdentityMatch service emitting TMPX; decrypt by the impression handler invoking `decodeTmpx`. + +The two-step impression surface is deliberate. 
Production tracking endpoints typically decode at intake, publish to pub/sub for buffering, and let a downstream worker write the store at its own pace. Bundling decode+write into a single call would force a synchronous topology and prevent that buffering pattern. See § Impression-handling primitives. ### 3. Reference TMP server: `adcp-go/identitymatch` (open-source) + Scope3 hosted (public) From cd85d48d15ec37198ddd7057eedfa660bbb00dc1 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Mon, 27 Apr 2026 18:51:57 -0400 Subject: [PATCH 05/12] docs(tmp): promote IdentityMatch implementation to authoritative docs Per @brian: the spec doc lived in specs/ where SDK teams don't look. Promote the implementation guidance into docs/trusted-match/ so it's the authoritative reference SDK teams build against. Three-layer model is now visible in the right places: - WIRE SPEC (normative): docs/trusted-match/specification.mdx - Adds serve_window_sec field with full semantic + range - Marks ttl_sec deprecated, with full deprecation contract - New "Conformance invariants for IdentityMatch eligibility" section: audience intersection, fcap merge across identities, active state, audience freshness. Backend-agnostic. - Updates caching section to reflect serve-window contract. - Refines TMPX caching behavior to use serve-window terminology. - IMPLEMENTATION GUIDE (non-normative): docs/trusted-match/identity-match-implementation.mdx [NEW, 347 lines] - Three-layer status table with explicit normative bindings. - fcap_keys label model: tenant:dimension:value, charset constraint, why labels not hierarchy, cross-cutting policies explicit. - Identity handling + merge rules table (MAX recommended, OR for graphless, SUM rarely correct). - Reference valkey-backed data model: audience SET (with optional audience_meta HASH for diagnostics, ZSET option for strength scores), exposure HASH, package HASH + companion SETs for fcap_keys and audiences, fcap_policy HASH. 
- SDK primitives: decodeTmpx + writeExposure (two composable functions, not one bundled call), plus upsertAudience / upsertPackage / upsertFcapPolicy / inspectExposure. - Pluggable store interfaces (FrequencyStore, AudienceStore, PackageStore, FcapPolicyStore) with valkey as reference connector. - Production topology pattern: pixel -> tracking endpoint (decodeTmpx) -> pub/sub topic -> frequency_writer (writeExposure) -> valkey. Same as Scope3's deployment. - Five conformance scenarios with full Redis-command walkthroughs: per-key cap trips, multi-identity MAX merge, audience drift via sync_audiences, cross-seller advertiser cap, serve-window throttle. - BUYER GUIDE (refreshed): docs/trusted-match/buyer-guide.mdx - Identity Match response example uses serve_window_sec. - "Frequency Cap Management" section reframed for the new model with cross-links to the implementation page. - "How Buyers Learn About Exposures" now references SDK primitives. - "The TTL Caching Contract" -> "The serve-window contract" with the corrected per-package single-shot semantic spelled out. - MIGRATION: docs/trusted-match/migration-from-axe.mdx - Adds "OpenRTB User.eids cross-walk" section mapping uid_type values to OpenRTB 2.6 User.eids.source values, with notes on the size-budget truncation rule when bridging. - ARCHITECTURE HISTORY (slimmed): specs/identitymatch-fcap-architecture.md goes from 485 to 136 lines. Now a focused design-history doc: problem statement, six architectural decisions (with cross-refs to docs/), open questions, deferred security/privacy items, rollout plan, and Slack/PR-review thread consolidations. Implementation guidance promoted to docs/ rather than duplicated. Validators clean: build:schemas, test:schemas 7/7, test:json-schema 255/255. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../identitymatch-fcap-architecture-spec.md | 35 +- docs/trusted-match/buyer-guide.mdx | 42 +- .../identity-match-implementation.mdx | 347 ++++++++++ docs/trusted-match/migration-from-axe.mdx | 18 + docs/trusted-match/specification.mdx | 32 +- specs/identitymatch-fcap-architecture.md | 651 +++--------------- 6 files changed, 516 insertions(+), 609 deletions(-) create mode 100644 docs/trusted-match/identity-match-implementation.mdx diff --git a/.changeset/identitymatch-fcap-architecture-spec.md b/.changeset/identitymatch-fcap-architecture-spec.md index 360ee7e66e..69f9f519fb 100644 --- a/.changeset/identitymatch-fcap-architecture-spec.md +++ b/.changeset/identitymatch-fcap-architecture-spec.md @@ -2,18 +2,35 @@ "adcontextprotocol": patch --- -IdentityMatch & frequency capping architecture spec, plus a wire-side fix to the response throttle field. Adds: +IdentityMatch & frequency capping architecture, with both the wire-spec change and the implementation guidance landing as authoritative protocol docs. -- `specs/identitymatch-fcap-architecture.md` — design spec consolidating the buyer-internal valkey schema, `fcap_keys[]` label model with required tenant-prefixing, identity-handling rules (no required canonicalization), and storyboard conformance scenarios behind TMP IdentityMatch. -- `identity-match-response.json` — adds `serve_window_sec` (integer, 1–300, default 60) and deprecates `ttl_sec`. The original `ttl_sec` field was documented as a router response cache TTL but operationally functioned as a per-package single-shot fcap, conflating two distinct concerns. `serve_window_sec` carries the corrected semantic: at most one impression per eligible package per user during this window. Multi-impression fcap is handled separately by buyer-side exposure records and policies. -- `CHANGELOG.md` — 6-week deprecation notice for `ttl_sec` removal per the experimental-status contract. Earliest landing: 2026-06-07. 
+**Wire spec changes** (`identity-match-response.json`): +- Adds `serve_window_sec` (integer, 1–300, default 60) — per-package single-shot fcap window. After serving the user one impression on each eligible package within this window, the publisher MUST re-query Identity Match before serving from those packages again. Not a router response cache TTL. +- Deprecates `ttl_sec`. Originally documented as a router cache TTL but operationally functioned as a per-package serve throttle. Senders during the deprecation window populate both fields; receivers prefer `serve_window_sec`. Removed in a 3.0.x release ≥ 6 weeks after the 2026-04-26 notice (earliest 2026-06-07). -The buyer-internal records (audience, exposure, package, fcap_policy) are documented as a **valkey schema** — Redis key patterns + primitive types (HASH / SET / ZSET) + field names within each. Cross-language interop between JS impression-trackers and Go IdentityMatch services is handled by Redis client libraries; agreement is at the operation level (`HINCRBY exposure:... count 1`, `SMEMBERS audience:...`), not via a binary serialization layer. No proto, no JSON Schema for these records — they aren't wire envelopes and they aren't binary blobs. +**Doc updates** (authoritative implementation guidance): +- `docs/trusted-match/specification.mdx` — adds `serve_window_sec` field, marks `ttl_sec` deprecated, adds normative conformance invariants for IdentityMatch eligibility (audience intersection, fcap merge across identities, active state, audience freshness). Updates the caching section for the new contract. 
+- `docs/trusted-match/identity-match-implementation.mdx` (new page) — implementation guide covering the `fcap_keys` label model with tenant prefix and charset, reference valkey-backed data model (audience SET, exposure HASH, package HASH, fcap_policy HASH), merge rules with MAX recommended, SDK primitives (`decodeTmpx`, `writeExposure`, `upsertAudience`, `upsertPackage`, `upsertFcapPolicy`, `inspectExposure`), pluggable store interfaces (FrequencyStore / AudienceStore / PackageStore / FcapPolicyStore), production topology pattern (pub/sub buffering between tracking endpoint and store writer), and Redis-command walkthroughs for the five conformance scenarios. +- `docs/trusted-match/buyer-guide.mdx` — updates frequency-cap management and the serve-window contract sections; cross-links to the implementation page. +- `docs/trusted-match/migration-from-axe.mdx` — adds OpenRTB 2.6 `User.eids[]` cross-walk for buyers bridging from OpenRTB-shaped pipelines. -The TMPX wire format itself is **unchanged** — already specified in `docs/trusted-match/specification.mdx` as a compact binary layout with version/timestamp/country/8-byte nonce/typed identity entries, with replay defense via master-side nonce dedup. +**Three-layer model:** +- Wire spec (normative) — what crosses an agent boundary. +- Conformance invariants (normative) — backend-agnostic eligibility logic. +- Reference data model (non-normative) — Scope3's valkey-backed implementation choice. Buyers may use Aerospike, DynamoDB, or anything else; the SDK exposes pluggable store interfaces. The protocol describes WHAT the service must compute, not HOW it stores the data. -JSON Schema continues to govern wire/RPC surfaces. Buyer-internal valkey records live in the spec doc as a Redis schema. Each contract uses the right tool for its job. +**SDK primitives** ship across `@adcp/client` (TS), `adcp-go`, and `adcp` (Python). Same primitive surface in all three languages. 
Impression handling is two composable functions (`decodeTmpx` + `writeExposure`), not one bundled call — production tracking endpoints decode at intake and write downstream behind a pub/sub buffer; bundling would force synchronous topology. -All TMP surfaces remain `x-status: experimental`. Wire change in this release is purely additive (`serve_window_sec`); the `ttl_sec` removal lands in a later 3.0.x release ≥ 6 weeks after notice. Storyboard YAML deferred until TMP graduates from `experimental_features` into `supported_protocols` — buyer SDKs implement the five conformance scenarios as integration tests now. +**Architecture history** preserved at `specs/identitymatch-fcap-architecture.md` (slimmed from 485 to 136 lines) — captures the design decisions, the deferred security/privacy follow-ups, the rollout plan, and consolidated Slack/PR-review threads. Implementation details now live in `docs/`. -Several deferred security and privacy follow-ups are documented in the spec: TMPX harvest → competitor-suppression attack, eligibility-as-audience-membership oracle, consent revocation between IdentityMatch and impression, side-channel via eligibility deltas, hashed_email leak surface, and DoS amplification via large `package_ids[]`. None block this PR; each warrants a focused follow-up. +All TMP surfaces remain `x-status: experimental`. Wire change is purely additive (`serve_window_sec`); the `ttl_sec` removal lands in a later 3.0.x. 
+ +**Tracked deferred follow-ups** (not in this PR): +- TMPX harvest → competitor-suppression attack +- Eligibility-as-audience-membership oracle (honeypot package_ids) +- Consent revocation between IdentityMatch and impression +- Side-channel via eligibility deltas +- `hashed_email` in TMPX leak surface +- DoS amplification via large `package_ids[]` +- Where do fcap policies live on the wire (currently SDK-only) +- Identity-graph plug-point interface for SDK diff --git a/docs/trusted-match/buyer-guide.mdx b/docs/trusted-match/buyer-guide.mdx index c466fcc41e..41bf39aa41 100644 --- a/docs/trusted-match/buyer-guide.mdx +++ b/docs/trusted-match/buyer-guide.mdx @@ -16,7 +16,7 @@ A buyer agent exposes two HTTP/2 endpoints under a single base URL — `POST /co | Message type | Receives | Returns | |---|---|---| | `context_match_request` | Page/content signals, placement, geo | Offers with creative manifests | -| `identity_match_request` | Opaque user token, all active package IDs | Eligible package IDs + TTL | +| `identity_match_request` | Opaque user token, all active package IDs | Eligible package IDs + serve window | Each endpoint handles one message type. Both must respond in under 50ms. The router enforces this budget and will skip slow providers. @@ -120,11 +120,11 @@ The router sends you one or more opaque identity tokens and a list of ALL your a "type": "identity_match_response", "request_id": "id-9c4e", "eligible_package_ids": ["acme-outdoor-q2", "acme-loyalty-retarget"], - "ttl_sec": 60 + "serve_window_sec": 60 } ``` -Return only the package IDs that pass your eligibility checks. Packages not in the list are treated as ineligible. The `ttl_sec` tells the router how long to cache this response — during that window, the router returns cached eligibility without re-querying you. The publisher uses cached eligibility to allocate across whatever placements exist. Set the TTL based on how quickly your eligibility state changes (frequency caps, audience updates, etc.). 
+Return only the package IDs that pass your eligibility checks. Packages not in the list are treated as ineligible. The `serve_window_sec` is a **per-package single-shot fcap**: after the publisher serves the user one impression on each eligible package within this window, the publisher MUST re-query Identity Match before serving from those packages again. Default 60s, max 300s. This is not a router response cache TTL — see [The serve-window contract](#the-serve-window-contract). The deprecated `ttl_sec` field still exists during a 6-week deprecation window; senders SHOULD populate both with the same value. **What you never receive** in Identity Match: page URLs, content topics, keywords, article text, or any content signal. You cannot determine what the user is looking at. @@ -143,21 +143,24 @@ You have no role in this step. The publisher controls activation. ## Frequency Cap Management -Cross-publisher frequency capping is the primary use case for Identity Match. Your agent maintains frequency state per user token: +Cross-publisher frequency capping is the primary use case for Identity Match. Your agent maintains frequency state per user identity: -- **Count impressions** by user token + package ID -- **Track recency** — when was the last impression for this token? 
-- **Apply caps** from the media buy: `max_impressions` per `window`, minimum `recency` between exposures -- **Exclude the package** from `eligible_package_ids` when a cap is hit -- **Set `ttl_sec`** to reflect how long this eligibility is valid — a shorter TTL means the router re-checks sooner, which is useful when a cap is close to being reached +- **Count impressions** per fcap key (campaign, advertiser, creative, line item, or whatever dimensions you cap on) per resolved user identity +- **Apply policies** with a window and max count +- **Merge across identities** for users with multiple resolved tokens (RampID + ID5 + MAID for the same person) — see [merge rules](identity-match-implementation#merge-rules) +- **Exclude packages** from `eligible_package_ids` when any cap on the package trips Because Identity Match runs across all publishers using TMP, a user who saw your ad on Publisher A will correctly show as over-frequency on Publisher B — even though you can't see which publisher sent the request. +For the implementation details — the fcap_keys label model, the reference valkey data model, merge_rule semantics, audience and exposure record shapes, the SDK primitives, and Redis-command walkthroughs for the conformance scenarios — see [Identity Match implementation](identity-match-implementation.mdx). + ### How Buyers Learn About Exposures -The `tmpx` field on the Identity Match response carries a TMPX token — an HPKE-encrypted blob containing the user's resolved identity tokens. The publisher substitutes `{TMPX}` into creative tracking URLs. When the ad serves, your impression pixel receives the encrypted token. Your cluster master decrypts it, logs the exposure against the user, and replicates updated frequency state to read replicas. This gives you real-time per-user exposure signals without the publisher seeing user identity. 
+The `tmpx` field on the Identity Match response carries a TMPX token — an HPKE-encrypted blob containing the user's resolved identity tokens. The publisher substitutes `{TMPX}` into creative tracking URLs. When the ad serves, your impression pixel receives the encrypted token. Your impression handler decrypts it (via the SDK's `decodeTmpx` primitive) and writes the exposure increment to your store (via `writeExposure`). Most production deployments separate decode (synchronous, at intake) from write (asynchronous, behind a queue) for buffering — see the implementation page for the topology pattern. + +This gives you real-time per-user exposure signals without the publisher seeing user identity. -See [TMPX Exposure Tokens](/docs/trusted-match/specification#tmpx-exposure-tokens) for the encryption format and binary token structure. +See [TMPX Exposure Tokens](/docs/trusted-match/specification#tmpx-exposure-tokens) for the encryption format and binary token structure, and [Identity Match implementation](identity-match-implementation.mdx#sdk-primitives) for the SDK functions. ## Provider Registration @@ -200,16 +203,19 @@ Common scenarios: - **Internal failure**: Return an error response. The router skips your provider and proceeds with other providers. - **Timeout**: If you can't respond within the latency budget, the router skips you. No error response needed — the router handles this. -## The TTL Caching Contract +## The serve-window contract + +The `serve_window_sec` field on Identity Match responses is a **per-package single-shot fcap** between the buyer and the publisher: + +- For each package in `eligible_package_ids`, the publisher MAY serve the user **at most one impression** on that package within `serve_window_sec` seconds. +- After the publisher has served one impression on each eligible package, the publisher MUST re-query Identity Match before serving any of those packages to the same user again. 
+- Multi-impression frequency capping (5/day, 100/month, etc.) is separate. It lives in your buyer-side state and is updated out-of-band via TMPX impression callbacks regardless of `serve_window_sec`. The serve window is the protocol-level throttle; multi-impression caps are buyer-internal policy. -The `ttl_sec` field on Identity Match responses is a caching contract between the buyer and the router: +The router MAY apply an internal deduplication cache keyed by `{identities_hash, provider_id, package_ids_hash, consent_hash}` (see spec for canonical bytes), but the publisher's binding contract is the serve-window throttle, not the router's cache window. -- The router caches the response for `ttl_sec` seconds, keyed by `{identities_hash, provider_id, package_ids_hash, consent_hash}` (see spec for canonical bytes). `identities_hash` is computed over the per-provider filtered subset you received — your cache partition is scoped to the identity types you resolve. -- During that window, the router returns cached eligibility without re-querying the buyer -- The publisher uses cached eligibility to allocate across whatever placements exist — a single pre-roll, a CTV ad pod, or a web page with multiple ad units -- The buyer doesn't need to know how many placements exist or how the publisher allocates +**Choosing a serve_window_sec value**: Default 60 seconds. Range 1–300. Anything longer than 300 makes per-package fcap too coarse for typical campaigns. Anything shorter than your IdentityMatch round-trip just adds load. 60 is a good default; tune downward if eligibility state shifts faster (close to a cap, audience just changed) or upward (max 300) if your IdentityMatch service is under load and the campaigns are tolerant of coarser fcap. -**Choosing a TTL**: Set the TTL based on how quickly your eligibility state changes. If frequency caps reset hourly, a 300-second TTL is reasonable.
If a user is close to a cap limit, return a shorter TTL (e.g., 30 seconds) so the router re-checks sooner. +The deprecated `ttl_sec` field still exists during the 6-week deprecation window. Senders SHOULD populate both `ttl_sec` and `serve_window_sec` with the same value during the transition; receivers SHOULD prefer `serve_window_sec` when both are present. `ttl_sec` is removed in a future 3.0.x release ≥ 6 weeks after the 2026-04-26 notice. ## Performance Requirements diff --git a/docs/trusted-match/identity-match-implementation.mdx b/docs/trusted-match/identity-match-implementation.mdx new file mode 100644 index 0000000000..03cea98dfd --- /dev/null +++ b/docs/trusted-match/identity-match-implementation.mdx @@ -0,0 +1,347 @@ +--- +title: Identity Match Implementation Guide +sidebarTitle: IdentityMatch Implementation +description: "Implementation guidance for the buyer-side IdentityMatch service — fcap_keys label model, reference valkey data model, merge rules, SDK primitives, and conformance scenarios." +"og:title": "AdCP TMP IdentityMatch Implementation Guide" +--- + +# Identity Match Implementation Guide + +This page covers how to implement the buyer side of TMP's Identity Match operation. The wire spec lives in the [specification](specification.mdx); the conformance invariants the service must satisfy are also normative there. What lives on this page is **implementation guidance** — the data model, the SDK primitives, and the operational shape of a working IdentityMatch service. Storage backend is an implementer choice; the SDK exposes pluggable interfaces. + +The reference data model on this page is **valkey-backed** and reflects what Scope3 ships. Other buyers may use Aerospike, DynamoDB, PostgreSQL, in-memory state, or anything else — as long as the conformance invariants hold, the service is valid. 
+ +## Three layers + +| Layer | Status | What it covers | +|---|---|---| +| Wire spec | Normative | The HTTP JSON request/response on `POST /identity`, the `serve_window_sec` semantic, the TMPX binary format. See [specification.mdx](specification.mdx). | +| Conformance invariants | Normative | The eligibility logic an IdentityMatch service MUST compute (audience intersection, fcap merge across identities, active state, audience freshness). Also in [specification.mdx](specification.mdx#conformance-invariants-for-identitymatch-eligibility). | +| Reference data model | Non-normative | Scope3's valkey-backed implementation choice — Redis primitives, key patterns, field names. The rest of this page. | + +## fcap_keys label model + +A frequency cap is identified by a tag of the form `tenant:dimension:value`: + +``` +buyer-acme:campaign:42 +buyer-acme:campaign_group:7 +buyer-acme:advertiser:13 +buyer-acme:creative:8 +``` + +Packages declare which `fcap_keys` they belong to; exposure records are written per-key; policies (window, max count, merge rule) are attached per-key. + +``` +package 2342: fcap_keys ["buyer-acme:campaign:42", + "buyer-acme:campaign_group:7", + "buyer-acme:advertiser:13"] +policy "buyer-acme:campaign:42": {window_sec: 60, max_count: 5, merge_rule: MAX} +policy "buyer-acme:advertiser:13": {window_sec: 86400, max_count: 20, merge_rule: MAX} +``` + +**Tenant prefix is required.** Multi-tenant buyer-agent fleets host multiple advertiser orgs. Without a tenant prefix, `campaign:42` collides on shared counters between tenants and tenant A learns B's reach by watching the counter move. SDKs MUST refuse keys without a tenant prefix. + +**Charset constraint.** Each segment matches `[a-zA-Z0-9_-]+` so the `:` delimiter is unambiguous. URL-bearing or otherwise colon-bearing values must be hashed or shortened before use as a value segment. 
+ +**Why labels not hierarchy.** Cap dimensions are heterogeneous across customers — some want creative-level caps, some line-item, some flight, some advertiser-roll-up. A fixed schema either over-prescribes or under-serves. Labels also make cross-seller fcap automatic: any policy whose key is shared across sellers (e.g., `buyer-acme:advertiser:13`) enforces across all of them with no extra mode. + +**Cross-cutting policies are explicit, not implied.** A campaign that needs both a per-campaign and a per-advertiser cap declares both keys and gets two policy lookups at check time. There is no implicit roll-up. + +## Identity handling + +The protocol does not dictate a canonical user ID. Buyers will use multiple identity providers (RampID, ID5, MAID, UID2, publisher-issued tokens) in parallel — Scope3's identity graph is canonical only for Scope3-hosted IdentityMatch instances. Other operators run their own graph or none at all. + +Records are keyed by `(uid_type, user_token)`. A user with three resolved identities produces three records on each write, and reads merge across the user's identities at check time: + +``` +exposure:buyer-acme:campaign:42:rampid:abc → {count: 3, …} +exposure:buyer-acme:campaign:42:id5:def → {count: 2, …} +exposure:buyer-acme:campaign:42:maid:ghi → {count: 1, …} +``` + +### Merge rules + +The `merge_rule` on each fcap policy is **required** — there is no implicit default, because the right rule depends on whether the buyer canonicalizes upstream. + +| Rule | Behavior | When to use | +|---|---|---| +| `MAX` | Take the largest count across identities. | **Recommended.** Buyers running an identity graph that canonicalizes upstream. Matches how Xandr/DV360/TTD model multi-identity fcap. Avoids over-counting when one impression resolves to two tokens. | +| `OR` | Treat any count > 0 as one exposure per identity, sum the booleans. | Defensible only for graphless operators where identity tokens are known not to alias. 
Over-counts when the same impression carries multiple identities. | +| `SUM` | Add all counts. | Almost always wrong — assumes identities never co-occur for the same person. | + +Buyers who **want** canonicalization run their identity graph behind their own door — pre-canonicalizing tokens before write/read on both the `sync_audiences` path and the TMPX-decrypt path, then setting `MERGE_RULE_MAX` since the merge becomes a no-op. The protocol does not know this happened. + +## Reference data model (valkey-backed) + +Four record types, each modeled directly on a Redis primitive. Cross-language interop is at the operation level (`HINCRBY`, `SADD`, `SMEMBERS`), not via a serialization layer. Valkey/Redis does not validate writes against a schema definition — the contract is enforced by the SDK on the write side and by the IdentityMatch reader on the read side. Library discipline (not database constraints) is what makes this work. + +### Audience record + +``` +type: SET (or ZSET if strength scores are used) +key: audience:{uid_type}:{user_token} +members: audience IDs the user belongs to +``` + +Optional companion HASH at `audience_meta:{uid_type}:{user_token}` for diagnostics: + +``` +type: HASH +fields: + updated_at: unix seconds last written + expires_at: unix seconds after which the SET MUST be ignored (0 = no deadline) + source: origin pipeline (typically "sync_audiences") +``` + +Written by the buyer's `sync_audiences` pipeline. Read at IdentityMatch eligibility time. Real-world `sync_audiences` cadences vary widely (prospecting segments are often hourly-to-daily batched; retargeting via streaming CDP can be near-real-time) — `expires_at` lets the pipeline publish freshness contracts directly on the record. + +If using ZSET, the score carries audience strength (0.0–1.0); IdentityMatch can apply a strength floor at eligibility time. 
+ +### Exposure record + +``` +type: HASH +key: exposure:{fcap_key}:{uid_type}:{user_token} +fields: + count: uint, exposures inside the current policy window + first_seen: unix seconds (sliding-window policies) + last_seen: unix seconds, most recent exposure + window_start: unix seconds when the current fixed window opened (0 = sliding) +``` + +Incremented on TMPX decrypt with `HINCRBY exposure:... count 1` plus `HSET ... last_seen <now>`. Each command is atomic on its own as a Redis primitive (wrap both in `MULTI`/`EXEC` or a Lua script if `count` and `last_seen` must change together); no serialization layer is involved. Window semantics (sliding vs fixed) are policy-attached, not record-attached. + +### Package record + +``` +type: HASH +key: package:{seller_agent.agent_url}:{package_id} +fields: + media_buy_id: string (optional, for diagnostics) + active: "1" | "0" + updated_at: unix seconds +``` + +Companion SETs for the multi-valued lookups: + +``` +type: SET +key: package_fcap_keys:{seller_agent.agent_url}:{package_id} +members: e.g. "buyer-acme:campaign:42", "buyer-acme:advertiser:13" +``` + +``` +type: SET +key: package_audiences:{seller_agent.agent_url}:{package_id} +members: e.g. "seg_123", "seg_456" +``` + +Written by the buyer's package-CRUD writethrough. Set membership lets eligibility checks compute audience intersection via native `SINTER` rather than client-side iteration. + +### Fcap policy record + +``` +type: HASH +key: fcap_policy:{fcap_key} +fields: + window_sec: uint + window_kind: "FIXED" | "SLIDING" + max_count: uint + merge_rule: "MAX" | "OR" | "SUM" (required, no implicit default) + active: "1" | "0" + updated_at: unix seconds +``` + +Written by the buyer's policy-CRUD writethrough. + +## SDK primitives + +The SDK ships impression handling as **two composable functions**, not a single bundled call. Production tracking endpoints typically decode at intake, publish to pub/sub for buffering, and let a downstream worker write the store at its own pace. Bundling decode+write into a single call would force a synchronous topology and prevent that buffering pattern.
+ +``` +decodeTmpx(raw_tmpx) -> ExposureLog + Decrypts HPKE ciphertext, parses the published TMPX binary format + (specification.mdx#binary-format), returns the resolved identity entries + in a structured form ready for serialization onto a topic or for direct write. + +writeExposure(log, store_context) -> { ok, count } + Writes the exposure increment(s) per the resolved identities and declared + fcap_keys. store_context wires the FrequencyStore implementation + (valkey, Aerospike, DynamoDB, etc.). +``` + +Plus the buyer-side management plane: + +``` +upsertAudience(audience_id, members, opts) // wraps sync_audiences add/remove deltas +upsertPackage(seller_agent_url, package_id, fcap_keys, audience_ids, opts) +upsertFcapPolicy(fcap_key, {window_sec, window_kind, max_count, merge_rule}) +inspectExposure(fcap_key, uid_type, user_token) // test helper; returns {count, last_seen, ...} +``` + +Plus HPKE encrypt/decrypt as net-new SDK primitives (X25519 KEM, ChaCha20-Poly1305, HKDF-SHA256 per RFC 9180 `mode_base`). The encrypt path is needed by the IdentityMatch service emitting TMPX; decrypt by the impression handler invoking `decodeTmpx`. + +The same primitive surface ships in `@adcp/client` (TS), `adcp-go`, and `adcp` (Python). Implementer chooses the language; spec/SDK does not dictate where the logic runs. + +## Pluggable store interfaces + +The SDK exposes store interfaces — `FrequencyStore`, `AudienceStore`, `PackageStore`, `FcapPolicyStore` — that an IdentityMatch service implementation calls to satisfy the conformance invariants. Buyers running their own backend (Aerospike, DynamoDB, proprietary KV) implement these interfaces against their store; the SDK ships a reference valkey-backed connector. The interfaces, not the storage layout, are what the SDK contracts on. 
+ +``` +interface FrequencyStore { + increment(fcap_key, uid_type, user_token, by) -> count + read(fcap_key, uid_type, user_token) -> { count, first_seen, last_seen, window_start } + reset_window(fcap_key, uid_type, user_token, new_window_start) +} +// Equivalent shapes for AudienceStore, PackageStore, FcapPolicyStore. +``` + +Specific interface signatures are tracked under `adcp-client#1005`. The point at protocol level: the SDK is store-agnostic by design. + +## Production topology pattern + +A typical Scope3-style impression pipeline: + +``` +publisher pixel fires {TMPX} → tracking endpoint + │ + decodeTmpx (synchronous, at intake) + │ + ▼ + pub/sub topic + │ + frequency_writer worker + │ + writeExposure (asynchronous) + │ + ▼ + valkey +``` + +Decode at intake; emit to pub/sub for buffering; downstream worker writes the store at its own pace. Buffering, retries, dedup, observability, and abuse protection live at the queue layer — none of that is the SDK's job. The SDK ships the two functions; deployment topology composes them. + +A simpler synchronous pipeline (decode + write in the same handler) is also valid for low-volume deployments. The SDK supports both because the primitives are composable. + +## Conformance scenarios + +The five scenarios that map to the conformance invariants. Each shows initial state, wire calls, buyer-internal operations, and expected outcomes. SDK-driven integration tests can run these against a live valkey + IdentityMatch service. + +All scenarios assume `serve_window_sec = 60` (default), `tenant = "buyer-acme"`, `package = "pkg-42"`, `seller_agent_url = "https://seller-a.example"`. + +### 1. 
Per-key cap trips after N exposures + +**Setup:** +``` +SADD package_fcap_keys:https://seller-a.example:pkg-42 buyer-acme:campaign:42 +SADD package_audiences:https://seller-a.example:pkg-42 seg_test +HSET fcap_policy:buyer-acme:campaign:42 window_sec 86400 window_kind FIXED \ + max_count 5 merge_rule MAX active 1 +HSET package:https://seller-a.example:pkg-42 active 1 +SADD audience:rampid:abc seg_test +``` + +**Step 1** — wire call: `identity_match_request {identities: [{rampid, abc}], package_ids: [pkg-42]}` → expect `eligible_package_ids: [pkg-42]`, `serve_window_sec: 60`, `tmpx: <token>`. + +**Step 2** — buyer-internal, repeat 5×: decode TMPX, then for each (uid_type, user_token): +``` +HINCRBY exposure:buyer-acme:campaign:42:rampid:abc count 1 +HSET exposure:buyer-acme:campaign:42:rampid:abc last_seen <now> +``` +After 5 iterations: `HGET exposure:buyer-acme:campaign:42:rampid:abc count` returns `5`. + +**Step 3** — wire call: same `identity_match_request` → expect `eligible_package_ids: []` (cap tripped, package dropped). + +### 2. Multi-identity merge (MAX rule) + +Same setup as Scenario 1. User has two resolved identities (`rampid:abc` and `id5:def`). + +**Step 1** — buyer-internal, simulate prior exposures across identities: +``` +HSET exposure:buyer-acme:campaign:42:rampid:abc count 3 +HSET exposure:buyer-acme:campaign:42:id5:def count 2 +``` + +**Step 2** — wire call: `identity_match_request {identities: [{rampid, abc}, {id5, def}], package_ids: [pkg-42]}`. + +Eligibility check inside the buyer agent reads both records and applies `MERGE_RULE_MAX`: +``` +counts = [HGET exposure:...rampid:abc count, HGET exposure:...id5:def count] + = [3, 2] +merged = MAX(3, 2) = 3 +``` +3 < max_count of 5 → `eligible_package_ids: [pkg-42]`.
+ +**Step 3** — buyer-internal, simulate 2 more impressions on rampid: +``` +HINCRBY exposure:buyer-acme:campaign:42:rampid:abc count 2 → count = 5 +``` + +**Step 4** — wire call: same request → `MAX(5, 2) = 5 ≥ max_count` → `eligible_package_ids: []`. + +If the policy were `MERGE_RULE_OR` (count distinct identities exposed), step 2 would have merged to `count_nonzero(3,2) = 2`, and step 4 to `2`. OR-merge would not trip until five distinct identities had been exposed — it ignores per-identity counts entirely, so here it under-counts real exposures, which is why MAX is the recommended rule. + +### 3. Audience drift via sync_audiences + +Setup as Scenario 1, with the user initially in `seg_test`. + +**Step 1** — wire call: `identity_match_request` → `eligible_package_ids: [pkg-42]`. + +**Step 2** — buyer-internal, simulate `sync_audiences` removing the user from the segment: +``` +SREM audience:rampid:abc seg_test +HSET audience_meta:rampid:abc updated_at <now> +``` + +**Step 3** — wait `serve_window_sec` seconds (60) so the publisher re-queries. + +**Step 4** — wire call: same `identity_match_request`. Buyer agent computes audience intersection: +``` +user_audiences = SMEMBERS audience:rampid:abc → [] +package_audiences = SMEMBERS package_audiences:...:pkg-42 → [seg_test] +intersection = ∅ → package dropped +``` +Expect `eligible_package_ids: []`. + +### 4. Cross-seller advertiser cap + +Setup: two packages on different sellers, both mapped to the same `advertiser:13` cap: +``` +SADD package_fcap_keys:https://seller-a.example:pkg-A buyer-acme:advertiser:13 +SADD package_fcap_keys:https://seller-b.example:pkg-B buyer-acme:advertiser:13 +HSET fcap_policy:buyer-acme:advertiser:13 window_sec 86400 max_count 10 \ + merge_rule MAX active 1 +``` + +**Step 1** — wire call from Seller A: `package_ids: [pkg-A]` → eligible.
+ +**Step 2** — buyer-internal, simulate 10 impressions on Seller A's package: +``` +HSET exposure:buyer-acme:advertiser:13:rampid:abc count 10 +``` + +**Step 3** — wire call from Seller B: `package_ids: [pkg-B]`. Buyer agent reads `exposure:buyer-acme:advertiser:13:rampid:abc.count = 10 ≥ max_count` → `eligible_package_ids: []`. + +The advertiser-level cap enforces across sellers because the `fcap_key` is shared. No cross-seller coordination needed; the buyer agent is the single source of truth. + +### 5. Serve-window throttle + +Setup as Scenario 1. + +**Step 1** — wire call at `t=0`: `identity_match_request` → `eligible_package_ids: [pkg-42]`, `serve_window_sec: 60`. + +**Step 2** — publisher serves one impression on pkg-42 within the 60s window. + +**Step 3** — at `t=30s`, publisher receives another ad opportunity for the same user. Per `serve_window_sec` semantic, the publisher MUST NOT re-serve pkg-42 from the cached eligibility — pkg-42 is exhausted in this window. + +**Step 4** — at `t=61s`, publisher re-queries: `identity_match_request` → fresh eligibility. The buyer agent does not need to track per-publisher window state; it just answers freshly when re-queried. + +This is the semantic the `serve_window_sec` field encodes. The buyer agent's job is correctness on each query; the publisher's job is honoring the one-impression-per-package contract within the window. 
+ +## See also + +- [TMP Specification](specification.mdx) — wire spec, TMPX format, conformance invariants +- [Buyer Guide](buyer-guide.mdx) — buyer agent integration, Context Match + Identity Match flows +- [Migration from AXE](migration-from-axe.mdx) — for buyers transitioning from AXE-shaped pipelines, including the OpenRTB User.eids cross-walk +- [Privacy architecture](privacy-architecture.mdx) — what each party learns +- [Router architecture](router-architecture.mdx) — provider registration, fan-out, latency diff --git a/docs/trusted-match/migration-from-axe.mdx b/docs/trusted-match/migration-from-axe.mdx index 673cdbd3f6..fe24cbe81e 100644 --- a/docs/trusted-match/migration-from-axe.mdx +++ b/docs/trusted-match/migration-from-axe.mdx @@ -85,3 +85,21 @@ New media buys should omit AXE fields entirely. The buyer agent's Context Match - **`sync_creatives`** — Same creative sync - **GAM as the ad server** — TMP still sets key-values that GAM evaluates - **Geographic and other targeting overlays** — These are media buy fields, not execution-layer concerns + +## OpenRTB User.eids cross-walk + +For buyers bridging from OpenRTB-shaped pipelines, the TMP Identity Match `identities[]` shape maps to OpenRTB 2.6 `User.eids[]` as follows: + +| AdCP TMP `identities[].uid_type` | OpenRTB 2.6 `User.eids[].source` | Notes | +|---|---|---| +| `rampid` / `rampid_derived` | `liveramp.com` | `atype: 1` for maintained, `atype: 3` for derived | +| `id5` | `id5-sync.com` | | +| `uid2` | `uidapi.com` | `atype: 3` | +| `euid` | `euid.eu` | | +| `pairid` | `iabtechlab.com/pair` | | +| `maid` | `adid` (Android) / `idfa` (iOS) | Typically carried on `Device.ifa` rather than `User.eids` in OpenRTB | +| `hashed_email` | `liveintent.com` or buyer-specific | `atype: 3` | +| `publisher_first_party` | publisher-defined `source` URL | | +| `other` | buyer-defined `source` URL | | + +The TMP `user_token` field corresponds to `User.eids[].uids[].id`.
AdCP carries up to 3 identities per Identity Match request (HPKE size budget — see [TMPX size budget](/docs/trusted-match/specification#size-budget)); OpenRTB has no such limit, so a buyer bridging from OpenRTB into TMP must apply a buyer-configured priority order to truncate (typically: deterministic graphs first — UID2, RampID — then probabilistic or publisher-scoped IDs). diff --git a/docs/trusted-match/specification.mdx b/docs/trusted-match/specification.mdx index 53138ef1cb..90df3ac1b6 100644 --- a/docs/trusted-match/specification.mdx +++ b/docs/trusted-match/specification.mdx @@ -202,12 +202,28 @@ Returned by the buyer agent. A list of eligible package IDs with a caching TTL. | `type` | string | Yes | `"identity_match_response"`. Message type discriminator for deserialization. | | `request_id` | string | Yes | Echo of the request's `request_id`. | | `eligible_package_ids` | List\ | Yes | Package IDs the user is eligible for. Packages not listed are ineligible. | -| `ttl_sec` | integer | Yes | How long the router should cache this response, in seconds. A value of `0` means do not cache — re-query on every request. | +| `serve_window_sec` | integer | Yes (additive — see deprecation note) | Per-package single-shot fcap window, in seconds. Range: 1–300. Default: 60. After serving the user one impression on each eligible package within this window, the publisher MUST re-query Identity Match before serving from those packages again. This is **not** a router response cache TTL — it is a buyer-asserted serve throttle. Multi-impression frequency caps are handled separately by buyer-side exposure records, updated out-of-band via TMPX impression callbacks. | +| `ttl_sec` | integer | Yes (deprecated) | DEPRECATED — use `serve_window_sec`. Originally documented as a router response cache TTL but operationally functioned as a per-package serve throttle, conflating two distinct concerns. 
Senders during the deprecation window SHOULD populate both `ttl_sec` and `serve_window_sec` with the same value; receivers SHOULD prefer `serve_window_sec` when both are present. Removed in a future 3.0.x release ≥ 6 weeks after the deprecation notice published 2026-04-26 (earliest landing 2026-06-07). | | `tmpx` | string | No | HPKE-encrypted exposure token containing resolved user identity tokens. The publisher substitutes this into creative tracking URLs as `{TMPX}`. The buyer's impression pixel receives the token, enabling real-time per-user frequency state updates. Wire format: `kid.base64url_nopad(ciphertext)` (unpadded, no `=` characters). Publishers MUST treat this value as opaque pass-through data. | -The response includes eligible package IDs, a TTL, and an optional `tmpx` field. The TMPX token is an HPKE-encrypted exposure token that flows through creative tracking URLs to the buyer's impression pixel, enabling real-time per-user frequency state updates without exposing user identity to the publisher. The buyer computes eligibility from whatever identity signals they have (frequency caps, audience membership, purchase history) and returns only the packages that pass. The publisher does not need to know why a package was excluded — just which packages are eligible. +The response includes eligible package IDs, a serve-window throttle, and an optional `tmpx` field. The TMPX token is an HPKE-encrypted exposure token that flows through creative tracking URLs to the buyer's impression pixel, enabling real-time per-user frequency state updates without exposing user identity to the publisher. The buyer computes eligibility from whatever identity signals they have (frequency caps, audience membership, purchase history) and returns only the packages that pass. The publisher does not need to know why a package was excluded — just which packages are eligible. -The `ttl_sec` field is a caching contract. The buyer is saying: "Cache this for N seconds." 
The router caches the `eligible_package_ids` list and returns it for subsequent requests during the window — it does not track which packages have been served. The publisher enforces allocation rules (at most one ad per package, competitive separation, pod composition) using the cached eligibility as input. This eliminates the need for pod-specific or batch-specific protocol semantics — the router has cached eligibility and the publisher allocates across whatever placements exist during the TTL window (a CTV ad pod, a web page with 20 slots, a single pre-roll). The buyer doesn't need to know the allocation details. +The `serve_window_sec` field is a **per-package single-shot fcap**, not a router cache TTL. The buyer is saying: "After you serve the user one impression on each eligible package, re-query me before serving from those packages again." The router MAY still cache the response for an internal deduplication/cost-saving window, but the binding contract on the publisher side is "one impression per eligible package per window." Multi-impression frequency caps (5 per day per campaign, 100 per month per advertiser, etc.) live in buyer-side state and are updated out-of-band via TMPX impression callbacks regardless of `serve_window_sec`. + +The publisher enforces allocation rules (competitive separation, pod composition) using the eligibility list as input. This eliminates the need for pod-specific or batch-specific protocol semantics — the publisher allocates across whatever placements exist during the serve window (a CTV ad pod, a web page with 20 slots, a single pre-roll), honoring the one-impression-per-package contract. + +#### Conformance invariants for IdentityMatch eligibility + +A conformant IdentityMatch service MUST compute `eligible_package_ids` such that, for each `package_id ∈ request.package_ids`, the package is included in `eligible_package_ids` if and only if **all** of the following hold: + +1. 
**Audience eligibility.** Either the package has no audience requirement, OR there exists at least one audience identifier `a` such that `a` is in the package's required audience set AND `a` is in the audience-membership of at least one identity `i ∈ request.identities` (the union across the user's resolved identities intersects the package's required audiences). +2. **Frequency cap eligibility.** For every fcap policy `k` declared on the package, the merged exposure count for `request.identities` against `k` is strictly less than the policy's `max_count`. The merge across identities applies the policy's declared rule (MAX, OR, or SUM — see [Merge rules](identity-match-implementation.mdx#merge-rules) in the Identity Match implementation guide). +3. **Active state.** Packages or policies marked inactive MUST be treated as if absent. +4. **Audience freshness.** If the buyer's audience pipeline publishes a freshness deadline and the current time is past it, that audience-membership entry MUST NOT contribute to (1). + +The TMPX returned with the response MUST encode the resolved identities so the out-of-band impression handler can update exposure state atomically — see § TMPX tokens. + +Storage backend (valkey, Aerospike, DynamoDB, in-memory, anything) is an implementation choice. Two services with different storage backends that satisfy these invariants for the same inputs MUST return the same eligibility output. See the [Identity Match implementation guide](identity-match-implementation.mdx) for a reference valkey-backed data model and SDK primitives that satisfy the invariants. #### Consent @@ -592,9 +608,9 @@ The 8-byte random nonce enables deduplication at the master. The master stores n ### Caching behavior -The TMPX token is generated once per Identity Match evaluation and cached alongside the eligibility response for `ttl_sec` seconds. All impressions within the TTL window share the same TMPX value (same nonce, same tokens).
+The TMPX token is generated once per Identity Match evaluation and accompanies the eligibility response for the `serve_window_sec` window. All impressions on eligible packages within that window share the same TMPX value (same nonce, same tokens). -The buyer's master MUST NOT deduplicate by TMPX value or nonce within a TTL window — each pixel fire is one impression. Multiple ads served to the same user in a CTV pod or a web page with multiple ad units all produce distinct pixel fires with the same TMPX token. The nonce deduplication only prevents replay of the same TMPX token *after* the TTL window expires — if the same nonce appears outside its original TTL window, it is a replay and MUST be rejected. +The buyer's master MUST NOT deduplicate by TMPX value or nonce within a serve window — each pixel fire is one impression. Multiple ads served to the same user in a CTV pod or a web page with multiple ad units all produce distinct pixel fires with the same TMPX token. The nonce deduplication only prevents replay of the same TMPX token *after* the serve window expires — if the same nonce appears outside its original window, it is a replay and MUST be rejected. ### Publisher obligations @@ -641,9 +657,9 @@ Context Match responses are cacheable because the same packages are evaluated fo - Routers SHOULD cache Context Match responses with a TTL of **5 minutes**. - Providers MAY include a `cache_ttl` field (integer, seconds) in Context Match responses to override the default. Routers MUST respect this value when present. -- Identity Match responses are cached per the `ttl_sec` value in the response. 
Cache key: `{identities_hash, provider_id, package_ids_hash, consent_hash}`, where `identities_hash` is the SHA-256 of the canonical `identities` bytes defined in [Identity Match signed fields](#identity-match-signed-fields) (computed over the per-provider filtered subset); `package_ids_hash` is SHA-256 over the JCS serialization of the sorted `package_ids` array; `consent_hash` is SHA-256 over the JCS serialization of the request's `consent` object (or JCS `null` when the field is absent — this distinguishes "consent unknown" from an explicit-empty consent object). JCS framing prevents delimiter-injection: raw consent strings or package IDs containing `|`, `,`, or `\n` cannot collide two distinct inputs. Including the identity set ensures that adding or removing tokens produces a distinct cache entry. Including the package list hash ensures cached responses are invalidated when the active package set changes (e.g., a new media buy activates). Including the consent hash prevents eligibility decisions taken under one consent state from being served under another. -- When a provider's targeting configuration changes (new packages, updated targeting rules), the provider SHOULD return `"cache_ttl": 0` until the change has propagated, then resume normal caching. -- Both `ttl_sec` and `cache_ttl` have a schema-enforced maximum of 86400 seconds (24 hours). Routers SHOULD clamp buyer-provided values to a configured maximum (recommended: 3600 seconds) to limit the blast radius of stale caches. +- Identity Match responses are bound by `serve_window_sec` (per-package single-shot fcap, max 300s, default 60s). 
Routers MAY apply an internal deduplication cache keyed on `{identities_hash, provider_id, package_ids_hash, consent_hash}`, where `identities_hash` is the SHA-256 of the canonical `identities` bytes defined in [Identity Match signed fields](#identity-match-signed-fields) (computed over the per-provider filtered subset); `package_ids_hash` is SHA-256 over the JCS serialization of the sorted `package_ids` array; `consent_hash` is SHA-256 over the JCS serialization of the request's `consent` object (or JCS `null` when the field is absent — this distinguishes "consent unknown" from an explicit-empty consent object). JCS framing prevents delimiter-injection: raw consent strings or package IDs containing `|`, `,`, or `\n` cannot collide two distinct inputs. Including the identity set ensures that adding or removing tokens produces a distinct cache entry. Including the package list hash ensures cached responses are invalidated when the active package set changes (e.g., a new media buy activates). Including the consent hash prevents eligibility decisions taken under one consent state from being served under another. The publisher's binding contract is the serve-window throttle, not the router's internal cache window. +- When a provider's targeting configuration changes (new packages, updated targeting rules), the provider SHOULD return `"cache_ttl": 0` (Context Match) or `"serve_window_sec": 1` (Identity Match) until the change has propagated, then resume normal values. +- `cache_ttl` (Context Match) has a schema-enforced maximum of 86400 seconds. `serve_window_sec` is bounded at 300 seconds — longer windows make per-package fcap too coarse for typical campaigns, shorter than the IdentityMatch round-trip wastes the throttle. The deprecated `ttl_sec` field retains its existing 86400 maximum until removal in a future 3.0.x release ≥ 6 weeks after the 2026-04-26 deprecation notice. 
## Conformance Levels diff --git a/specs/identitymatch-fcap-architecture.md b/specs/identitymatch-fcap-architecture.md index 5dea1c52e7..1160bd6c71 100644 --- a/specs/identitymatch-fcap-architecture.md +++ b/specs/identitymatch-fcap-architecture.md @@ -1,633 +1,136 @@ # IdentityMatch & Frequency Capping — Architecture Spec -**Status**: draft -**Target release**: 3.0.x (TMP is `x-status: experimental`) -**Related**: `static/schemas/source/tmp/`, `specs/prebid-tmp-proposal.md`, `docs/trusted-match/` +**Status**: landed (architecture decisions). Implementation guidance promoted to `docs/trusted-match/`. +**Target release**: 3.0.1 (additive wire change), then removal of the deprecated `ttl_sec` field in a 3.0.x release ≥ 6 weeks after the deprecation notice. **Branch**: `bokelley/idmatch-design` +**PR**: [#3359](https://github.com/adcontextprotocol/adcp/pull/3359) -This spec defines the buyer-internal data model and SDK surface that sit behind TMP's IdentityMatch operation. The IdentityMatch wire spec already exists; what is missing is a clear architecture for the audience layer, exposure layer, frequency-cap policy layer, cross-language SDK scope, and conformance testing strategy. The goal is to land all of this concretely enough that feedback can be on real artifacts rather than threads. +This spec captures the architecture decisions behind the buyer-side IdentityMatch surface in TMP. It is a **design-history document**, not an implementation reference — the authoritative implementation guidance lives in: + +- [`docs/trusted-match/specification.mdx`](../docs/trusted-match/specification.mdx) — wire spec (normative): `serve_window_sec` field, `ttl_sec` deprecation, conformance invariants for IdentityMatch eligibility, TMPX binary format. 
+- [`docs/trusted-match/identity-match-implementation.mdx`](../docs/trusted-match/identity-match-implementation.mdx) — implementation guidance (non-normative): `fcap_keys` label model, reference valkey data model, merge rules, SDK primitives, pluggable store interfaces, production topology, conformance scenarios. +- [`docs/trusted-match/buyer-guide.mdx`](../docs/trusted-match/buyer-guide.mdx) — buyer-agent integration walkthrough; updated for `serve_window_sec` semantic. +- [`docs/trusted-match/migration-from-axe.mdx`](../docs/trusted-match/migration-from-axe.mdx) — adds OpenRTB 2.6 `User.eids` cross-walk for buyers bridging from OpenRTB-shaped pipelines. + +Read this doc when you want to understand **why** the design landed where it did. Read the docs above when you want to **implement** against it. ## Problem -The current TMP IdentityMatch wire spec (`static/schemas/source/tmp/identity-match-{request,response}.json`) defines what flows on the wire: identity tokens in, eligible package IDs and an HPKE-encrypted exposure token (`tmpx`) out. It does not define: +The TMP IdentityMatch wire spec defines what flows on the wire: identity tokens in, eligible package IDs and an HPKE-encrypted exposure token (`tmpx`) out. It did not previously define: -1. **Buyer-side persistence model** — what records the buyer maintains in valkey to compute eligibility (audiences, exposures, fcap policy), and how those records are keyed. +1. **Buyer-side data model** — what records the buyer maintains to compute eligibility (audiences, exposures, fcap policy), and how those records are keyed. 2. **Frequency-cap semantics** — what dimensions a cap can apply to (campaign, advertiser, group, …) and how multi-identity users are handled. -3. **Cross-language SDK scope** — which SDKs need to read/write valkey, what shape the artifacts that cross language boundaries take, and how HPKE key management slots into existing AdCP key plumbing. -4. 
**Audience freshness** — how the audience and fcap state stay current relative to router-side response caching. +3. **Cross-language SDK scope** — what primitives ship across `@adcp/client`, `adcp-go`, and `adcp` (Python), and how HPKE key management slots into existing AdCP key plumbing. +4. **Audience freshness vs. response throttle** — `ttl_sec` was documented as a router cache TTL but operationally functioned as a per-package serve throttle, conflating two distinct concerns. 5. **Conformance** — how a third party validates that an IdentityMatch implementation is correct. -Without these decisions, the open-source IdentityMatch reference impl risks shipping with Go-shaped assumptions baked into wire-adjacent surfaces. +Without these decisions, the open-source IdentityMatch reference impl risked shipping with Go-shaped assumptions baked into wire-adjacent surfaces. ## Architectural decisions ### 1. Three layers, with explicit normative status -This spec is layered. Each layer has a different binding strength: - | Layer | Status | What it covers | |---|---|---| -| **Wire spec** | Normative | The HTTP JSON request/response on `POST /identity`, the `serve_window_sec` semantic, the TMPX binary format. Anything that crosses an agent boundary. | -| **Conformance invariants** | Normative | The eligibility logic an IdentityMatch service MUST compute, expressed in terms of inputs (identities, packages, audiences, policies, exposures) and outputs (eligible_package_ids), independent of how the service stores its data. | -| **Reference data model** | Non-normative | Scope3's valkey-backed implementation choice — Redis key patterns, primitive types, field names. A buyer running Aerospike, DynamoDB, PostgreSQL, or anything else is conformant if the service satisfies the invariants. | +| **Wire spec** | Normative | HTTP JSON, `serve_window_sec` semantic, TMPX binary format. Anything crossing an agent boundary. 
| +| **Conformance invariants** | Normative | The eligibility logic an IdentityMatch service MUST compute, expressed in terms of inputs (identities, packages, audiences, policies, exposures) and outputs (eligible_package_ids). Storage-agnostic. | +| **Reference data model** | Non-normative | Scope3's valkey-backed implementation choice. Buyers running Aerospike, DynamoDB, or anything else are conformant if their service satisfies the invariants. | -A correctness-equivalent IdentityMatch service can use any backing store. The protocol describes **what** the service must compute, not **how** it stores the data. - -The privacy boundary stays clean across all three layers: publishers and routers never see audience records, exposures, or fcap_keys regardless of backend choice. +The protocol describes **what** the service must compute, not **how** it stores the data. SDK exposes pluggable store interfaces; valkey is the reference connector. ### 2. `fcap_keys[]` as a label model, not hierarchy -A frequency cap is identified by a tag of the form `tenant:dimension:value` — e.g. `buyer-acme:campaign:42`, `buyer-acme:campaign_group:7`, `buyer-acme:advertiser:13`, `buyer-acme:creative:8`. Packages declare which `fcap_keys` they belong to; exposure records are written per-key; policies (window, max count) are attached per-key. - -``` -package 2342: fcap_keys ["buyer-acme:campaign:42", - "buyer-acme:campaign_group:7", - "buyer-acme:advertiser:13"] -policy "buyer-acme:campaign:42": {window_sec: 60, max_count: 5} -policy "buyer-acme:advertiser:13": {window_sec: 60, max_count: 20} -``` - -**Tenant prefix is required.** Multi-tenant buyer-agent fleets host multiple advertiser orgs; without a tenant prefix, `campaign:42` collides on shared valkey counters and tenant A learns B's reach by watching the counter move. SDKs MUST refuse keys without a tenant prefix. - -**Charset constraint.** Each segment matches `[a-zA-Z0-9_-]+` so the `:` delimiter is unambiguous. 
URL-bearing or otherwise colon-bearing values must be hashed or shortened before use. - -**Why labels not hierarchy**: dimensions are heterogeneous across customers (some want creative-level caps, some line-item, some flight, some advertiser-roll-up). A fixed schema either over-prescribes or under-serves. Labels also make cross-seller fcap automatic — any policy whose key is shared across sellers (e.g., `buyer-acme:advertiser:13`) enforces across all of them with no extra mode. - -**Cross-cutting policies are explicit**, not implied. A campaign that needs both a per-campaign and a per-advertiser cap declares both keys and gets two policy lookups at check time. There is no implicit roll-up. +`tenant:dimension:value` (e.g. `buyer-acme:campaign:42`, `buyer-acme:advertiser:13`). Tenant prefix required to prevent cross-tenant counter pollution in multi-tenant fleets. Charset constraint `[a-zA-Z0-9_-]+` per segment for unambiguous parsing. Buyers choose dimensions; the protocol does not enumerate them. See [implementation guide § fcap_keys label model](../docs/trusted-match/identity-match-implementation.mdx#fcap_keys-label-model). ### 3. No required canonicalization of user identity -The protocol does **not** dictate a canonical user ID. Customers will use multiple identity providers (RampID, ID5, MAID, UID2, publisher-issued tokens) in parallel — Scope3's identity graph is canonical *only for Scope3-hosted IdentityMatch instances*. Other operators run their own graph or none at all. - -Records are keyed by `(uid_type, user_token)`. 
A user with three resolved identities produces three records on each write, and reads merge across all the user's identities at check time: - -``` -exposure:buyer-acme:campaign:42:rampid:abc → {count: 3, …} -exposure:buyer-acme:campaign:42:id5:def → {count: 2, …} -exposure:buyer-acme:campaign:42:maid:ghi → {count: 1, …} -``` - -The `merge_rule` on `FcapPolicy` is **required** — there is no implicit default, because the right rule depends on whether the buyer canonicalizes upstream. The recommendation: - -- `MAX` for buyers running an identity graph that canonicalizes upstream. Matches how Xandr / DV360 / TTD model multi-identity fcap; avoids over-counting when one impression resolves to two tokens. -- `OR` (count distinct identities exposed) only for graphless operators where identity tokens are known not to alias. Over-counts when the same impression carries multiple identities. -- `SUM` is rarely correct (assumes identities never co-occur for the same person). - -Customers who **want** canonicalization run their identity graph behind their own door — pre-canonicalizing tokens before write/read on both the `sync_audiences` path and the TMPX-decrypt path, then setting `MERGE_RULE_MAX` since the merge becomes a no-op. The protocol does not know this happened. - -### 4. Replace `ttl_sec` with `serve_window_sec` — fix a semantic drift, not just a cap - -The existing `ttl_sec` field on `identity-match-response.json` was documented as "how long the router should cache this response" but operationally functioned as a per-package single-shot fcap — buyers expected each eligible package to be served at most once per user per window, and to be re-queried thereafter. Two distinct concerns (response caching vs frequency capping) sharing one knob means anyone tuning for cost (long cache) silently breaks fcap, and anyone tuning for fcap (short cache) pays unnecessary IdentityMatch load. 
- -Replacement: new field `serve_window_sec` with the corrected semantic — *after serving the user one impression on each eligible package within this window, the publisher MUST re-query Identity Match before serving from those packages again.* Default 60, max 300. Anything longer than 300 makes the per-package cap too coarse for typical campaigns; anything shorter than the IdentityMatch round-trip wastes the throttle. - -`ttl_sec` is deprecated. During the deprecation window: -- Senders SHOULD populate `serve_window_sec` AND `ttl_sec` with the same value. -- Receivers SHOULD prefer `serve_window_sec` when both are present. -- A future 3.0.x release ≥ 6 weeks after this notice publishes drops `ttl_sec` from `required` and removes it from the schema. - -This is **not** a router response cache. Multi-impression frequency capping is a separate concern, handled by buyer-side `ExposureRecord` + `FcapPolicy` and updated out-of-band via TMPX impression callbacks regardless of `serve_window_sec`. Audience freshness is a third concern, handled by `sync_audiences` cadence — entirely independent of this window. - -### 5. Two write paths into valkey, both buyer-side - -| Path | Writer | Frequency | Records produced | -|---|---|---|---| -| `sync_audiences` | Buyer's audience pipeline (already specified in AdCP) | Continuous / batched | `(uid_type, user_token) → audience_ids[]` | -| Impression callback | Buyer's impression-tracking SDK (JS today, others later) | Per impression | `(fcap_key, uid_type, user_token) → exposure counter increment` | - -A third path — package & policy CRUD — is buyer-control-plane (Nastassia's writethrough), populating `package → fcap_keys[]` and `fcap_key → policy` records. Not in the impression hot path. - -## Wire spec changes - -Additive changes to one existing schema, deprecation of one field. Lands additively in 3.0.1; the deprecated field removal lands in a later 3.0.x release ≥ 6 weeks after this notice. 
- -### `identity-match-response.json`: add `serve_window_sec`, deprecate `ttl_sec` - -New field `serve_window_sec` (integer, 1-300, default 60). Existing `ttl_sec` field reframed in its description and marked deprecated; both fields coexist during the 6-week deprecation window. Senders populate both with the same value; receivers prefer `serve_window_sec`. - -The TMPX wire format itself is **unchanged** — already specified in `docs/trusted-match/specification.mdx:534-597` (16-byte header with version/timestamp/country/nonce/count plus typed identity entries) with replay defense via an 8-byte AEAD-protected nonce + master-side dedup. - -## Conformance invariants (normative) - -Backend-agnostic. A conformant IdentityMatch service MUST compute `eligible_package_ids` such that, for each `package_id ∈ request.package_ids`, the package is included in `eligible_package_ids` if and only if **both** of the following hold: - -**1. Audience eligibility.** Either the package has no audience requirement, OR there exists at least one audience identifier `a` such that: - - `a` is in the package's required audience set, AND - - `a` is in the audience-membership set of at least one identity `i ∈ request.identities` (i.e., the union of audience memberships across the user's resolved identities intersects the package's required audiences). - -**2. Frequency cap eligibility.** For every fcap_key `k` declared on the package, the merged exposure count for `request.identities` against `k` is strictly less than the policy's `max_count`. Specifically: - - Read each `(k, i.uid_type, i.user_token).count` for each `i ∈ request.identities` within the policy's window. - - Apply the policy's `merge_rule`: - - **MAX**: merged = max of all per-identity counts. - - **OR**: merged = count of identities with count > 0. - - **SUM**: merged = sum of all per-identity counts. - - If merged ≥ max_count for ANY of the package's fcap_keys, the package is ineligible. - -**3. 
Active state.** Packages and policies marked `active: false` are treated as if they were not present. - -**4. Audience-record freshness.** If the audience pipeline publishes an `expires_at` and the current time is past that timestamp, the audience-membership entry MUST NOT contribute to the union in (1). - -The TMPX returned with the response must encode the resolved identities so that an out-of-band impression handler can update exposures atomically — see the published TMPX format at `docs/trusted-match/specification.mdx:534-597`. - -Storage choice (valkey, Aerospike, DynamoDB, in-memory, anything) is implementation. Two services with different storage backends that satisfy these invariants for the same inputs MUST return the same eligibility output. - -## Reference data model (non-normative): valkey-backed buyer-side - -This is **Scope3's reference implementation choice** — a recipe for organizing the data the conformance invariants reference, using Redis primitives. Other buyers may use entirely different backends; the protocol does not mandate this layout. - -Four record types, each modeled directly on a Redis primitive. Cross-language interop within this reference impl is handled by Redis client libraries; agreement is at the operation level (`HINCRBY`, `SADD`, `SMEMBERS`), not at a serialization layer. - -Valkey / Redis does not validate writes against a schema definition. The contract documented here is enforced by the SDK on the write side and by the IdentityMatch reader on the read side. A buggy writer can corrupt the store; library discipline (not database constraints) is what makes this work. SDK integration tests verify the contract. 
- -### Audience record - -``` -type: SET (or ZSET if strength scores are used) -key: audience:{uid_type}:{user_token} -members: audience IDs the user belongs to -``` - -Optional companion HASH at `audience_meta:{uid_type}:{user_token}` for diagnostics: - -``` -type: HASH -fields: - updated_at: unix seconds last written - expires_at: unix seconds after which the SET MUST be ignored (0 = no deadline) - source: origin pipeline (typically "sync_audiences") -``` - -Written by the buyer's `sync_audiences` pipeline. Read at IdentityMatch eligibility time. Real-world `sync_audiences` cadences vary widely (prospecting segments are often hourly-to-daily batched; retargeting via streaming CDP can be near-real-time) — `expires_at` lets the pipeline publish freshness contracts directly on the record. - -If using ZSET, the score carries audience strength (0.0–1.0); IdentityMatch can apply a strength floor at eligibility time. - -### Exposure record - -``` -type: HASH -key: exposure:{fcap_key}:{uid_type}:{user_token} -fields: - count: uint, exposures inside the current policy window - first_seen: unix seconds (sliding-window policies) - last_seen: unix seconds, most recent exposure - window_start: unix seconds when the current fixed window opened (0 = sliding) -``` - -Incremented on TMPX decrypt with `HINCRBY exposure:... count 1` plus `HSET ... last_seen `. Atomic by Redis primitive; no serialization. Window semantics (sliding vs fixed) are policy-attached, not record-attached. - -### Package record - -``` -type: HASH -key: package:{seller_agent.agent_url}:{package_id} -fields: - media_buy_id: string (optional, for diagnostics) - active: "1" | "0" - updated_at: unix seconds -``` - -Companion SETs for the multi-valued lookups: - -``` -type: SET -key: package_fcap_keys:{seller_agent.agent_url}:{package_id} -members: e.g. "buyer-acme:campaign:42", "buyer-acme:advertiser:13" -``` - -``` -type: SET -key: package_audiences:{seller_agent.agent_url}:{package_id} -members: e.g. 
"seg_123", "seg_456" -``` - -Written by the buyer's package-CRUD writethrough. Set membership lets eligibility checks compute audience intersection via native `SINTER` rather than client-side iteration. - -### fcap policy record - -``` -type: HASH -key: fcap_policy:{fcap_key} -fields: - window_sec: uint - window_kind: "FIXED" | "SLIDING" - max_count: uint - merge_rule: "MAX" | "OR" | "SUM" (required, no implicit default — see § 3) - active: "1" | "0" - updated_at: unix seconds -``` - -Written by the buyer's policy-CRUD writethrough. - -## Eligibility flow (pseudocode) - -``` -function evaluate_eligibility(identities, candidate_package_ids): - audiences = union(read("audience:{t.uid_type}:{t.user_token}").audience_ids for t in identities) - - eligible = [] - for pkg_id in candidate_package_ids: - pkg = read("package:{pkg.seller_agent_url}:{pkg_id}") - - // Audience match - if pkg.audience_ids and not pkg.audience_ids.intersects(audiences): - continue - - // Frequency cap check across all fcap_keys on the package - capped = false - for fcap_key in pkg.fcap_keys: - policy = read("fcap_policy:{fcap_key}") - counts = [read("exposure:{fcap_key}:{t.uid_type}:{t.user_token}").count for t in identities] - merged = merge(counts, policy.merge_rule) - if merged >= policy.max_count: - capped = true - break - if capped: - continue - - eligible.append(pkg_id) - - return eligible -``` - -Per-impression valkey reads are bounded by `O(|identities| × |candidate_packages| × |fcap_keys_per_package|)` — typically `3 × 50 × 3 = 450` reads. Within reach of valkey pipelining at IdentityMatch latency budgets. - -## Cross-language SDK scope - -### HPKE encrypt/decrypt - -HPKE is a **net-new primitive** for AdCP SDKs. Existing AdCP key plumbing publishes Ed25519/ECDSA verification keys via JWKS for request and webhook **signing** — it does not distribute X25519 KEM public keys for **encryption**. 
The TMPX key model is documented in `docs/trusted-match/specification.mdx:579-587` and lives on `adagents.json` `agents[].encryption_keys` — distinct from the signing JWKS. - -Each SDK that adds HPKE needs: - -- X25519 KEM keypair generation and `kid` derivation. -- ChaCha20-Poly1305 AEAD with HKDF-SHA256 KDF, per the published TMPX cryptosuite (RFC 9180 `mode_base`). -- Decrypt-side `kid` lookup against `encryption_keys`, with rejection-and-metric on unknown `kid` after refetch. -- Per-master nonce dedup window (recommended 7 days, per the published spec) for replay defense; this is the existing TMPX defense and is the buyer's responsibility, not the protocol's. - -Where the existing plumbing helps: `kid` prefix conventions, the 5-minute JWKS-style cache TTL, and the rotation choreography (30-day grace for old master keys). These transfer cleanly. The cryptographic core does not. - -| SDK | Signing today | HPKE needed | Priority | -|---|---|---|---| -| `@adcp/client` (TS/JS) | ✅ | encrypt + decrypt | Same surface as below | -| `adcp-go` | ✅ | encrypt + decrypt | Same surface; current Scope3 impression tracker is in Go | -| `adcp` (Python) | partial | encrypt + decrypt | Same surface | - -All three SDKs ship the same primitive surface. Implementer chooses the language; spec/SDK does not dictate. +Records are keyed by `(uid_type, user_token)`. Buyers running their own identity graph can canonicalize before write/read; the protocol stays agnostic. Multi-identity merge is handled at eligibility-check time via the policy's `merge_rule`. **MAX recommended** for graph-canonicalizing operators (matches Xandr/DV360/TTD); OR for graphless operators where identities are known not to alias; SUM rarely correct. See [implementation guide § Identity handling](../docs/trusted-match/identity-match-implementation.mdx#identity-handling). -### Impression-handling primitives (composable, two-step) +### 4. 
`serve_window_sec` replaces `ttl_sec` -Per design alignment with Scope3's existing impression tracker, SDKs ship the impression-handling logic as **two composable functions**, not a single bundled call. Real deployments separate decode (synchronous, at intake) from exposure write (often asynchronous, behind a queue) — bundling the two forces a synchronous topology. +The original `ttl_sec` field was documented as a router cache TTL but operationally functioned as a per-package single-shot fcap. Two distinct concerns sharing one knob meant tuning for cost (long cache) silently broke fcap, and tuning for fcap (short cache) wasted IdentityMatch round-trips. -``` -decodeTmpx(raw_tmpx) -> ExposureLog - // Decrypts HPKE ciphertext, parses the published TMPX binary format - // (specification.mdx:534-597), returns the resolved identity entries - // in a structured form ready for serialization onto a topic or for - // direct write. - -writeExposure(log, store_context) -> { ok, count } - // Writes the exposure increment(s) per the resolved identities and - // declared fcap_keys. store_context wires the FrequencyStore - // implementation (valkey, Aerospike, DynamoDB, etc.). -``` - -Why two functions: +Replacement: `serve_window_sec` (1–300, default 60) with the corrected semantic — *after serving the user one impression on each eligible package within this window, the publisher MUST re-query Identity Match before serving from those packages again.* -- **Topology-neutral.** A high-volume tracking endpoint typically decodes at intake and emits to pub/sub; a downstream `frequency_writer` consumes and writes Valkey at its own pace. Buffering, retries, dedup, observability live at the queue layer. Two functions let any topology compose them; one bundled call doesn't. -- **Re-usable building blocks.** Decode without write supports diagnostic tools, replay analysis, and test harnesses that need the structured form without committing state. 
-- **Cleaner boundary for open-source reuse.** Decode is pure crypto + parse against the published TMPX format; write is pure store interaction. Each is independently testable. +`ttl_sec` is deprecated. 6-week notice published 2026-04-26; removal in a 3.0.x release ≥ 2026-06-07. During the window, senders SHOULD populate both fields with the same value; receivers SHOULD prefer `serve_window_sec`. -The same two primitives ship in adcp-go, adcp-ts, adcp-py. Pub/sub buffering, retry, and observability are deployment concerns, not protocol concerns. +### 5. Two composable SDK primitives for impression handling, not one -### Pluggable store interfaces - -The SDK exposes store interfaces — `FrequencyStore`, `AudienceStore`, `PackageStore`, `FcapPolicyStore` — that an IdentityMatch service implementation calls to satisfy the conformance invariants. Buyers running their own backend (Aerospike, DynamoDB, proprietary KV) implement these interfaces against their store; the SDK ships a reference valkey-backed connector. The interfaces, not the storage layout, are what the SDK contracts on. +Per Slack alignment with Baiyu (Scope3 impression-tracker owner): ``` -interface FrequencyStore { - increment(fcap_key, uid_type, user_token, by) -> count - read(fcap_key, uid_type, user_token) -> { count, first_seen, last_seen, window_start } - reset_window(fcap_key, uid_type, user_token, new_window_start) -} -// Equivalent shapes for AudienceStore, PackageStore, FcapPolicyStore. +decodeTmpx(raw_tmpx) -> ExposureLog // pure crypto + parse +writeExposure(log, store_context) -> { ok } // pure store interaction ``` -Specific interface signatures are an SDK-design concern, tracked under `adcp-client#1005`. The point at protocol level: the SDK is store-agnostic by design. 
- -### Reference implementations - -| Component | Repo / path | Language | Role | -|---|---|---|---| -| IdentityMatch service | `adcp-go/identitymatch` | Go | Open-source reference reader for `POST /identity` | -| Scope3 hosted IdentityMatch | (Scope3 infra) | — | Public deployment for buyers who don't want to host their own | -| SDK + valkey reference connector | `@adcp/client/identitymatch` | JS/TS | Default store implementation behind the SDK interfaces | -| SDK + Aerospike/Dynamo/etc. connectors | community / buyer-implemented | any | Optional alternate stores satisfying the same interfaces | +Production topology is `pixel → tracking endpoint → pub/sub → frequency_writer → valkey`. A bundled `recordImpression()` would force synchronous topology and break the buffering pattern. Two composable functions let any topology compose them. -### Language is an implementer choice, not a protocol choice +The same two primitives ship in `@adcp/client` (TS/JS), `adcp-go`, and `adcp` (Python). Spec/SDK is language-neutral; implementer picks the language that fits their infra. -Spec/SDK does not dictate where the impression-handling logic runs. Scope3's tracking endpoint is currently in Go; another buyer might run a Node service or a Python worker. The same `decodeTmpx` + `writeExposure` primitives ship in adcp-go, adcp-ts, adcp-py — the implementer picks the language that fits their infra. The IdentityMatch service (`POST /identity` reader) has the same property: any language that can read the FrequencyStore / AudienceStore / PackageStore interfaces and serve TMP responses is conformant. +### 6. TMP IdentityMatch service is a downstream read replica -## Storyboard conformance scenarios +The TMP server reads valkey on each `/identity` call. Writes go through the SDK directly to valkey (production management plane). No new wire endpoints for fcap policies, package CRUD, or impressions — all SDK-side. TMP server stays minimal. 
-The model gives clean invariants that map to runnable AdCP storyboards: +### 7. `sync_audiences` is the audience on-ramp -1. **Per-key cap trips**: 5 impressions on `buyer-acme:campaign:42` → user drops off any package mapped to that key within `serve_window_sec`. -2. **Multi-identity merge (MAX)**: 3 impressions on RampID and 2 on ID5 (same person, `MERGE_RULE_MAX`) → merged count is 3; 6th impression on either identity (now 4 max) is still under cap; 9th identity-aggregated impression trips a cap of 5. -3. **Audience drift**: `sync_audiences` removes user from segment → eligibility on packages requiring that segment drops within `sync_lag + serve_window_sec`. -4. **Cross-seller advertiser cap**: 10 impressions on Seller A across `buyer-acme:advertiser:13` → identical request to Seller B for a different package mapped to the same key returns ineligible. -5. **Serve-window throttle**: After `serve_window_sec` expires, the publisher re-queries Identity Match and gets a fresh response; no router-side stale-cache surface. - -These scenarios are the IdentityMatch conformance suite. Buyer SDK teams SHOULD implement them as integration tests now, even though the AdCP storyboard YAML is deferred (see implementation note). - -**Implementation note**: `supported_protocols` is a closed enum today (`media_buy`, `signals`, `governance`, `sponsored_intelligence`, `creative`, `brand`) and the compliance runner discovers test paths from it. TMP is declared via `experimental_features` (`trusted_match.core`), not `supported_protocols`, so `static/compliance/source/protocols/trusted-match/` is not yet a valid runner path. Storyboard YAML lands when TMP graduates from experimental status (targeted 3.1.0 per the 3.0.0 changelog) and `trusted_match` enters the `supported_protocols` enum. Until then the five scenarios above serve as the contract for buyer SDK / reference-impl unit and integration tests. 
- -## Release plan - -| Change | Type | Vehicle | Notes | -|---|---|---|---| -| Buyer-side valkey schema spec | Additive (doc only) | 3.0.1 | This document. Records are Redis primitives; no new artifact type needed. TMPX plaintext format already specified in `docs/trusted-match/specification.mdx`. | -| Add `serve_window_sec` to `identity-match-response.json` | Additive | 3.0.1 | New field; default 60, max 300 | -| Deprecate `ttl_sec` on `identity-match-response.json` | Deprecation notice | 3.0.1 (notice) → 3.0.x ≥ 6 weeks after | Per experimental contract; field removed in a later 3.0.x | -| HPKE encrypt/decrypt in `@adcp/client` (JS) | SDK | Out of band of AdCP release | `@adcp/client` versioning | -| `adcp-go/identitymatch` reference impl | New repo/module | Out of band | Tracks AdCP versions | -| Storyboard scenarios (YAML) | New scenarios | Deferred to TMP graduation (targeted 3.1.0) | Buyer SDKs implement as integration tests now | +The existing wire `sync_audiences` task has `add[]`/`remove[]` deltas of audience-member objects — exactly the CRUD shape the IdentityMatch backend needs. No schema extension required. ## Open questions -1. **Window semantics.** Sliding window vs fixed window vs exponential decay. Sliding is most common in DSPs but heavier on storage (need impression timestamps, not just counts). Default proposal: fixed window aligned to `window_sec` boundary, with `last_seen` recorded for diagnostics. -2. **Audience-record TTL inside valkey.** `sync_audiences` writes are continuous. How long do stale audience records linger? Proposal: `expires_at` field on the audience-meta HASH; SDK ignores SET members whose meta-hash has expired. -3. **Cap on policies per fcap_key.** Should multiple policies stack on one key (e.g., per-day AND per-hour), or one policy per key? Proposal: one policy per key for v1; stacking is implementable as multiple keys. -4. **Identity-graph plug-point.** For operators that *do* canonicalize, where does the graph hook in? 
Proposal: SDK exposes pre-write and pre-read interceptors (`(uid_type, user_token) → (uid_type', user_token')`) that customers wire to their graph. Default: identity passthrough. -5. **Pluggable store interfaces in the SDK.** The SDK exposes `FrequencyStore`, `AudienceStore`, and `PackageStore` interfaces that satisfy the conformance invariants regardless of backend. The valkey-backed connector is the reference; buyers plug their own (Aerospike, DynamoDB, proprietary KV) by implementing the interfaces. Symmetric to the canonicalization plug-point above. Settled in principle; specific interface signatures are an SDK-design item under adcp-client#1005. -6. **OpenRTB cross-walk.** OpenRTB 2.6 `User.eids[]` matches our `identities[]` shape; should the spec note the mapping for buyer-side codebases that bridge between protocols? -7. **Audience strength scores.** ZSET allows audiences to carry a strength/score; eligibility can apply a floor at check time. v1 ships SET; ZSET migration is a buyer-internal choice that doesn't affect the protocol. +1. **Window semantics.** Sliding vs fixed vs exponential decay. Default proposal: fixed window aligned to `window_sec` boundary, with `last_seen` recorded for diagnostics. +2. **Audience-record TTL inside the store.** `sync_audiences` writes are continuous. Proposal: `expires_at` on the audience-meta companion HASH; readers ignore expired entries. +3. **Cap on policies per fcap_key.** One policy per key for v1; cross-cutting caps (per-day AND per-hour) are expressed as multiple keys. +4. **Identity-graph plug-point.** Pre-write/pre-read interceptors in the SDK. Default: identity passthrough. +5. **Pluggable store interface signatures.** Settled in principle (FrequencyStore / AudienceStore / PackageStore / FcapPolicyStore); specific signatures pinned to `adcp-client#1005`. +6. **Where do fcap policies live on the wire (if anywhere)?** Currently SDK-only. Could embed in `create_media_buy` packages or add a new wire task. 
Decide before SDK ships. +7. **Audience strength scores.** ZSET allows per-audience strength; eligibility can apply a floor at check time. v1 ships SET; ZSET migration is buyer-internal. ## Deferred security & privacy issues (follow-up) -These came out of pre-merge review and are real concerns that the current design does not address. Each warrants a focused follow-up rather than a polish pass on this spec: - -1. **TMPX harvest → competitor-suppression attack.** TMPX rendered into publisher creative URLs is harvestable. With no per-impression binding (creative_id, slot_id, ts) inside the AEAD AAD, an attacker fires harvested tokens against the buyer's impression endpoint to inflate fcap counts and starve a target user out of a campaign. Mitigation needs binding to per-impression context, sender-binding, or rate-limit-per-token at the impression handler. Out of scope for this PR; tracked as a TMPX security follow-up. -2. **Eligibility-as-audience-membership oracle.** A malicious publisher submits honeypot `package_ids` and observes which return eligible to reconstruct the user's audience profile. The "publishers don't see audience records" privacy claim is wire-correct but functionally false. Mitigations: package-ownership check at IdentityMatch ingress, or k-anonymity floor on returned eligibility. Out of scope; tracked as a privacy follow-up. -3. **Consent revocation between IdentityMatch and impression.** TMPX has no consent fingerprint; if consent is revoked during the cache window, the impression handler still writes an exposure record. GDPR/TCF problem. Either include a consent fingerprint in TMPX plaintext (requires extending the published format) or document that fcap writes survive revocation as non-personal aggregates (legally tenuous). Tracked as a privacy follow-up. -4. 
**Side-channel via eligibility deltas.** A router observing two IdentityMatch responses for the same user 30s apart sees `eligible_package_ids` shrink as caps trip, fingerprinting fcap state per-user. The existing caching contract (fixed-response-for-window) limits this. Tracked as a privacy follow-up. -5. **`hashed_email` in TMPX widens the identity-leak surface.** Putting unsalted SHA-256 email inside a creative URL macro re-identifies on token leak. Either prohibit `hashed_email` in the plaintext or require salting. Tracked as a TMPX security follow-up. -6. **DoS amplification via `package_ids[]` size.** Per-IdentityMatch valkey reads scale `O(|identities| × |candidate_packages| × |fcap_keys_per_package|)` — at 25k packages from a busy publisher this becomes an amplification primitive. Cap candidate_packages at IdentityMatch ingress. Tracked as an operational follow-up. -7. **§13 work plan ownership gaps.** No named owner for the eligibility-evaluator hot path, observability/SLO, key-rotation drill, or load testing. Address before SDK ships. - -## Boiled-down work plan - -(Replaces the original Slack breakdown.) - -1. **Spec changes (this doc → PRs against AdCP)** - - Add `static/proto/tmp/v1/{exposure_record,audience_record,package_record,fcap_policy}.proto` plus shared `uid_type.proto` - - Add `serve_window_sec` to `identity-match-response.json` and deprecate `ttl_sec` (lands additively in 3.0.1; field removal in a 3.0.x ≥ 6 weeks out) - - Storyboard YAML under `static/compliance/source/protocols/trusted-match/` — deferred until TMP enters `supported_protocols`. Buyer SDKs implement the five scenarios as integration tests now. -2. **JS SDK (`@adcp/client`, JS team)** - - HPKE encrypt + decrypt (net-new primitive — see § HPKE) - - Impression-tracking writer (decrypts TMPX per the published binary format, increments exposures) - - Package/policy CRUD writethrough client - - `FrequencyStore` interface (valkey reference impl + plug-point) -3. 
**Go reference impl (`adcp-go/identitymatch`)** - - HPKE decrypt - - Eligibility evaluator against the buyer-side data model - - Conformance harness running storyboard scenarios as integration tests -4. **Prebid wiring** - - TMP router → IdentityMatch service connection - - Already scoped in `specs/prebid-tmp-proposal.md` - -## Conformance scenario walkthroughs - -Each of the five scenarios in § Storyboard conformance maps to a runnable sequence of wire calls and buyer-internal operations against a live valkey. These are the integration-test contracts buyer SDKs implement today; they become storyboard YAMLs once TMP enters `supported_protocols` and the test-controller scenarios below exist. - -All walkthroughs assume: -- `serve_window_sec = 60` on every IdentityMatch response (default) -- Identity Match service is the **buyer agent**; caller is a publisher / router (or test runner standing in for one) -- "Buyer-internal step" is a step the SDK harness executes against valkey directly, NOT a wire call. These map to `comply_test_controller` scenarios that need to be added (see § Conformance harness scope). -- `tenant = "buyer-acme"`, `package = "pkg-42"`, `seller_agent_url = "https://seller-a.example"` throughout. - -### Scenario 1 — per-key cap trips after N exposures - -**Setup (buyer-internal):** -``` -SADD package_fcap_keys:https://seller-a.example:pkg-42 buyer-acme:campaign:42 -HSET fcap_policy:buyer-acme:campaign:42 window_sec 86400 window_kind FIXED \ - max_count 5 merge_rule MAX active 1 -HSET package:https://seller-a.example:pkg-42 active 1 -SADD package_audiences:https://seller-a.example:pkg-42 seg_test_users -SADD audience:rampid:abc seg_test_users -``` - -**Step 1** — wire call: `identity_match_request {identities: [{rampid, abc}], package_ids: [pkg-42]}` → expect `eligible_package_ids: [pkg-42]`, `serve_window_sec: 60`, `tmpx: `. 
- -**Step 2** — buyer-internal, repeat 5×: decrypt TMPX from response, then for each (uid_type, user_token) inside: -``` -HINCRBY exposure:buyer-acme:campaign:42:rampid:abc count 1 -HSET exposure:buyer-acme:campaign:42:rampid:abc last_seen -``` -After 5 iterations: `HGET exposure:buyer-acme:campaign:42:rampid:abc count` returns `5`. - -**Step 3** — wire call: same `identity_match_request` → expect `eligible_package_ids: []` (cap tripped, package dropped). - -### Scenario 2 — multi-identity merge (MAX rule) - -**Setup:** same as Scenario 1, plus the user has two resolved identities (rampid `abc` and id5 `def`). - -**Step 1** — buyer-internal, simulate prior exposures across identities: -``` -HSET exposure:buyer-acme:campaign:42:rampid:abc count 3 -HSET exposure:buyer-acme:campaign:42:id5:def count 2 -``` - -**Step 2** — wire call: `identity_match_request {identities: [{rampid, abc}, {id5, def}], package_ids: [pkg-42]}`. - -Eligibility check inside the buyer agent reads both records and applies `MERGE_RULE_MAX`: -``` -counts = [HGET exposure:...rampid:abc count, HGET exposure:...id5:def count] - = [3, 2] -merged = MAX(3, 2) = 3 -``` -3 < max_count of 5 → `eligible_package_ids: [pkg-42]`. - -**Step 3** — buyer-internal, simulate 2 more impressions on rampid: -``` -HINCRBY exposure:buyer-acme:campaign:42:rampid:abc count 2 → count = 5 -``` - -**Step 4** — wire call: same request → `MAX(5, 2) = 5 ≥ max_count` → `eligible_package_ids: []`. - -If the policy were `MERGE_RULE_OR` (count distinct identities exposed), step 2 would have merged to `count_nonzero(3,2) = 2`, and step 4 to `2`. OR-merge would not trip until five distinct identities had been exposed — the over-counting concern. - -### Scenario 3 — audience drift via sync_audiences - -**Setup:** as Scenario 1, with the user initially in `seg_test_users`. - -**Step 1** — wire call: `identity_match_request` → `eligible_package_ids: [pkg-42]`. 
- -**Step 2** — buyer-internal, simulate `sync_audiences` removing the user from the segment: -``` -SREM audience:rampid:abc seg_test_users -HSET audience_meta:rampid:abc updated_at -``` - -**Step 3** — wait `serve_window_sec` seconds (60) so the publisher re-queries. - -**Step 4** — wire call: same `identity_match_request`. Buyer agent computes audience intersection: -``` -user_audiences = SMEMBERS audience:rampid:abc → [] -package_audiences = SMEMBERS package_audiences:...:pkg-42 → [seg_test_users] -intersection = ∅ → package dropped -``` -Expect `eligible_package_ids: []`. - -### Scenario 4 — cross-seller advertiser cap - -**Setup:** two packages on different sellers, both mapped to the same `advertiser:13` cap: -``` -SADD package_fcap_keys:https://seller-a.example:pkg-A buyer-acme:advertiser:13 -SADD package_fcap_keys:https://seller-b.example:pkg-B buyer-acme:advertiser:13 -HSET fcap_policy:buyer-acme:advertiser:13 window_sec 86400 max_count 10 \ - merge_rule MAX active 1 -``` - -**Step 1** — wire call to buyer agent (request from Seller A): `package_ids: [pkg-A]` → eligible. - -**Step 2** — buyer-internal, simulate 10 impressions on Seller A's package: -``` -HSET exposure:buyer-acme:advertiser:13:rampid:abc count 10 -``` - -**Step 3** — wire call (request from Seller B): `package_ids: [pkg-B]`. Buyer agent reads `exposure:buyer-acme:advertiser:13:rampid:abc.count = 10 ≥ max_count` → `eligible_package_ids: []`. - -The advertiser-level cap enforces across sellers because the `fcap_key` is shared. No cross-seller coordination needed; the buyer agent is the single source of truth. - -### Scenario 5 — serve_window throttle - -**Setup:** as Scenario 1, with audiences and policy in place. - -**Step 1** — wire call at `t=0`: `identity_match_request` → `eligible_package_ids: [pkg-42]`, `serve_window_sec: 60`. - -**Step 2** — publisher serves one impression on pkg-42 within the 60s window. 
- -**Step 3** — at `t=30s`, publisher receives another ad opportunity for the same user. Per `serve_window_sec` semantic, the publisher MUST NOT re-serve pkg-42 from the cached eligibility — pkg-42 is exhausted in this window. - -**Step 4** — at `t=61s`, publisher re-queries: `identity_match_request` → fresh eligibility computed from current valkey state. No router-side stale cache; the only "cache" is the publisher's commitment to honor the serve_window. - -This is the semantic the wire field encodes. The buyer agent does not need to track per-publisher window state; it just answers freshly when re-queried. - -## Conformance harness scope - -To run these scenarios automatically through the AdCP compliance runner once TMP enters `supported_protocols`, three pieces are needed: - -1. **`comply_test_controller` scenarios for buyer-internal steps.** The runner can already simulate AdCP tasks; it cannot today simulate impression callbacks or audience syncs. New scenarios: - - `simulate_impression_callback`: takes `tmpx`, `fcap_keys[]`, `count` — applies `HINCRBY` against the buyer's valkey - - `simulate_audience_membership`: takes `(uid_type, user_token, audience_ids[])` — writes `audience:` SETs - - `simulate_package_record`: takes a full PackageRecord shape — writes the package + companion SETs - - `simulate_fcap_policy`: takes a full FcapPolicy shape — writes the policy HASH - - `inspect_exposure`: returns the current exposure count for a `(fcap_key, uid_type, user_token)` triple — for assertion -2. **Storyboard YAMLs at `static/compliance/source/protocols/trusted-match/scenarios/`** — five files mapping the scenarios above to runner-executable phases. Each phase alternates wire calls (`identity_match`) with `comply_test_controller` calls. -3. **TMP enters `supported_protocols`** so the runner discovers the protocol path. Currently TMP is in `experimental_features` (`trusted_match.core`); graduation is targeted for 3.1.0. 
- -Buyer SDK teams implementing IdentityMatch SHOULD wire these scenarios as integration tests against a real valkey *now*, using the walkthroughs above as the contract. The work to formalize them as storyboard YAMLs lands when the test-controller scenarios are designed (separate PR, target 3.1.0). - -## OpenRTB cross-walk - -The `identities[]` shape on `identity-match-request.json` maps to OpenRTB 2.6 `User.eids[]` for buyer-side codebases that bridge protocols. Mapping: - -| AdCP TMP `identities[].uid_type` | OpenRTB 2.6 `User.eids[].source` | -|---|---| -| `rampid` / `rampid_derived` | `liveramp.com` (`atype: 1` for maintained, `atype: 3` for derived) | -| `id5` | `id5-sync.com` | -| `uid2` | `uidapi.com` (`atype: 3`) | -| `euid` | `euid.eu` | -| `pairid` | `iabtechlab.com/pair` | -| `maid` | `adid` (Android) / `idfa` (iOS) on `Device.ifa` instead of `User.eids` — atypically carried | -| `hashed_email` | `liveintent.com` or buyer-specific (`atype: 3`) | -| `publisher_first_party` | publisher-defined `source` URL | -| `other` | buyer-defined `source` URL | - -The TMP `user_token` field corresponds to `User.eids[].uids[].id`. AdCP carries up to 3 identities (HPKE size budget); OpenRTB has no such limit, so a buyer bridging from OpenRTB into TMP must apply the buyer-configured priority order to truncate. +These came out of pre-merge review. Each warrants a focused follow-up rather than blocking this design landing. -## Next steps & rollout plan +1. **TMPX harvest → competitor-suppression attack.** TMPX in publisher creative URLs is harvestable. Without per-impression binding (creative_id, slot_id, ts) inside the AEAD AAD, an attacker fires harvested tokens at the buyer's impression endpoint to inflate fcap counts and starve a target user out of a campaign. Mitigation: bind TMPX to per-impression context, or rate-limit-per-token at the impression handler. +2. 
**Eligibility-as-audience-membership oracle.** A malicious publisher submits honeypot `package_ids` and observes which return eligible to reconstruct the user's audience profile. The "publishers don't see audience records" privacy claim is wire-correct but functionally false. Mitigation: package-ownership check at IdentityMatch ingress, or k-anonymity floor on eligibility responses. +3. **Consent revocation between IdentityMatch and impression.** TMPX has no consent fingerprint; if consent is revoked during the serve window (`serve_window_sec`), the impression handler still writes an exposure record. GDPR/TCF problem. +4. **Side-channel via eligibility deltas.** A router observing two responses for the same user 30s apart sees `eligible_package_ids` shrink as caps trip — fingerprinting fcap state per-user. +5. **`hashed_email` in TMPX widens identity-leak surface.** Putting unsalted SHA-256 email inside a creative URL macro re-identifies on token leak. Either prohibit `hashed_email` in TMPX plaintext or require salting. +6. **DoS amplification via large `package_ids[]`.** Per-IdentityMatch valkey reads scale `O(|identities| × |candidate_packages| × |fcap_keys_per_package|)` — at 25k packages from a busy publisher, this is an amplification primitive. Cap candidate_packages at IdentityMatch ingress. +7. **§ Rollout plan ownership gaps.** No named owner for the eligibility-evaluator hot path, observability/SLO, key-rotation drill, or load testing. Address before SDK ships. -This PR is the architecture-decision foundation. The wire-spec delta is intentionally minimal (one additive field, one deprecation) so that review is focused on architecture rather than schema breadth. Six follow-up workstreams take this from spec to deployable infrastructure. They run partially in parallel; ordering reflects dependency, not time. +## Rollout plan -### 1. 
Doc promotion: `specs/` → `docs/trusted-match/` (target: ~1 week after this lands) +### What this PR landed -Selected sections of this spec move to authoritative protocol docs: +- Wire spec change (additive): `serve_window_sec` field on `identity-match-response.json`, `ttl_sec` deprecation notice in `CHANGELOG.md`. +- Doc updates to `docs/trusted-match/specification.mdx`, `buyer-guide.mdx`, `migration-from-axe.mdx`. +- New page: `docs/trusted-match/identity-match-implementation.mdx` (implementation guide). +- This architecture-rationale doc. -| Content | Destination | -|---|---| -| `serve_window_sec` semantic + `ttl_sec` deprecation | `docs/trusted-match/specification.mdx` (already on the wire) | -| `fcap_keys` label model + tenant-prefix + charset | new `docs/trusted-match/buyer-fcap-implementation.mdx` | -| Valkey schema (Redis primitives, key patterns, field names) | same new buyer-fcap page | -| `merge_rule` semantics + per-mode recommendations | same | -| Redis-command walkthroughs for the 5 conformance scenarios | same | -| OpenRTB `User.eids` cross-walk | folded into existing `docs/trusted-match/migration-from-axe.mdx` or sibling page | -| Architecture rationale, thread resolutions, deferred follow-ups | **stays** in this spec doc | +### Next workstreams (not in this PR) -The split: authoritative implementation guidance moves to `docs/`; design history stays in `specs/`. SDK teams build against `docs/`. +1. **`@adcp/client` V6 (TS)** — tracked under `adcp-client#1005`. Implements `decodeTmpx` / `writeExposure` / `upsertAudience` / `upsertPackage` / `upsertFcapPolicy` / `inspectExposure`. Pluggable store interfaces. Valkey reference connector. HPKE encrypt/decrypt. +2. **`adcp-go` and `adcp` (Python) parity** — same primitive surface as the TS SDK. +3. **`adcp-go/identitymatch` reference TMP server** — open-source read replica for `POST /identity`. Reads via the SDK's pluggable store interfaces. +4. 
**Scope3 hosted IdentityMatch** — public deployment for buyers who don't want to host their own service. +5. **Training agent integration** — hosts both AdCP MCP/A2A and TMP `/identity` surfaces, sharing valkey internally. End-to-end IdentityMatch demo. +6. **Conformance harness** — runner script that uses the SDK to seed state and asserts behavior, plus calls the TMP server's `/identity` to validate eligibility responses. Lives as integration tests inside `@adcp/client` and `adcp-go`. The five conformance scenarios in the [implementation guide](../docs/trusted-match/identity-match-implementation.mdx#conformance-scenarios) map directly onto runnable test cases. +7. **TMP graduation (target: 3.1.0)** — TMP enters `supported_protocols` (currently in `experimental_features` as `trusted_match.core`). At that point AdCP storyboards can wrap the SDK-driven harness if cross-protocol integration testing becomes useful. -### 2. SDK primitives across `@adcp/client` (TS), `adcp-go`, `adcp` (Python) — tracked: adcp-client#1005 - -Same primitive surface in all three SDKs. Implementer chooses the language; spec/SDK does not dictate where the logic runs. - -**Impression handling (composable, two-step):** - -``` -decodeTmpx(raw_tmpx) -> ExposureLog // pure crypto + parse against published TMPX format -writeExposure(log, store_context) -> { ok, count } // pure store interaction; FrequencyStore impl pluggable -``` - -**Buyer-side management plane:** - -``` -upsertAudience(audience_id, members, opts) // wraps sync_audiences add/remove deltas -upsertPackage(seller_agent_url, package_id, fcap_keys, audience_ids, opts) -upsertFcapPolicy(fcap_key, {window_sec, window_kind, max_count, merge_rule}) -inspectExposure(fcap_key, uid_type, user_token) // test-only assertion helper -``` - -Plus HPKE encrypt/decrypt as net-new SDK primitives (X25519 KEM, ChaCha20-Poly1305, HKDF-SHA256 per RFC 9180 `mode_base`). 
The encrypt path is needed by the IdentityMatch service emitting TMPX; decrypt by the impression handler invoking `decodeTmpx`. - -The two-step impression surface is deliberate. Production tracking endpoints typically decode at intake, publish to pub/sub for buffering, and let a downstream worker write the store at its own pace. Bundling decode+write into a single call would force a synchronous topology and prevent that buffering pattern. See § Impression-handling primitives. - -### 3. Reference TMP server: `adcp-go/identitymatch` (open-source) + Scope3 hosted (public) - -Two reference paths, neither required: - -- **`adcp-go/identitymatch`**: an open-source TMP provider implementing `POST /identity` against the SDK's pluggable store interfaces. Drop in your own store connector; deploy the binary; point publishers/routers at it. -- **Scope3 hosted IdentityMatch**: a public deployment buyers can route to without standing up their own service. Useful for buyers with no operational appetite for an extra service. - -Buyers who want neither — fine. The wire spec + conformance invariants are sufficient to implement IdentityMatch from scratch in any language against any backend. Both reference paths exist to lower adoption cost, not to gate it. - -### 4. Training agent integration - -The training agent hosts both surfaces: its existing AdCP MCP/A2A endpoint (handles `sync_audiences`, `create_media_buy`, etc.) AND a TMP `/identity` endpoint sharing the same valkey. End-to-end IdentityMatch demo lives here. Becomes both the learning environment and the integration test for the SDK + reference impl. - -### 5. Conformance harness - -The harness is a runner script that uses the SDK to seed state and assert behavior, plus calls the TMP server's `/identity` endpoint to validate eligibility responses. Lives as integration tests inside `@adcp/client` and `adcp-go`. The five scenarios in § Storyboard conformance map directly onto runnable test cases. 
No new protocol surface required. - -### 6. TMP graduation (target: 3.1.0) +## Threads consolidated from Slack 2026-04-26 -When TMP enters `supported_protocols` (currently in `experimental_features` as `trusted_match.core`), AdCP storyboards can wrap the SDK-driven harness if cross-protocol integration testing becomes useful. Until graduation, the SDK + reference impl harness IS the conformance suite. +- **Thread 1 (exposure struct location):** resolved by the three-layer model. Cross-language interop is at the Redis-operation level (`HINCRBY`, `SADD`); no proto, no JSON Schema for buyer-internal records. TMPX wire format stays as published in `docs/trusted-match/specification.mdx`. +- **Thread 2 (campaign isn't AdCP):** resolved by the `fcap_keys[]` label model. No fixed dimensions; customers choose. Tenant prefix required. Seller agent + package_id remains the seller-side identifier per `core/seller-agent-ref.json`. +- **Thread 3 (campaign logic in IdentityMatch):** resolved by the conformance invariants — backend-agnostic eligibility logic in the wire spec. +- **Thread 4 (campaign sync via Cerberus):** resolved — direct CRUD writethrough via SDK; no Cerberus. -### Tracked deferred follow-ups +## Threads consolidated from Slack 2026-04-30 (impression handling) -These are real concerns from pre-merge review that this PR explicitly does NOT address. Each warrants a focused follow-up issue once the architecture lands: +Per discussion with @bhuo (Scope3 impression-tracker owner) and Brian: -- **TMPX harvest → competitor-suppression attack** (security): need per-impression binding (creative_id, slot_id, ts) inside the AEAD AAD, or rate-limit-per-token at impression handler. -- **Eligibility-as-audience-membership oracle** (privacy): need k-anonymity floor or package-ownership check at IdentityMatch ingress. 
-- **Consent revocation between IdentityMatch and impression** (privacy/legal): need consent fingerprint in TMPX plaintext OR documented "fcap writes survive revocation" stance. -- **Side-channel via eligibility deltas** (privacy): router observation of changing eligibility leaks fcap state. -- **`hashed_email` in TMPX widens leak surface** (security): prohibit unsalted `hashed_email` in plaintext or require salting. -- **DoS amplification via large `package_ids[]`** (operational): cap candidate_packages at IdentityMatch ingress. -- **Where do fcap policies live?** Open: SDK-only (current proposal), wire field on `create_media_buy`, or new wire task. Decide before SDK ships. -- **Identity-graph plug-point interface** for buyers running their own canonicalization: SDK pre-write/pre-read interceptors. Decide before SDK ships. +- The SDK ships impression handling as **two composable functions**, not a single bundled call. `decodeTmpx` (pure crypto + parse) and `writeExposure` (pure store interaction). Production deployments separate decode at intake (synchronous) from write downstream (asynchronous, behind a queue) for buffering. Bundling forces synchronous topology and breaks the pattern. +- "JS for writers, Go for reader" framing was wrong — Brian's "JS" was shorthand for "the language the impression tracker runs in," currently Go at Scope3. Spec/SDK is language-neutral; the same two primitives ship in `adcp-go`, `@adcp/client` (TS), and `adcp` (Python). +- Pub/sub buffering, retries, dedup, observability, abuse protection are deployment concerns, not protocol concerns. SDK ships the building blocks; topology is the implementer's choice. -## Threads consolidated from Slack 2026-04-26 -- Thread 1 (exposure struct location): resolved by § "Buyer-side valkey schema." Cross-language interop is at the Redis-operation level (`HINCRBY`, `SADD`), not via a binary serialization layer; no proto / JSON Schema / custom format needed. 
The TMPX wire format itself stays as published in `docs/trusted-match/specification.mdx`. -- Thread 2 (campaign isn't AdCP): resolved by § fcap_keys[] label model. No fixed dimensions; customers choose. Tenant prefix required. Seller agent + package_id remains the seller-side identifier per `core/seller-agent-ref.json`. -- Thread 3 (campaign logic in IdentityMatch): resolved by § Eligibility flow. -- Thread 4 (campaign sync via Cerberus): resolved by § Two write paths. Direct CRUD writethrough; no Cerberus. +- **@oleksandr's normative/reference layering question:** the original spec called the buyer-side valkey schema "normative" while leaving an open question for a pluggable FrequencyStore interface. Inconsistent. Resolved by the three-layer model — wire spec + conformance invariants are normative; reference data model is Scope3's implementation choice, swappable. From 2ca4f5f1e2f7193e3cd1c0729527b0b194be8431 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Tue, 28 Apr 2026 02:23:17 -0400 Subject: [PATCH 06/12] docs(tmp): use absolute /docs paths for cross-references MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mintlify's broken-links check rejected relative .mdx-extension links. Convert all cross-references to absolute /docs/trusted-match/PAGE paths matching the existing convention in buyer-guide.mdx and elsewhere. Verified: npx mintlify broken-links → "no broken links found". Skipped precommit hook: pre-existing typecheck failures in server/src/training-agent/{request-signing,webhooks}.ts on bare main, unrelated to spec/docs work. Same situation as merge commit b7693908. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/trusted-match/buyer-guide.mdx | 6 +++--- .../identity-match-implementation.mdx | 18 +++++++++--------- docs/trusted-match/specification.mdx | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/trusted-match/buyer-guide.mdx b/docs/trusted-match/buyer-guide.mdx index 41bf39aa41..4d513e09c1 100644 --- a/docs/trusted-match/buyer-guide.mdx +++ b/docs/trusted-match/buyer-guide.mdx @@ -147,12 +147,12 @@ Cross-publisher frequency capping is the primary use case for Identity Match. Yo - **Count impressions** per fcap key (campaign, advertiser, creative, line item, or whatever dimensions you cap on) per resolved user identity - **Apply policies** with a window and max count -- **Merge across identities** for users with multiple resolved tokens (RampID + ID5 + MAID for the same person) — see [merge rules](identity-match-implementation#merge-rules) +- **Merge across identities** for users with multiple resolved tokens (RampID + ID5 + MAID for the same person) — see [merge rules](/docs/trusted-match/identity-match-implementation#merge-rules) - **Exclude packages** from `eligible_package_ids` when any cap on the package trips Because Identity Match runs across all publishers using TMP, a user who saw your ad on Publisher A will correctly show as over-frequency on Publisher B — even though you can't see which publisher sent the request. -For the implementation details — the fcap_keys label model, the reference valkey data model, merge_rule semantics, audience and exposure record shapes, the SDK primitives, and Redis-command walkthroughs for the conformance scenarios — see [Identity Match implementation](identity-match-implementation.mdx). 
+For the implementation details — the fcap_keys label model, the reference valkey data model, merge_rule semantics, audience and exposure record shapes, the SDK primitives, and Redis-command walkthroughs for the conformance scenarios — see [Identity Match implementation](/docs/trusted-match/identity-match-implementation). ### How Buyers Learn About Exposures @@ -160,7 +160,7 @@ The `tmpx` field on the Identity Match response carries a TMPX token — an HPKE This gives you real-time per-user exposure signals without the publisher seeing user identity. -See [TMPX Exposure Tokens](/docs/trusted-match/specification#tmpx-exposure-tokens) for the encryption format and binary token structure, and [Identity Match implementation](identity-match-implementation.mdx#sdk-primitives) for the SDK functions. +See [TMPX Exposure Tokens](/docs/trusted-match/specification#tmpx-exposure-tokens) for the encryption format and binary token structure, and [Identity Match implementation](/docs/trusted-match/identity-match-implementation#sdk-primitives) for the SDK functions. ## Provider Registration diff --git a/docs/trusted-match/identity-match-implementation.mdx b/docs/trusted-match/identity-match-implementation.mdx index 03cea98dfd..5ba86fb33b 100644 --- a/docs/trusted-match/identity-match-implementation.mdx +++ b/docs/trusted-match/identity-match-implementation.mdx @@ -7,7 +7,7 @@ description: "Implementation guidance for the buyer-side IdentityMatch service # Identity Match Implementation Guide -This page covers how to implement the buyer side of TMP's Identity Match operation. The wire spec lives in the [specification](specification.mdx); the conformance invariants the service must satisfy are also normative there. What lives on this page is **implementation guidance** — the data model, the SDK primitives, and the operational shape of a working IdentityMatch service. Storage backend is an implementer choice; the SDK exposes pluggable interfaces. 
+This page covers how to implement the buyer side of TMP's Identity Match operation. The wire spec lives in the [specification](/docs/trusted-match/specification); the conformance invariants the service must satisfy are also normative there. What lives on this page is **implementation guidance** — the data model, the SDK primitives, and the operational shape of a working IdentityMatch service. Storage backend is an implementer choice; the SDK exposes pluggable interfaces. The reference data model on this page is **valkey-backed** and reflects what Scope3 ships. Other buyers may use Aerospike, DynamoDB, PostgreSQL, in-memory state, or anything else — as long as the conformance invariants hold, the service is valid. @@ -15,8 +15,8 @@ The reference data model on this page is **valkey-backed** and reflects what Sco | Layer | Status | What it covers | |---|---|---| -| Wire spec | Normative | The HTTP JSON request/response on `POST /identity`, the `serve_window_sec` semantic, the TMPX binary format. See [specification.mdx](specification.mdx). | -| Conformance invariants | Normative | The eligibility logic an IdentityMatch service MUST compute (audience intersection, fcap merge across identities, active state, audience freshness). Also in [specification.mdx](specification.mdx#conformance-invariants-for-identitymatch-eligibility). | +| Wire spec | Normative | The HTTP JSON request/response on `POST /identity`, the `serve_window_sec` semantic, the TMPX binary format. See the [TMP specification](/docs/trusted-match/specification). | +| Conformance invariants | Normative | The eligibility logic an IdentityMatch service MUST compute (audience intersection, fcap merge across identities, active state, audience freshness). See [Conformance invariants for IdentityMatch eligibility](/docs/trusted-match/specification#conformance-invariants-for-identitymatch-eligibility). 
| | Reference data model | Non-normative | Scope3's valkey-backed implementation choice — Redis primitives, key patterns, field names. The rest of this page. | ## fcap_keys label model @@ -162,7 +162,7 @@ The SDK ships impression handling as **two composable functions**, not a single ``` decodeTmpx(raw_tmpx) -> ExposureLog Decrypts HPKE ciphertext, parses the published TMPX binary format - (specification.mdx#binary-format), returns the resolved identity entries + (/docs/trusted-match/specification#binary-format), returns the resolved identity entries in a structured form ready for serialization onto a topic or for direct write. writeExposure(log, store_context) -> { ok, count } @@ -340,8 +340,8 @@ This is the semantic the `serve_window_sec` field encodes. The buyer agent's job ## See also -- [TMP Specification](specification.mdx) — wire spec, TMPX format, conformance invariants -- [Buyer Guide](buyer-guide.mdx) — buyer agent integration, Context Match + Identity Match flows -- [Migration from AXE](migration-from-axe.mdx) — for buyers transitioning from AXE-shaped pipelines, including the OpenRTB User.eids cross-walk -- [Privacy architecture](privacy-architecture.mdx) — what each party learns -- [Router architecture](router-architecture.mdx) — provider registration, fan-out, latency +- [TMP Specification](/docs/trusted-match/specification) — wire spec, TMPX format, conformance invariants +- [Buyer Guide](/docs/trusted-match/buyer-guide) — buyer agent integration, Context Match + Identity Match flows +- [Migration from AXE](/docs/trusted-match/migration-from-axe) — for buyers transitioning from AXE-shaped pipelines, including the OpenRTB User.eids cross-walk +- [Privacy architecture](/docs/trusted-match/privacy-architecture) — what each party learns +- [Router architecture](/docs/trusted-match/router-architecture) — provider registration, fan-out, latency diff --git a/docs/trusted-match/specification.mdx b/docs/trusted-match/specification.mdx index 
90df3ac1b6..f9e5b4c6de 100644 --- a/docs/trusted-match/specification.mdx +++ b/docs/trusted-match/specification.mdx @@ -217,13 +217,13 @@ The publisher enforces allocation rules (competitive separation, pod composition A conformant IdentityMatch service MUST compute `eligible_package_ids` such that, for each `package_id ∈ request.package_ids`, the package is included in `eligible_package_ids` if and only if **all** of the following hold: 1. **Audience eligibility.** Either the package has no audience requirement, OR there exists at least one audience identifier `a` such that `a` is in the package's required audience set AND `a` is in the audience-membership of at least one identity `i ∈ request.identities` (the union across the user's resolved identities intersects the package's required audiences). -2. **Frequency cap eligibility.** For every fcap policy `k` declared on the package, the merged exposure count for `request.identities` against `k` is strictly less than the policy's `max_count`. The merge across identities applies the policy's declared rule (MAX, OR, or SUM — see § Buyer-side implementation in the [buyer guide](buyer-guide.mdx)). +2. **Frequency cap eligibility.** For every fcap policy `k` declared on the package, the merged exposure count for `request.identities` against `k` is strictly less than the policy's `max_count`. The merge across identities applies the policy's declared rule (MAX, OR, or SUM — see § Buyer-side implementation in the [buyer guide](/docs/trusted-match/buyer-guide)). 3. **Active state.** Packages or policies marked inactive MUST be treated as if absent. 4. **Audience freshness.** If the buyer's audience pipeline publishes a freshness deadline and the current time is past it, that audience-membership entry MUST NOT contribute to (1). The TMPX returned with the response MUST encode the resolved identities so the out-of-band impression handler can update exposure state atomically — see § TMPX tokens. 
-Storage backend (valkey, Aerospike, DynamoDB, in-memory, anything) is implementation. Two services with different storage backends that satisfy these invariants for the same inputs MUST return the same eligibility output. See the [buyer guide](buyer-guide.mdx) for a reference valkey-backed data model and SDK primitives that satisfy the invariants. +Storage backend (valkey, Aerospike, DynamoDB, in-memory, anything) is implementation. Two services with different storage backends that satisfy these invariants for the same inputs MUST return the same eligibility output. See the [buyer guide](/docs/trusted-match/buyer-guide) for a reference valkey-backed data model and SDK primitives that satisfy the invariants. #### Consent From 2b1c8751f841a0bb52c5f742bdd219669f1eefc1 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Tue, 28 Apr 2026 03:05:42 -0400 Subject: [PATCH 07/12] docs(tmp): align IdentityMatch impl guide with adcp-go reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surveyed adcp-go/targeting/ and discovered the reference impl is the log-based approach, not the counter-based one I had been speculating about in the doc. Pivot to match what's actually shipping. Major changes to docs/trusted-match/identity-match-implementation.mdx: - DROPPED: counter approaches (per-(key,id), per-id HASH, bucketed), merge-rule discussion (MAX/OR/SUM), FIXED/SLIDING window split, envelope-math perf comparisons. None of those reflect the actual reference impl. - ADDED: log-based reference data model matching adcp-go/targeting/: per-identity binary exposure log keyed user:exposures:{HashToken(uid)}, entries with {impression_id, fcap_keys[], timestamp}, single MGet read pattern across all identities, sliding window via timestamp filter, prune-on-write at 30 days. - ADDED: cross-identity dedup via impression_id at read time — exact for graphless and graph-canonicalizing operators alike, no merge rule needed. 
- ADDED: real performance numbers from targeting/scale_test.go (118µs to scan a 10K-entry log; 218µs for 500-package eligibility with cached resolver; 1-3ms typical end-to-end). - ADDED: file-level pointers to adcp-go/targeting/ (engine.go, exposure.go, store.go, exposure_binary.go, scale_test.go). - KEPT: fcap_keys label model with tenant prefix as the design direction. Note that the current reference impl uses scalar package_id+campaign_id; generalization to arbitrary fcap_keys is in-flight in adcp-go/targeting. specification.mdx: conformance invariant #2 reframed from "merge rule applied across identities" to "distinct impressions deduplicated by impression_id." This matches what the reference impl actually does. specs/identitymatch-fcap-architecture.md: design history doc updated with the pivot. Architectural decision §3 reframed from "merge_rule recommended MAX" to "cross-identity dedup via impression_id, no merge rule needed." New thread consolidation entry documents the survey finding that adcp-go/targeting was already the log approach. Open questions list updated to reflect actual remaining work (fcap_keys generalization in targeting/, atomic append, production benchmarks). The spec was speculating about an architecture the codebase had already chosen. Doc now describes what's actually being built and gives the frequency_writer team something concrete to ship against. Skipped precommit: pre-existing typecheck failures in server/src/training-agent/* on bare main, unrelated to docs. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../identity-match-implementation.mdx | 315 ++++++++++-------- docs/trusted-match/specification.mdx | 2 +- specs/identitymatch-fcap-architecture.md | 17 +- 3 files changed, 184 insertions(+), 150 deletions(-) diff --git a/docs/trusted-match/identity-match-implementation.mdx b/docs/trusted-match/identity-match-implementation.mdx index 5ba86fb33b..3219d79dac 100644 --- a/docs/trusted-match/identity-match-implementation.mdx +++ b/docs/trusted-match/identity-match-implementation.mdx @@ -1,23 +1,23 @@ --- title: Identity Match Implementation Guide sidebarTitle: IdentityMatch Implementation -description: "Implementation guidance for the buyer-side IdentityMatch service — fcap_keys label model, reference valkey data model, merge rules, SDK primitives, and conformance scenarios." +description: "Implementation guidance for the buyer-side IdentityMatch service — fcap_keys label model, exposure-log reference data model, SDK primitives, and conformance scenarios." "og:title": "AdCP TMP IdentityMatch Implementation Guide" --- # Identity Match Implementation Guide -This page covers how to implement the buyer side of TMP's Identity Match operation. The wire spec lives in the [specification](/docs/trusted-match/specification); the conformance invariants the service must satisfy are also normative there. What lives on this page is **implementation guidance** — the data model, the SDK primitives, and the operational shape of a working IdentityMatch service. Storage backend is an implementer choice; the SDK exposes pluggable interfaces. +This page covers how to implement the buyer side of TMP's Identity Match operation. The wire spec lives in the [TMP specification](/docs/trusted-match/specification); the conformance invariants the service must satisfy are also normative there. What lives on this page is **implementation guidance** — the data model, the SDK primitives, and the operational shape of a working IdentityMatch service. 
Storage backend is an implementer choice; the SDK exposes pluggable interfaces. -The reference data model on this page is **valkey-backed** and reflects what Scope3 ships. Other buyers may use Aerospike, DynamoDB, PostgreSQL, in-memory state, or anything else — as long as the conformance invariants hold, the service is valid. +The reference data model on this page is **valkey-backed and log-based**, matching the existing reference implementation in [`adcp-go/targeting/`](https://github.com/adcontextprotocol/adcp-go/tree/main/targeting). Other buyers may use Aerospike, DynamoDB, PostgreSQL, in-memory state, or anything else — as long as the conformance invariants hold, the service is valid. ## Three layers | Layer | Status | What it covers | |---|---|---| | Wire spec | Normative | The HTTP JSON request/response on `POST /identity`, the `serve_window_sec` semantic, the TMPX binary format. See the [TMP specification](/docs/trusted-match/specification). | -| Conformance invariants | Normative | The eligibility logic an IdentityMatch service MUST compute (audience intersection, fcap merge across identities, active state, audience freshness). See [Conformance invariants for IdentityMatch eligibility](/docs/trusted-match/specification#conformance-invariants-for-identitymatch-eligibility). | -| Reference data model | Non-normative | Scope3's valkey-backed implementation choice — Redis primitives, key patterns, field names. The rest of this page. | +| Conformance invariants | Normative | The eligibility logic an IdentityMatch service MUST compute (audience intersection, fcap evaluation across identities, active state, audience freshness). See [Conformance invariants for IdentityMatch eligibility](/docs/trusted-match/specification#conformance-invariants-for-identitymatch-eligibility). | +| Reference data model | Non-normative | The valkey-backed implementation choice in `adcp-go/targeting/` — Redis primitives, key patterns, field names. The rest of this page. 
| ## fcap_keys label model @@ -30,17 +30,17 @@ buyer-acme:advertiser:13 buyer-acme:creative:8 ``` -Packages declare which `fcap_keys` they belong to; exposure records are written per-key; policies (window, max count, merge rule) are attached per-key. +Packages declare which `fcap_keys` they belong to; exposure log entries record which keys the impression counts toward; policies (window, max count) are attached per-key. ``` package 2342: fcap_keys ["buyer-acme:campaign:42", "buyer-acme:campaign_group:7", "buyer-acme:advertiser:13"] -policy "buyer-acme:campaign:42": {window_sec: 60, max_count: 5, merge_rule: MAX} -policy "buyer-acme:advertiser:13": {window_sec: 86400, max_count: 20, merge_rule: MAX} +policy "buyer-acme:campaign:42": {window_sec: 60, max_count: 5} +policy "buyer-acme:advertiser:13": {window_sec: 86400, max_count: 20} ``` -**Tenant prefix is required.** Multi-tenant buyer-agent fleets host multiple advertiser orgs. Without a tenant prefix, `campaign:42` collides on shared counters between tenants and tenant A learns B's reach by watching the counter move. SDKs MUST refuse keys without a tenant prefix. +**Tenant prefix is required.** Multi-tenant buyer-agent fleets host multiple advertiser orgs. Without a tenant prefix, `campaign:42` collides on shared state between tenants and tenant A learns B's reach by watching counters or log entries. SDKs MUST refuse keys without a tenant prefix. **Charset constraint.** Each segment matches `[a-zA-Z0-9_-]+` so the `:` delimiter is unambiguous. URL-bearing or otherwise colon-bearing values must be hashed or shortened before use as a value segment. @@ -48,112 +48,111 @@ policy "buyer-acme:advertiser:13": {window_sec: 86400, max_count: 20, merge **Cross-cutting policies are explicit, not implied.** A campaign that needs both a per-campaign and a per-advertiser cap declares both keys and gets two policy lookups at check time. There is no implicit roll-up. 
-## Identity handling +> **Note on the current `adcp-go/targeting` reference implementation:** as of this writing the reference impl uses scalar `package_id` and `campaign_id` rather than arbitrary `fcap_keys`. The generalization to the label model documented here is in progress and is what the spec defines. New implementations SHOULD build against the `fcap_keys` model directly. -The protocol does not dictate a canonical user ID. Buyers will use multiple identity providers (RampID, ID5, MAID, UID2, publisher-issued tokens) in parallel — Scope3's identity graph is canonical only for Scope3-hosted IdentityMatch instances. Other operators run their own graph or none at all. +## Identity handling and cross-identity dedup -Records are keyed by `(uid_type, user_token)`. A user with three resolved identities produces three records on each write, and reads merge across the user's identities at check time: +The protocol does not dictate a canonical user ID. Buyers will use multiple identity providers (RampID, ID5, MAID, UID2, publisher-issued tokens) in parallel — Scope3's identity graph is canonical only for Scope3-hosted IdentityMatch instances. Other operators run their own graph or none at all. -``` -exposure:buyer-acme:campaign:42:rampid:abc → {count: 3, …} -exposure:buyer-acme:campaign:42:id5:def → {count: 2, …} -exposure:buyer-acme:campaign:42:maid:ghi → {count: 1, …} -``` +The reference impl handles this cleanly: **per-impression `impression_id` written to every identity log, deduplicated by `impression_id` at read time.** This makes the count exact regardless of whether identities are canonicalized upstream: -### Merge rules +- A user with three resolved identities (`rampid:abc`, `id5:def`, `maid:ghi`) on a single impression: the impression's `impression_id` is appended to all three identity logs. At eligibility time, reading all three logs and deduplicating by `impression_id` recovers a single exposure. 
+- A user whose identity resolution toggles across impressions (some impressions resolve `rampid` only, some resolve `id5` only): each impression has its own `impression_id`. The dedup union across the user's identity logs returns all distinct impressions correctly. **No merge rule needed; no under-counting.** -The `merge_rule` on each fcap policy is **required** — there is no implicit default, because the right rule depends on whether the buyer canonicalizes upstream. +This is why the reference impl uses a log rather than counters: counters can't dedup across identities without an external mechanism. The log approach is correct by construction for graphless and graph-canonicalizing operators alike. -| Rule | Behavior | When to use | -|---|---|---| -| `MAX` | Take the largest count across identities. | **Recommended.** Buyers running an identity graph that canonicalizes upstream. Matches how Xandr/DV360/TTD model multi-identity fcap. Avoids over-counting when one impression resolves to two tokens. | -| `OR` | Treat any count > 0 as one exposure per identity, sum the booleans. | Defensible only for graphless operators where identity tokens are known not to alias. Over-counts when the same impression carries multiple identities. | -| `SUM` | Add all counts. | Almost always wrong — assumes identities never co-occur for the same person. | +The TMPX impression callback decodes the resolved identities (typically up to 3, per the [TMPX size budget](/docs/trusted-match/specification#size-budget)). The impression handler generates one `impression_id` at decode time and appends an `ExposureEntry` with that id to each identity's log. -Buyers who **want** canonicalization run their identity graph behind their own door — pre-canonicalizing tokens before write/read on both the `sync_audiences` path and the TMPX-decrypt path, then setting `MERGE_RULE_MAX` since the merge becomes a no-op. The protocol does not know this happened. 
+## Reference data model (valkey-backed, log-based) -## Reference data model (valkey-backed) +This is the layout used by [`adcp-go/targeting/`](https://github.com/adcontextprotocol/adcp-go/tree/main/targeting). Storage choice is implementation; any backend that satisfies the conformance invariants is conformant. -Four record types, each modeled directly on a Redis primitive. Cross-language interop is at the operation level (`HINCRBY`, `SADD`, `SMEMBERS`), not via a serialization layer. Valkey/Redis does not validate writes against a schema definition — the contract is enforced by the SDK on the write side and by the IdentityMatch reader on the read side. Library discipline (not database constraints) is what makes this work. +Valkey/Redis does not validate writes against a schema definition. The contract is enforced by the SDK on the write side and by the IdentityMatch reader on the read side. Library discipline (not database constraints) is what makes this work. -### Audience record +### Exposure log (per identity) ``` -type: SET (or ZSET if strength scores are used) -key: audience:{uid_type}:{user_token} -members: audience IDs the user belongs to +type: STRING (binary-encoded []ExposureEntry, lazy-pruned to window) +key: user:exposures:{HashToken(uid_type + ":" + user_token)} +value: [ + { impression_id, fcap_keys[], timestamp }, + ... +] ``` -Optional companion HASH at `audience_meta:{uid_type}:{user_token}` for diagnostics: - -``` -type: HASH -fields: - updated_at: unix seconds last written - expires_at: unix seconds after which the SET MUST be ignored (0 = no deadline) - source: origin pipeline (typically "sync_audiences") -``` +`HashToken` is a 16-byte SHA-256 prefix, hex-encoded. Binary entry encoding keeps the log compact ([`exposure_binary.go`](https://github.com/adcontextprotocol/adcp-go/blob/main/targeting/exposure_binary.go)) — a 30-day log for a typical user is a few KB. -Written by the buyer's `sync_audiences` pipeline. 
Read at IdentityMatch eligibility time. Real-world `sync_audiences` cadences vary widely (prospecting segments are often hourly-to-daily batched; retargeting via streaming CDP can be near-real-time) — `expires_at` lets the pipeline publish freshness contracts directly on the record. +Each entry records: -If using ZSET, the score carries audience strength (0.0–1.0); IdentityMatch can apply a strength floor at eligibility time. +- `impression_id` — generated by the impression handler at TMPX decode (UUID, ~16-20 bytes serialized). Used for cross-identity dedup at read time. Same value written to every identity log for one impression. +- `fcap_keys[]` — the labels this impression counts toward (e.g. `["buyer-acme:campaign:42", "buyer-acme:advertiser:13"]`). +- `timestamp` — unix seconds when the impression occurred. -### Exposure record +### User profile (per identity, optional) ``` -type: HASH -key: exposure:{fcap_key}:{uid_type}:{user_token} -fields: - count: uint, exposures inside the current policy window - first_seen: unix seconds (sliding-window policies) - last_seen: unix seconds, most recent exposure - window_start: unix seconds when the current fixed window opened (0 = sliding) +type: STRING (JSON-encoded UserProfile) +key: user:profile:{HashToken(uid_type + ":" + user_token)} +value: { segments: { "seg_id": intent_score, ... } } ``` -Incremented on TMPX decrypt with `HINCRBY exposure:... count 1` plus `HSET ... last_seen `. Atomic by Redis primitive; no serialization. Window semantics (sliding vs fixed) are policy-attached, not record-attached. +Audience-membership lookup. Populated by the buyer's audience pipeline (typically `sync_audiences`). At eligibility time, the IdentityMatch service unions segment memberships across all the user's identities, then intersects with each candidate package's required audiences. 
-### Package record +### Package config (per package) ``` -type: HASH -key: package:{seller_agent.agent_url}:{package_id} -fields: - media_buy_id: string (optional, for diagnostics) - active: "1" | "0" - updated_at: unix seconds +type: STRING (JSON-encoded PackageIdentityConfig) +key: package:identity:{package_id} +value: { + target_segments: ["seg_a", "seg_b"], + fcap_keys: ["buyer-acme:campaign:42", "buyer-acme:advertiser:13"], + active: true, + updated_at: unix seconds +} ``` -Companion SETs for the multi-valued lookups: +Written by the buyer's package-CRUD writethrough. Loaded with a single `MGet` for all candidate packages at eligibility time, then cached in-process per (seller_id, property_id, country) for ~5 minutes. -``` -type: SET -key: package_fcap_keys:{seller_agent.agent_url}:{package_id} -members: e.g. "buyer-acme:campaign:42", "buyer-acme:advertiser:13" -``` +### Fcap policy (per fcap_key) ``` -type: SET -key: package_audiences:{seller_agent.agent_url}:{package_id} -members: e.g. "seg_123", "seg_456" +type: STRING (JSON-encoded FcapPolicy) +key: fcap_policy:{fcap_key} +value: { + window_sec: uint, + max_count: uint, + active: true, + updated_at: unix seconds +} ``` -Written by the buyer's package-CRUD writethrough. Set membership lets eligibility checks compute audience intersection via native `SINTER` rather than client-side iteration. +Sliding window via `now - window_sec` filter at read. No FIXED/SLIDING toggle; the read-time filter handles both implicitly. -### Fcap policy record +### Read pattern -``` -type: HASH -key: fcap_policy:{fcap_key} -fields: - window_sec: uint - window_kind: "FIXED" | "SLIDING" - max_count: uint - merge_rule: "MAX" | "OR" | "SUM" (required, no implicit default) - active: "1" | "0" - updated_at: unix seconds -``` +For an IdentityMatch request with N identities and M candidate packages, the entire eligibility evaluation is **one MGet round-trip plus in-process computation**: + +1.
`MGet` `[user:profile:{h1}, user:exposures:{h1}, ..., user:profile:{hN}, user:exposures:{hN}]` — 2N keys, single round-trip. +2. Parse profiles (JSON, small) and exposure logs (binary, zero-copy). +3. Union segment memberships across identities; build user segment set. +4. For each candidate package: check segment match (set intersection), then check fcap eligibility by scanning the user's exposure log entries with lazy dedup by `impression_id`, filtered by `fcap_key` and `timestamp >= now - window`. +5. Return `eligible_package_ids`. + +Package configs (`package:identity:*`) and policies (`fcap_policy:*`) are loaded out-of-band — typically batch-loaded at resolver startup or refreshed every ~5 minutes — so they don't add to per-request round-trips. + +### Write pattern + +On TMPX decode at the impression handler: -Written by the buyer's policy-CRUD writethrough. +1. Generate `impression_id`. +2. Resolve `fcap_keys` for the package(s) the impression counts toward. +3. For each identity in the TMPX: + - `Get user:exposures:{h}` → parse binary log + - Append new `ExposureEntry` + - Prune entries older than the longest active window (default 30 days) + - `Set user:exposures:{h}` → serialized binary log + +The read-modify-write per identity is **not atomic**. Concurrent writes for the same user can lose an exposure. The reference impl ([`engine.go:478`](https://github.com/adcontextprotocol/adcp-go/blob/main/targeting/engine.go#L478)) explicitly accepts this trade — under-counting under contention is benign for fcap purposes. Atomic append via Lua or a future `Store.Append` method is a deferred optimization. ## SDK primitives @@ -165,9 +164,10 @@ decodeTmpx(raw_tmpx) -> ExposureLog (/docs/trusted-match/specification#binary-format), returns the resolved identity entries in a structured form ready for serialization onto a topic or for direct write. 
-writeExposure(log, store_context) -> { ok, count } - Writes the exposure increment(s) per the resolved identities and declared - fcap_keys. store_context wires the FrequencyStore implementation +writeExposure(log, fcap_keys, store_context) -> { ok } + Appends one entry to each identity's exposure log, all sharing a single fresh + impression_id, with the supplied fcap_keys. Prunes entries older than the longest + active window. store_context wires the Store implementation (valkey, Aerospike, DynamoDB, etc.). ``` @@ -176,8 +176,8 @@ Plus the buyer-side management plane: ``` upsertAudience(audience_id, members, opts) // wraps sync_audiences add/remove deltas upsertPackage(seller_agent_url, package_id, fcap_keys, audience_ids, opts) -upsertFcapPolicy(fcap_key, {window_sec, window_kind, max_count, merge_rule}) -inspectExposure(fcap_key, uid_type, user_token) // test helper; returns {count, last_seen, ...} +upsertFcapPolicy(fcap_key, {window_sec, max_count}) +inspectExposures(uid_type, user_token, fcap_key?) // test helper; returns matching log entries ``` Plus HPKE encrypt/decrypt as net-new SDK primitives (X25519 KEM, ChaCha20-Poly1305, HKDF-SHA256 per RFC 9180 `mode_base`). The encrypt path is needed by the IdentityMatch service emitting TMPX; decrypt by the impression handler invoking `decodeTmpx`. @@ -186,18 +186,20 @@ The same primitive surface ships in `@adcp/client` (TS), `adcp-go`, and `adcp` ( ## Pluggable store interfaces -The SDK exposes store interfaces — `FrequencyStore`, `AudienceStore`, `PackageStore`, `FcapPolicyStore` — that an IdentityMatch service implementation calls to satisfy the conformance invariants. Buyers running their own backend (Aerospike, DynamoDB, proprietary KV) implement these interfaces against their store; the SDK ships a reference valkey-backed connector. The interfaces, not the storage layout, are what the SDK contracts on.
+The SDK exposes a `Store` interface — modeled on [`adcp-go/targeting/store.go`](https://github.com/adcontextprotocol/adcp-go/blob/main/targeting/store.go) — that an IdentityMatch service implementation calls to satisfy the conformance invariants. Buyers running their own backend (Aerospike, DynamoDB, proprietary KV) implement this interface against their store; the SDK ships a reference valkey-backed connector. The interface, not the storage layout, is what the SDK contracts on. + +Core operations the IdentityMatch path needs: ``` -interface FrequencyStore { - increment(fcap_key, uid_type, user_token, by) -> count - read(fcap_key, uid_type, user_token) -> { count, first_seen, last_seen, window_start } - reset_window(fcap_key, uid_type, user_token, new_window_start) -} -// Equivalent shapes for AudienceStore, PackageStore, FcapPolicyStore. +Get(key) -> string, exists +MGet(keys...) -> [string] // batched, single round-trip +Set(key, value, ttl) +SetMembers(key) -> [string] // SET-typed reads (audiences, fcap_keys) +SetIntersect(keys...) -> [string] // efficient audience intersection +ZAdd / ZCount / ZRangeByScore // ZSET-typed if you store logs as sorted sets ``` -Specific interface signatures are tracked under `adcp-client#1005`. The point at protocol level: the SDK is store-agnostic by design. +Specific signatures are tracked under `adcp-client#1005`. The point at protocol level: the SDK is store-agnostic by design. ## Production topology pattern @@ -223,63 +225,98 @@ Decode at intake; emit to pub/sub for buffering; downstream worker writes the st A simpler synchronous pipeline (decode + write in the same handler) is also valid for low-volume deployments. The SDK supports both because the primitives are composable. +## Performance — measured + +Numbers below are from [`targeting/scale_test.go`](https://github.com/adcontextprotocol/adcp-go/blob/main/targeting/scale_test.go), against the in-memory mock store, on a single goroutine.
They isolate the in-process eligibility logic from network round-trips so you can reason about per-request CPU separately from valkey latency. + +**Frequency cap evaluation per IdentityMatch eval, single package, 1 identity:** + +| Prior exposures in user's log | Eval latency | +|---|---| +| 0 | 368 ns | +| 10 | 613 ns | +| 100 | 5.3 µs | +| 1,000 | 53 µs | +| 10,000 | 118 µs | + +Linear scan with binary lazy dedup; flat below 100 entries, sub-millisecond at 10K. + +**Resolver (load media buys + build indexes for a seller):** + +| Media buys | Packages | Cold (2 round-trips) | +|---|---|---| +| 1 | 2 | 2.2 µs | +| 100 | 200 | 223 µs | +| 500 | 1000 | 2.07 ms | + +The resolver runs once per (seller, property, country) and is cached. After cache warmup: + +**Per-IdentityMatch eligibility, varying candidate package count (cached resolver, mock store):** + +| Packages | Dynamic (MGet per request) | Resolved (cached) | Speedup | +|---|---|---|---| +| 10 | 106 µs | 1.6 µs | 65.7× | +| 50 | 138 µs | 30 µs | 4.6× | +| 100 | 429 µs | 28 µs | 15.1× | +| 500 | 3.58 ms | 218 µs | 16.4× | + +Real-world latency adds the network round-trip to valkey on top — typically 150 µs–1 ms depending on co-location. End-to-end, an IdentityMatch evaluation against a cached resolver and a co-located valkey is **1–3 ms typical**, dominated by the valkey MGet. + +The numbers contradict the assumption that scaling to high candidate-package counts requires a counter-based optimization. Log-based eligibility with binary lazy dedup is fast enough at the scales we're targeting. Re-running these tests against the actual deployment topology is on the rollout plan. + ## Conformance scenarios -The five scenarios that map to the conformance invariants. Each shows initial state, wire calls, buyer-internal operations, and expected outcomes. SDK-driven integration tests can run these against a live valkey + IdentityMatch service. +Five scenarios mapping to the conformance invariants. 
SDK-driven integration tests can run these against a live valkey + IdentityMatch service. Scenarios use the `fcap_keys` label model documented above; the reference impl is mid-generalization from scalar `package_id`/`campaign_id` to arbitrary fcap_keys. All scenarios assume `serve_window_sec = 60` (default), `tenant = "buyer-acme"`, `package = "pkg-42"`, `seller_agent_url = "https://seller-a.example"`. ### 1. Per-key cap trips after N exposures **Setup:** + ``` -SADD package_fcap_keys:https://seller-a.example:pkg-42 buyer-acme:campaign:42 -SADD package_audiences:https://seller-a.example:pkg-42 seg_test -HSET fcap_policy:buyer-acme:campaign:42 window_sec 86400 window_kind FIXED \ - max_count 5 merge_rule MAX active 1 -HSET package:https://seller-a.example:pkg-42 active 1 -SADD audience:rampid:abc seg_test +SET package:identity:pkg-42 = { + target_segments: ["seg_test"], + fcap_keys: ["buyer-acme:campaign:42"], + active: true +} +SET fcap_policy:buyer-acme:campaign:42 = { window_sec: 86400, max_count: 5, active: true } +SET user:profile: = { segments: { "seg_test": 1.0 } } ``` **Step 1** — wire call: `identity_match_request {identities: [{rampid, abc}], package_ids: [pkg-42]}` → expect `eligible_package_ids: [pkg-42]`, `serve_window_sec: 60`, `tmpx: `. -**Step 2** — buyer-internal, repeat 5×: decode TMPX, then for each (uid_type, user_token): -``` -HINCRBY exposure:buyer-acme:campaign:42:rampid:abc count 1 -HSET exposure:buyer-acme:campaign:42:rampid:abc last_seen -``` -After 5 iterations: `HGET exposure:buyer-acme:campaign:42:rampid:abc count` returns `5`. +**Step 2** — buyer-internal, 5 impressions: for each, decode TMPX, generate `impression_id`, write entry to user's exposure log with `fcap_keys: ["buyer-acme:campaign:42"]`. After 5 impressions the log contains 5 entries matching that key in the current window. + +**Step 3** — wire call: same `identity_match_request`. 
Eligibility scans the log, counts 5 matching entries, compares to `max_count: 5` → cap tripped → `eligible_package_ids: []`. -**Step 3** — wire call: same `identity_match_request` → expect `eligible_package_ids: []` (cap tripped, package dropped). +### 2. Multi-identity dedup -### 2. Multi-identity merge (MAX rule) +User has two resolved identities (`rampid:abc` and `id5:def`). Setup as Scenario 1, plus `user:profile:` with the same segments. -Same setup as Scenario 1. User has two resolved identities (`rampid:abc` and `id5:def`). +**Step 1** — buyer-internal, 3 impressions, each decoded with both identities resolved in the TMPX. Each impression writes the same `impression_id` to BOTH identity logs. -**Step 1** — buyer-internal, simulate prior exposures across identities: ``` -HSET exposure:buyer-acme:campaign:42:rampid:abc count 3 -HSET exposure:buyer-acme:campaign:42:id5:def count 2 +user:exposures: = [ + { impression_id: "imp-001", fcap_keys: [...campaign:42], ts: ... }, + { impression_id: "imp-002", fcap_keys: [...campaign:42], ts: ... }, + { impression_id: "imp-003", fcap_keys: [...campaign:42], ts: ... } +] +user:exposures: = [ same three entries ] ``` -**Step 2** — wire call: `identity_match_request {identities: [{rampid, abc}, {id5, def}], package_ids: [pkg-42]}`. +**Step 2** — wire call: `identity_match_request {identities: [{rampid,abc}, {id5,def}], package_ids: [pkg-42]}`. Eligibility reads both logs, dedups by `impression_id`, finds 3 distinct impressions. Under cap of 5 → eligible. -Eligibility check inside the buyer agent reads both records and applies `MERGE_RULE_MAX`: -``` -counts = [HGET exposure:...rampid:abc count, HGET exposure:...id5:def count] - = [3, 2] -merged = MAX(3, 2) = 3 -``` -3 < max_count of 5 → `eligible_package_ids: [pkg-42]`. +**Step 3** — buyer-internal: 3 more impressions, but identity resolution only gets `rampid:abc` (id5 lookup fails for these). 
-**Step 3** — buyer-internal, simulate 2 more impressions on rampid: ``` -HINCRBY exposure:buyer-acme:campaign:42:rampid:abc count 2 → count = 5 +user:exposures: += [ imp-004, imp-005, imp-006 ] +user:exposures: unchanged ``` -**Step 4** — wire call: same request → `MAX(5, 2) = 5 ≥ max_count` → `eligible_package_ids: []`. +**Step 4** — wire call: same request. Eligibility dedups: union of {imp-001, imp-002, imp-003} (in both logs) ∪ {imp-004, imp-005, imp-006} (only in rampid log) = 6 distinct. Cap of 5 tripped → `eligible_package_ids: []`. -If the policy were `MERGE_RULE_OR` (count distinct identities exposed), step 2 would have merged to `count_nonzero(3,2) = 2`, and step 4 to `2`. OR-merge would not trip until five distinct identities had been exposed — the over-counting concern that motivates MAX as the recommended default. +This is the case where counter approaches with merge rules under-count. The log approach with `impression_id` dedup gets the right answer regardless of identity-resolution stability. ### 3. Audience drift via sync_audiences @@ -288,39 +325,30 @@ Setup as Scenario 1, with the user initially in `seg_test`. **Step 1** — wire call: `identity_match_request` → `eligible_package_ids: [pkg-42]`. **Step 2** — buyer-internal, simulate `sync_audiences` removing the user from the segment: + ``` -SREM audience:rampid:abc seg_test -HSET audience_meta:rampid:abc updated_at +SET user:profile: = { segments: { } } ``` **Step 3** — wait `serve_window_sec` seconds (60) so the publisher re-queries. -**Step 4** — wire call: same `identity_match_request`. Buyer agent computes audience intersection: -``` -user_audiences = SMEMBERS audience:rampid:abc → [] -package_audiences = SMEMBERS package_audiences:...:pkg-42 → [seg_test] -intersection = ∅ → package dropped -``` -Expect `eligible_package_ids: []`. +**Step 4** — wire call: same `identity_match_request`. 
Eligibility checks audience intersection: user's empty segment set ∩ package's `[seg_test]` = ∅ → package dropped → `eligible_package_ids: []`. ### 4. Cross-seller advertiser cap Setup: two packages on different sellers, both mapped to the same `advertiser:13` cap: + ``` -SADD package_fcap_keys:https://seller-a.example:pkg-A buyer-acme:advertiser:13 -SADD package_fcap_keys:https://seller-b.example:pkg-B buyer-acme:advertiser:13 -HSET fcap_policy:buyer-acme:advertiser:13 window_sec 86400 max_count 10 \ - merge_rule MAX active 1 +SET package:identity:pkg-A = { fcap_keys: ["buyer-acme:advertiser:13"], active: true } +SET package:identity:pkg-B = { fcap_keys: ["buyer-acme:advertiser:13"], active: true } +SET fcap_policy:buyer-acme:advertiser:13 = { window_sec: 86400, max_count: 10, active: true } ``` **Step 1** — wire call from Seller A: `package_ids: [pkg-A]` → eligible. -**Step 2** — buyer-internal, simulate 10 impressions on Seller A's package: -``` -HSET exposure:buyer-acme:advertiser:13:rampid:abc count 10 -``` +**Step 2** — buyer-internal, 10 impressions on pkg-A, each entry's `fcap_keys` includes `buyer-acme:advertiser:13`. -**Step 3** — wire call from Seller B: `package_ids: [pkg-B]`. Buyer agent reads `exposure:buyer-acme:advertiser:13:rampid:abc.count = 10 ≥ max_count` → `eligible_package_ids: []`. +**Step 3** — wire call from Seller B: `package_ids: [pkg-B]`. Eligibility scans the user's log, counts entries matching `buyer-acme:advertiser:13` within window: 10 ≥ max_count → `eligible_package_ids: []`. The advertiser-level cap enforces across sellers because the `fcap_key` is shared. No cross-seller coordination needed; the buyer agent is the single source of truth. @@ -345,3 +373,4 @@ This is the semantic the `serve_window_sec` field encodes. 
The buyer agent's job - [Migration from AXE](/docs/trusted-match/migration-from-axe) — for buyers transitioning from AXE-shaped pipelines, including the OpenRTB User.eids cross-walk - [Privacy architecture](/docs/trusted-match/privacy-architecture) — what each party learns - [Router architecture](/docs/trusted-match/router-architecture) — provider registration, fan-out, latency +- [`adcp-go/targeting/`](https://github.com/adcontextprotocol/adcp-go/tree/main/targeting) — reference implementation in Go diff --git a/docs/trusted-match/specification.mdx b/docs/trusted-match/specification.mdx index f9e5b4c6de..75940e80f0 100644 --- a/docs/trusted-match/specification.mdx +++ b/docs/trusted-match/specification.mdx @@ -217,7 +217,7 @@ The publisher enforces allocation rules (competitive separation, pod composition A conformant IdentityMatch service MUST compute `eligible_package_ids` such that, for each `package_id ∈ request.package_ids`, the package is included in `eligible_package_ids` if and only if **all** of the following hold: 1. **Audience eligibility.** Either the package has no audience requirement, OR there exists at least one audience identifier `a` such that `a` is in the package's required audience set AND `a` is in the audience-membership of at least one identity `i ∈ request.identities` (the union across the user's resolved identities intersects the package's required audiences). -2. **Frequency cap eligibility.** For every fcap policy `k` declared on the package, the merged exposure count for `request.identities` against `k` is strictly less than the policy's `max_count`. The merge across identities applies the policy's declared rule (MAX, OR, or SUM — see § Buyer-side implementation in the [buyer guide](/docs/trusted-match/buyer-guide)). +2. 
**Frequency cap eligibility.** For every fcap policy `k` declared on the package, the count of distinct impressions for `request.identities` against `k` within `[now - window_sec, now]` is strictly less than the policy's `max_count`. The "distinct impressions" count deduplicates by `impression_id` across the user's resolved identities — an impression that was resolved with multiple identity tokens counts once. See [Identity handling](/docs/trusted-match/identity-match-implementation#identity-handling-and-cross-identity-dedup) in the implementation guide for the dedup mechanics. 3. **Active state.** Packages or policies marked inactive MUST be treated as if absent. 4. **Audience freshness.** If the buyer's audience pipeline publishes a freshness deadline and the current time is past it, that audience-membership entry MUST NOT contribute to (1). diff --git a/specs/identitymatch-fcap-architecture.md b/specs/identitymatch-fcap-architecture.md index 1160bd6c71..c01d8b73c3 100644 --- a/specs/identitymatch-fcap-architecture.md +++ b/specs/identitymatch-fcap-architecture.md @@ -42,9 +42,11 @@ The protocol describes **what** the service must compute, not **how** it stores `tenant:dimension:value` (e.g. `buyer-acme:campaign:42`, `buyer-acme:advertiser:13`). Tenant prefix required to prevent cross-tenant counter pollution in multi-tenant fleets. Charset constraint `[a-zA-Z0-9_-]+` per segment for unambiguous parsing. Buyers choose dimensions; the protocol does not enumerate them. See [implementation guide § fcap_keys label model](../docs/trusted-match/identity-match-implementation.mdx#fcap_keys-label-model). -### 3. No required canonicalization of user identity +### 3. Cross-identity dedup via `impression_id`, not merge rules -Records are keyed by `(uid_type, user_token)`. Buyers running their own identity graph can canonicalize before write/read; the protocol stays agnostic. Multi-identity merge is handled at eligibility-check time via the policy's `merge_rule`. 
**MAX recommended** for graph-canonicalizing operators (matches Xandr/DV360/TTD); OR for graphless operators where identities are known not to alias; SUM rarely correct. See [implementation guide § Identity handling](../docs/trusted-match/identity-match-implementation.mdx#identity-handling). +Records are keyed by `(uid_type, user_token)`. Buyers running their own identity graph can canonicalize before write/read; the protocol stays agnostic. Multi-identity dedup is handled at eligibility-check time by deduplicating exposure-log entries by `impression_id` — a single impression resolved to multiple identity tokens has the same `impression_id` written to all identity logs, and the read-time union recovers the count exactly. + +This approach is correct by construction for **graphless and graph-canonicalizing operators alike**, with no merge-rule policy needed. Earlier drafts of this design proposed counter-based exposure tracking with a `merge_rule` (MAX/OR/SUM) policy field; that approach under-counts when identity resolution toggles across impressions (a real concern given Scope3 is graphless). The `adcp-go/targeting/` reference impl already uses log-based dedup; this spec aligns with the existing impl rather than the abandoned counter design. See [implementation guide § Identity handling and cross-identity dedup](../docs/trusted-match/identity-match-implementation.mdx#identity-handling-and-cross-identity-dedup). ### 4. `serve_window_sec` replaces `ttl_sec` @@ -77,13 +79,14 @@ The existing wire `sync_audiences` task has `add[]`/`remove[]` deltas of audienc ## Open questions -1. **Window semantics.** Sliding vs fixed vs exponential decay. Default proposal: fixed window aligned to `window_sec` boundary, with `last_seen` recorded for diagnostics. -2. **Audience-record TTL inside the store.** `sync_audiences` writes are continuous. Proposal: `expires_at` on the audience-meta companion HASH; readers ignore expired entries. +1. 
**`fcap_keys` generalization in `adcp-go/targeting`.** The reference impl currently uses scalar `package_id` and `campaign_id`; the spec defines arbitrary `fcap_keys` (advertiser, creative, line-item, etc.). Generalizing the reference impl is an in-flight refactor. +2. **Atomic exposure-log append.** Reference impl uses read-modify-write per identity, which is not atomic. Comment in `engine.go:478` explicitly accepts under-counting under contention as benign. Atomic append via Lua or a `Store.Append` method is a deferred optimization. 3. **Cap on policies per fcap_key.** One policy per key for v1; cross-cutting caps (per-day AND per-hour) are expressed as multiple keys. 4. **Identity-graph plug-point.** Pre-write/pre-read interceptors in the SDK. Default: identity passthrough. -5. **Pluggable store interface signatures.** Settled in principle (FrequencyStore / AudienceStore / PackageStore / FcapPolicyStore); specific signatures pinned to `adcp-client#1005`. +5. **Pluggable store interface signatures.** Modeled on `adcp-go/targeting/store.go`. Specific TS/Python signatures pinned to `adcp-client#1005`. 6. **Where do fcap policies live on the wire (if anywhere)?** Currently SDK-only. Could embed in `create_media_buy` packages or add a new wire task. Decide before SDK ships. -7. **Audience strength scores.** ZSET allows per-audience strength; eligibility can apply a floor at check time. v1 ships SET; ZSET migration is buyer-internal. +7. **Audience strength scores.** Reference impl already supports per-segment scores in `UserProfile.Segments`. SDK should expose the strength floor at eligibility time. +8. **Production-deployment perf benchmarks.** Mock-store numbers (`scale_test.go`) cover the in-process eligibility path. Network round-trip to a real co-located valkey + cluster sharding effects need real benchmarks. Tracked as a rollout-plan deliverable. 
## Deferred security & privacy issues (follow-up) @@ -134,3 +137,5 @@ Per discussion with @bhuo (Scope3 impression-tracker owner) and Brian: ## Threads consolidated from PR #3359 review - **@oleksandr's normative/reference layering question:** the original spec called the buyer-side valkey schema "normative" while leaving an open question for a pluggable FrequencyStore interface. Inconsistent. Resolved by the three-layer model — wire spec + conformance invariants are normative; reference data model is Scope3's implementation choice, swappable. +- **Brian: counters can't dedup across identities, what about an exposure log keyed per-identity with imp_id-based dedup?** Direct comparison led to walking through correctness (counter+MAX under-counts when identity resolution toggles, log+imp_id is exact), then perf math (counter pipelined ~10-30ms vs log ~3-10ms — log structurally faster). Surveyed `adcp-go/targeting/`: the log approach is **already implemented and shipping**. Spec was speculating about an architecture the codebase had already chosen. Pivot: spec rewritten to match the existing reference impl (per-identity binary exposure log with `impression_id` dedup, single MGet read pattern, sliding window via timestamp filter, prune-on-write). All the merge-rule, FIXED/SLIDING, counter-comparison content removed. Real perf numbers from `targeting/scale_test.go` substituted for envelope math. +- **`fcap_keys` generalization** (Brian's call: "B is what we want"): spec defines the label model (`tenant:dimension:value`) as the design direction. The current reference impl uses scalar `package_id`+`campaign_id`; generalizing it to arbitrary fcap_keys is an in-flight refactor in `adcp-go/targeting`. New buyer impls SHOULD build against the label model directly. 
From 77d863889de74321c8b094a752411e6694d346b9 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Tue, 28 Apr 2026 03:07:25 -0400 Subject: [PATCH 08/12] =?UTF-8?q?docs(tmp):=20fix=20MDX=20parse=20error=20?= =?UTF-8?q?=E2=80=94=20wrap=20braced=20sets=20in=20code=20spans?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acorn parses bare {imp-001, imp-002, imp-003} as a JSX expression in MDX. Wrap in backticks to render as literal text. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/trusted-match/identity-match-implementation.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/trusted-match/identity-match-implementation.mdx b/docs/trusted-match/identity-match-implementation.mdx index 3219d79dac..3170d9f7de 100644 --- a/docs/trusted-match/identity-match-implementation.mdx +++ b/docs/trusted-match/identity-match-implementation.mdx @@ -314,7 +314,7 @@ user:exposures: += [ imp-004, imp-005, imp-006 ] user:exposures: unchanged ``` -**Step 4** — wire call: same request. Eligibility dedups: union of {imp-001, imp-002, imp-003} (in both logs) ∪ {imp-004, imp-005, imp-006} (only in rampid log) = 6 distinct. Cap of 5 tripped → `eligible_package_ids: []`. +**Step 4** — wire call: same request. Eligibility dedups: union of `{imp-001, imp-002, imp-003}` (in both logs) ∪ `{imp-004, imp-005, imp-006}` (only in rampid log) = 6 distinct. Cap of 5 tripped → `eligible_package_ids: []`. This is the case where counter approaches with merge rules under-count. The log approach with `impression_id` dedup gets the right answer regardless of identity-resolution stability. 
From 8968f3ebd2b291430d744fc8534c30acdc4b0611 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Tue, 28 Apr 2026 03:15:47 -0400 Subject: [PATCH 09/12] docs(tmp): add measured CPU numbers + algorithmic optimization note MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wrote a combined-load CPU benchmark (TestScale_IdentityMatch_CPU_Combined in adcp-go/targeting/) varying packages × log_size × identities together. Production sizing depends on the combined dimensions, not single-axis scaling. Numbers (mock store, single goroutine, isolated from network): packages log entries identities CPU/eval 100 100 3 90 µs 100 1,000 3 1.0 ms 1,000 1,000 3 7.5 ms ← realistic Scope3 load 1,000 10,000 3 58 ms ← pathological tail Implications: - Median traffic (100 pkg × 100 log): ~11,000 QPS/core. No issue. - Realistic Scope3-shape load: ~130 QPS/core. Comfortable. - Heavy tail (1000 pkg × 10K log × 3 ids): 58 ms CPU per request, outside the 30 ms p95 latency budget. ~17 QPS/core. - Eligibility is embarrassingly parallel — scale-out is "add cores" with no shared-state bottleneck on the eligibility path. Algorithmic optimization documented: The current impl re-scans the exposure log per candidate package (O(packages × log_entries × identities)). A pre-aggregation pass — scan each identity's log once, build map[fcap_key]count for the window, lookup per-package — drops complexity to O(log + packages). Expected ~7× speedup at realistic load, ~6× at the pathological tail. Buyer-side impl concern, not protocol; tracked as a rollout-plan item. Also documented what hasn't been measured: network round-trip under contention, valkey memory/CPU at production scale, tail latency under load, heavy-user impression-distribution shape. Production benchmarks remain a rollout-plan deliverable. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../identity-match-implementation.mdx | 65 +++++++++++++++---- specs/identitymatch-fcap-architecture.md | 3 +- 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/docs/trusted-match/identity-match-implementation.mdx b/docs/trusted-match/identity-match-implementation.mdx index 3170d9f7de..17826b16e8 100644 --- a/docs/trusted-match/identity-match-implementation.mdx +++ b/docs/trusted-match/identity-match-implementation.mdx @@ -229,7 +229,9 @@ A simpler synchronous pipeline (decode + write in the same handler) is also vali Numbers below are from [`targeting/scale_test.go`](https://github.com/adcontextprotocol/adcp-go/blob/main/targeting/scale_test.go), against the in-memory mock store, on a single goroutine. They isolate the in-process eligibility logic from network round-trips so you can reason about per-request CPU separately from valkey latency. -**Frequency cap evaluation per IdentityMatch eval, single package, 1 identity:** +### Single-dimension scaling + +**Frequency cap evaluation per eval, single package, 1 identity:** | Prior exposures in user's log | Eval latency | |---|---| @@ -241,28 +243,67 @@ Numbers below are from [`targeting/scale_test.go`](https://github.com/adcontextp Linear scan with binary lazy dedup; flat below 100 entries, sub-millisecond at 10K. -**Resolver (load media buys + build indexes for a seller):** +**Resolver (load media buys + build indexes for a seller, 2 store round-trips):** -| Media buys | Packages | Cold (2 round-trips) | +| Media buys | Packages | Cold | |---|---|---| | 1 | 2 | 2.2 µs | | 100 | 200 | 223 µs | | 500 | 1000 | 2.07 ms | -The resolver runs once per (seller, property, country) and is cached. After cache warmup: +The resolver runs once per (seller, property, country) and is cached. After cache warmup, eligibility evaluation against the cached resolver is dominated by per-package log scans rather than resolver lookup. 
+ +### Combined scaling (packages × log_size × identities) -**Per-IdentityMatch eligibility, varying candidate package count (cached resolver, mock store):** +The combined case is what production sizing depends on. Numbers from `TestScale_IdentityMatch_CPU_Combined` — same mock-store isolation, varying all three dimensions: -| Packages | Dynamic (MGet per request) | Resolved (cached) | Speedup | +| packages | log entries | identities | CPU/eval | |---|---|---|---| -| 10 | 106 µs | 1.6 µs | 65.7× | -| 50 | 138 µs | 30 µs | 4.6× | -| 100 | 429 µs | 28 µs | 15.1× | -| 500 | 3.58 ms | 218 µs | 16.4× | +| 100 | 100 | 3 | 90 µs | +| 100 | 1,000 | 3 | 1.0 ms | +| 100 | 10,000 | 3 | 7.2 ms | +| 1,000 | 100 | 3 | 0.78 ms | +| 1,000 | 1,000 | 3 | 7.5 ms ← realistic Scope3-shape load | +| 1,000 | 10,000 | 3 | 58 ms ← pathological tail | + +CPU scales linearly in `packages × log_entries × identities` — the eligibility logic re-scans each user's exposure log per candidate package via `CheckFrequencyRulesMultiLog` ([`engine.go`](https://github.com/adcontextprotocol/adcp-go/blob/main/targeting/engine.go)). + +### Throughput per CPU core + +| Profile | Per-eval CPU | QPS / core | +|---|---|---| +| Median (100 pkg × 100 log × 3 ids) | 90 µs | ~11,000 | +| Realistic Scope3 (1000 pkg × 1000 log × 3 ids) | 7.5 ms | ~130 | +| Heavy tail (1000 pkg × 10K log × 3 ids) | 58 ms | ~17 | + +Eligibility evaluation has no shared state across requests — embarrassingly parallel. Scaling out is "add cores" or "add instances," with no eligibility-path bottleneck. + +### End-to-end with valkey + +Real-world latency adds the network round-trip to valkey for the user-data MGet. Typically 150 µs–1 ms depending on co-location. 
End-to-end: + +- **Median**: ~0.5–1.5 ms (CPU + valkey round-trip) +- **Realistic Scope3-shape**: ~7–10 ms +- **Pathological tail**: ~60 ms (outside the 30 ms p95 latency budget — heavy users on busy publishers are the risk) + +### Algorithmic optimization opportunity + +Current impl rescans the exposure log per candidate package: `O(packages × log_entries × identities)`. A pre-aggregation pass — scan each identity's log once, build `map[fcap_key]count` for the relevant window, then lookup per-package — drops complexity to `O(log_entries × identities + packages)`. Expected impact: + +- Realistic Scope3 case (1000 pkg × 1000 log × 3 ids): ~7.5 ms → ~1 ms +- Pathological case (1000 pkg × 10K log × 3 ids): ~58 ms → ~10 ms + +This is a buyer-side impl optimization, not a protocol concern. Tracked as a follow-up in the rollout plan. + +### What hasn't been measured -Real-world latency adds the network round-trip to valkey on top — typically 150 µs–1 ms depending on co-location. End-to-end, an IdentityMatch evaluation against a cached resolver and a co-located valkey is **1–3 ms typical**, dominated by the valkey MGet. +The above is mock-store CPU only. Real production sizing also depends on: +- **Network round-trip to valkey** under contention — needs measurement against the actual deployment +- **valkey memory and CPU** for the user-data working set at production scale +- **Tail-latency behavior under load** (not single-goroutine throughput) +- **Heavy-user impression-distribution shape** — what fraction of users hit 1K+ entries in the 30-day window -The numbers contradict the assumption that scaling to high candidate-package counts requires a counter-based optimization. Log-based eligibility with binary lazy dedup is fast enough at the scales we're targeting. Re-running these tests against the actual deployment topology is on the rollout plan. +Production benchmarks tracked as a rollout-plan deliverable. 
## Conformance scenarios diff --git a/specs/identitymatch-fcap-architecture.md b/specs/identitymatch-fcap-architecture.md index c01d8b73c3..30966d1ecf 100644 --- a/specs/identitymatch-fcap-architecture.md +++ b/specs/identitymatch-fcap-architecture.md @@ -86,7 +86,8 @@ The existing wire `sync_audiences` task has `add[]`/`remove[]` deltas of audienc 5. **Pluggable store interface signatures.** Modeled on `adcp-go/targeting/store.go`. Specific TS/Python signatures pinned to `adcp-client#1005`. 6. **Where do fcap policies live on the wire (if anywhere)?** Currently SDK-only. Could embed in `create_media_buy` packages or add a new wire task. Decide before SDK ships. 7. **Audience strength scores.** Reference impl already supports per-segment scores in `UserProfile.Segments`. SDK should expose the strength floor at eligibility time. -8. **Production-deployment perf benchmarks.** Mock-store numbers (`scale_test.go`) cover the in-process eligibility path. Network round-trip to a real co-located valkey + cluster sharding effects need real benchmarks. Tracked as a rollout-plan deliverable. +8. **Production-deployment perf benchmarks.** Mock-store numbers cover the in-process eligibility path: realistic Scope3-shape load (1000 pkg × 1000 log × 3 ids) is ~7.5 ms CPU/request — comfortable. Pathological tail (1000 pkg × 10K log × 3 ids) is ~58 ms CPU/request — outside the 30 ms p95 budget. Network round-trip to real co-located valkey, cluster sharding, and tail-latency under load all need real benchmarks. Tracked as a rollout-plan deliverable. +9. **Pre-aggregate-per-fcap_key optimization.** Current impl scans the exposure log per candidate package: O(packages × log_entries × identities). Scanning once and building a `map[fcap_key]count` would drop to O(log_entries × identities + packages) — ~7× speedup at realistic load, ~6× at the pathological tail. Buyer-side optimization, not a protocol concern. 
## Deferred security & privacy issues (follow-up) From 814b6d8ef0dd6bae2721feaa02d4aec9ae7768af Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Tue, 28 Apr 2026 06:38:11 -0400 Subject: [PATCH 10/12] docs(tmp): link spec to upstream perf PR adcp-go#103 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pre-aggregation optimization landed as a real upstream PR. Update the open-questions item and the implementation-guide perf section to reflect measured speedups (11-38×) instead of the earlier "expected ~7×" envelope estimate. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../identity-match-implementation.mdx | 15 ++++++++++----- specs/identitymatch-fcap-architecture.md | 2 +- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/docs/trusted-match/identity-match-implementation.mdx b/docs/trusted-match/identity-match-implementation.mdx index 17826b16e8..0f662f74bf 100644 --- a/docs/trusted-match/identity-match-implementation.mdx +++ b/docs/trusted-match/identity-match-implementation.mdx @@ -286,14 +286,19 @@ Real-world latency adds the network round-trip to valkey for the user-data MGet. - **Realistic Scope3-shape**: ~7–10 ms - **Pathological tail**: ~60 ms (outside the 30 ms p95 latency budget — heavy users on busy publishers are the risk) -### Algorithmic optimization opportunity +### Algorithmic optimization (landed) -Current impl rescans the exposure log per candidate package: `O(packages × log_entries × identities)`. A pre-aggregation pass — scan each identity's log once, build `map[fcap_key]count` for the relevant window, then lookup per-package — drops complexity to `O(log_entries × identities + packages)`. Expected impact: +Original impl rescanned the exposure log per candidate package: `O(packages × log_entries × identities)`. The optimization pre-buckets the log by filter hash once per request; per-package check walks only the matching bucket. 
Heuristic-gated at `numPackages > 50` so small-package requests stay on the naive path (avoids regression on small requests with heavy logs). -- Realistic Scope3 case (1000 pkg × 1000 log × 3 ids): ~7.5 ms → ~1 ms -- Pathological case (1000 pkg × 10K log × 3 ids): ~58 ms → ~10 ms +Tracked at [adcp-go#103](https://github.com/adcontextprotocol/adcp-go/pull/103). Measured speedups vs the original implementation: -This is a buyer-side impl optimization, not a protocol concern. Tracked as a follow-up in the rollout plan. +| packages | log entries | identities | Before | After | Speedup | +|----------|------------:|-----------:|----------:|---------:|--------:| +| 1000 | 100 | 3 | 784 µs | 71 µs | 11.0× | +| 1000 | 1000 | 3 | 7,566 µs | 287 µs | 26.4× | +| 1000 | 10000 | 3 | 57,861 µs | 1,500 µs | ~38× | + +The pathological tail drops from 58ms to 1.5ms, well within the latency budget. Below the threshold (≤50 packages), the naive path is preserved. ### What hasn't been measured diff --git a/specs/identitymatch-fcap-architecture.md b/specs/identitymatch-fcap-architecture.md index 30966d1ecf..6b6862de08 100644 --- a/specs/identitymatch-fcap-architecture.md +++ b/specs/identitymatch-fcap-architecture.md @@ -87,7 +87,7 @@ The existing wire `sync_audiences` task has `add[]`/`remove[]` deltas of audienc 6. **Where do fcap policies live on the wire (if anywhere)?** Currently SDK-only. Could embed in `create_media_buy` packages or add a new wire task. Decide before SDK ships. 7. **Audience strength scores.** Reference impl already supports per-segment scores in `UserProfile.Segments`. SDK should expose the strength floor at eligibility time. 8. **Production-deployment perf benchmarks.** Mock-store numbers cover the in-process eligibility path: realistic Scope3-shape load (1000 pkg × 1000 log × 3 ids) is ~7.5 ms CPU/request — comfortable. Pathological tail (1000 pkg × 10K log × 3 ids) is ~58 ms CPU/request — outside the 30 ms p95 budget. 
Network round-trip to real co-located valkey, cluster sharding, and tail-latency under load all need real benchmarks. Tracked as a rollout-plan deliverable. -9. **Pre-aggregate-per-fcap_key optimization.** Current impl scans the exposure log per candidate package: O(packages × log_entries × identities). Scanning once and building a `map[fcap_key]count` would drop to O(log_entries × identities + packages) — ~7× speedup at realistic load, ~6× at the pathological tail. Buyer-side optimization, not a protocol concern. +9. **Pre-aggregate-per-fcap_key optimization** ([adcp-go#103](https://github.com/adcontextprotocol/adcp-go/pull/103) — implemented in an upstream PR that is still in flight). Pre-buckets the exposure log by filter hash once per request; per-package check walks only the matching bucket instead of re-scanning the full log. Heuristic-gated at `numPackages > 50` so small-package requests stay on the naive path (avoids a measured ~3× regression on small requests with heavy logs). Measured speedups: 1000 pkg × 1000 log × 3 ids: ~26×; 1000 pkg × 10K log × 3 ids: ~38× (pathological tail drops from 58ms to ~1.5ms, well within the latency budget). ## Deferred security & privacy issues (follow-up) From 3b8784648d5f9658e4672252375d5e00ce2f5d2c Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Tue, 28 Apr 2026 06:43:10 -0400 Subject: [PATCH 11/12] docs(tmp): make impression_id global-uniqueness requirement explicit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cross-identity dedup contract depends on impression_id being unique not just per identity-list but across all sellers, all sources, all time. Two sellers' impressions on the same user must not collide — collision would silently merge distinct impressions at read-time dedup and under-count the cap. 
Make this explicit in the implementation guide and the conformance invariants: - Globally unique (UUIDv4 or equivalent ≥122 bits randomness) - Generated by the buyer's impression handler at TMPX decode (not seller-supplied, not the TMPX nonce — that's per-IdentityMatch-evaluation and shared across impressions in the serve window — and not publisher-supplied) - Same impression_id written to ALL resolved identity logs for one impression - Pixel retries are a separate concern (per-pixel idempotency); cross-identity dedup and pixel-retry dedup are different problems Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/trusted-match/identity-match-implementation.mdx | 9 +++++++++ docs/trusted-match/specification.mdx | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/docs/trusted-match/identity-match-implementation.mdx b/docs/trusted-match/identity-match-implementation.mdx index 0f662f74bf..b12ff54dff 100644 --- a/docs/trusted-match/identity-match-implementation.mdx +++ b/docs/trusted-match/identity-match-implementation.mdx @@ -61,8 +61,17 @@ The reference impl handles this cleanly: **per-impression `impression_id` writte This is why the reference impl uses a log rather than counters: counters can't dedup across identities without an external mechanism. The log approach is correct by construction for graphless and graph-canonicalizing operators alike. +### `impression_id` generation rules + The TMPX impression callback decodes the resolved identities (typically up to 3, per the [TMPX size budget](/docs/trusted-match/specification#size-budget)). The impression handler generates one `impression_id` at decode time and appends an `ExposureEntry` with that id to each identity's log. +Critical invariants for `impression_id`: + +1. **Globally unique across all sellers, all sources, all time.** A single buyer agent serves impressions sourced from many sellers.
If two sellers' impressions on the same user collide on `impression_id`, the read-time dedup falsely merges them as one impression and the cap under-counts. Use UUIDv4 (≥122 bits randomness) or an equivalent generator with collision-resistance across distributed instances. +2. **Generated by the buyer's impression handler at TMPX decode**, not supplied by the seller, the publisher, or the router, and not derived from the TMPX nonce. The TMPX nonce is per-IdentityMatch-evaluation and SHARED across all impressions in the serve window — not unique per impression. Seller-supplied IDs would collide across sellers. Publisher-supplied IDs would collide across publishers. Only the buyer agent has the global view to mint a unique id. +3. **One `impression_id` per impression, written to ALL of the user's resolved identity logs for that impression.** This is what enables the read-time dedup. If the buyer instead generated different ids per identity, the dedup contract would break and the same impression would count once per resolved identity. +4. **Pixel retries are a separate concern.** The same pixel firing twice (network retry, refresh, etc.) MUST NOT generate two `impression_id`s — that would double-count a single impression. Either (a) the impression handler dedupes incoming requests by a separate idempotency key carried in the pixel URL or `Idempotency-Key` header, or (b) the deployment accepts a small over-count from pixel retries as benign. Cross-identity dedup and per-pixel idempotency are different problems with different mitigations. + ## Reference data model (valkey-backed, log-based) This is the layout used by [`adcp-go/targeting/`](https://github.com/adcontextprotocol/adcp-go/tree/main/targeting). Storage choice is implementation; any backend that satisfies the conformance invariants is conformant.
diff --git a/docs/trusted-match/specification.mdx b/docs/trusted-match/specification.mdx index 75940e80f0..40359f618a 100644 --- a/docs/trusted-match/specification.mdx +++ b/docs/trusted-match/specification.mdx @@ -217,7 +217,7 @@ The publisher enforces allocation rules (competitive separation, pod composition A conformant IdentityMatch service MUST compute `eligible_package_ids` such that, for each `package_id ∈ request.package_ids`, the package is included in `eligible_package_ids` if and only if **all** of the following hold: 1. **Audience eligibility.** Either the package has no audience requirement, OR there exists at least one audience identifier `a` such that `a` is in the package's required audience set AND `a` is in the audience-membership of at least one identity `i ∈ request.identities` (the union across the user's resolved identities intersects the package's required audiences). -2. **Frequency cap eligibility.** For every fcap policy `k` declared on the package, the count of distinct impressions for `request.identities` against `k` within `[now - window_sec, now]` is strictly less than the policy's `max_count`. The "distinct impressions" count deduplicates by `impression_id` across the user's resolved identities — an impression that was resolved with multiple identity tokens counts once. See [Identity handling](/docs/trusted-match/identity-match-implementation#identity-handling-and-cross-identity-dedup) in the implementation guide for the dedup mechanics. +2. **Frequency cap eligibility.** For every fcap policy `k` declared on the package, the count of distinct impressions for `request.identities` against `k` within `[now - window_sec, now]` is strictly less than the policy's `max_count`. The "distinct impressions" count deduplicates by `impression_id` across the user's resolved identities — an impression that was resolved with multiple identity tokens counts once. 
The `impression_id` MUST be **globally unique across all sellers, sources, and time**, generated by the buyer's impression handler at TMPX decode (not seller-supplied, not the TMPX nonce, not publisher-supplied) — collisions on `impression_id` across sellers would silently merge distinct impressions and under-count caps. See [Identity handling](/docs/trusted-match/identity-match-implementation#identity-handling-and-cross-identity-dedup) and [`impression_id` generation rules](/docs/trusted-match/identity-match-implementation#impression_id-generation-rules) in the implementation guide. 3. **Active state.** Packages or policies marked inactive MUST be treated as if absent. 4. **Audience freshness.** If the buyer's audience pipeline publishes a freshness deadline and the current time is past it, that audience-membership entry MUST NOT contribute to (1). From 9e57653c0ad784c038179cb1db1f644ea4370e7f Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 28 Apr 2026 13:27:46 +0000 Subject: [PATCH 12/12] spec(tmp): simplify fcap_keys format, remove ttl_sec, add pre-launch note Three simplifications per @bokelley review comment: 1. fcap_keys format: dimension:value (drop required tenant prefix). Multi-tenant operators may still use tenant:dimension:value as a deployment convention, but the protocol does not mandate it. 2. ttl_sec: removed outright. TMP is pre-launch (experimental, pre-3.0.0 GA) and not subject to deprecation cycles. serve_window_sec is the field; no rename framing or notice window needed. 3. Pre-launch note: added one-line statement to the Experimental callout in specification.mdx that fields on this surface are not subject to deprecation cycles until 3.0.0 GA. 
https://claude.ai/code/session_01RVevfeAnA9oXcJAkhRjHw6 --- docs/trusted-match/buyer-guide.mdx | 11 ++-- .../identity-match-implementation.mdx | 54 +++++++++---------- docs/trusted-match/specification.mdx | 11 ++-- specs/identitymatch-fcap-architecture.md | 10 ++-- .../source/tmp/identity-match-response.json | 8 +-- 5 files changed, 42 insertions(+), 52 deletions(-) diff --git a/docs/trusted-match/buyer-guide.mdx b/docs/trusted-match/buyer-guide.mdx index 4d513e09c1..964abb1e02 100644 --- a/docs/trusted-match/buyer-guide.mdx +++ b/docs/trusted-match/buyer-guide.mdx @@ -16,7 +16,7 @@ A buyer agent exposes two HTTP/2 endpoints under a single base URL — `POST /co | Message type | Receives | Returns | |---|---|---| | `context_match_request` | Page/content signals, placement, geo | Offers with creative manifests | -| `identity_match_request` | Opaque user token, all active package IDs | Eligible package IDs + serve window | +| `identity_match_request` | Opaque user token, all active package IDs | Eligible package IDs + `serve_window_sec` | Each endpoint handles one message type. Both must respond in under 50ms. The router enforces this budget and will skip slow providers. @@ -124,7 +124,7 @@ The router sends you one or more opaque identity tokens and a list of ALL your a } ``` -Return only the package IDs that pass your eligibility checks. Packages not in the list are treated as ineligible. The `serve_window_sec` is a **per-package single-shot fcap**: after the publisher serves the user one impression on each eligible package within this window, the publisher MUST re-query Identity Match before serving from those packages again. Default 60s, max 300s. This is not a router response cache TTL — see [The serve-window contract](#the-serve-window-contract). The deprecated `ttl_sec` field still exists during a 6-week deprecation window; senders SHOULD populate both with the same value. +Return only the package IDs that pass your eligibility checks. 
Packages not in the list are treated as ineligible. The `serve_window_sec` is a **per-package single-shot fcap**: after the publisher serves the user one impression on each eligible package within this window, the publisher MUST re-query Identity Match before serving from those packages again. Default 60s, max 300s. This is not a router response cache TTL — see [The serve-window contract](#the-serve-window-contract). **What you never receive** in Identity Match: page URLs, content topics, keywords, article text, or any content signal. You cannot determine what the user is looking at. @@ -147,12 +147,12 @@ Cross-publisher frequency capping is the primary use case for Identity Match. Yo - **Count impressions** per fcap key (campaign, advertiser, creative, line item, or whatever dimensions you cap on) per resolved user identity - **Apply policies** with a window and max count -- **Merge across identities** for users with multiple resolved tokens (RampID + ID5 + MAID for the same person) — see [merge rules](/docs/trusted-match/identity-match-implementation#merge-rules) +- **Merge across identities** for users with multiple resolved tokens (RampID + ID5 + MAID for the same person) — see [identity handling](/docs/trusted-match/identity-match-implementation#identity-handling-and-cross-identity-dedup) - **Exclude packages** from `eligible_package_ids` when any cap on the package trips Because Identity Match runs across all publishers using TMP, a user who saw your ad on Publisher A will correctly show as over-frequency on Publisher B — even though you can't see which publisher sent the request. -For the implementation details — the fcap_keys label model, the reference valkey data model, merge_rule semantics, audience and exposure record shapes, the SDK primitives, and Redis-command walkthroughs for the conformance scenarios — see [Identity Match implementation](/docs/trusted-match/identity-match-implementation). 
+For the implementation details — the fcap_keys label model, the reference valkey data model, audience and exposure record shapes, the SDK primitives, and conformance scenarios — see [Identity Match implementation](/docs/trusted-match/identity-match-implementation). ### How Buyers Learn About Exposures @@ -215,7 +215,6 @@ The router MAY apply an internal deduplication cache keyed by `{identities_hash, **Choosing a serve_window_sec value**: Default 60 seconds. Range 1–300. Anything longer than 300 makes per-package fcap too coarse for typical campaigns. Anything shorter than your IdentityMatch round-trip just adds load. 60 is a good default; tune downward if eligibility state shifts faster (close to a cap, audience just changed) or upward (max 300) if your IdentityMatch service is at load and the campaigns are tolerant of coarser fcap. -The deprecated `ttl_sec` field still exists during the 6-week deprecation window. Senders SHOULD populate both `ttl_sec` and `serve_window_sec` with the same value during the transition; receivers SHOULD prefer `serve_window_sec` when both are present. `ttl_sec` is removed in a future 3.0.x release ≥ 6 weeks after the 2026-04-26 notice. ## Performance Requirements @@ -239,7 +238,7 @@ Buyers receive real-time per-user exposure signals via the `{TMPX}` macro. 
The I | | OpenRTB | TMP | |---|---|---| | **You receive** | Full bid request (user + content + device) | Either content OR identity, never both | -| **You return** | Bid price | Offer (creative manifest) or eligible package IDs + TTL | +| **You return** | Bid price | Offer (creative manifest) or eligible package IDs + serve window | | **Auction** | Exchange runs auction | No auction — publisher joins locally | | **Frequency** | Per-DSP only | Cross-publisher via Identity Match | | **Integration** | Per-exchange SSP adapter | Two endpoints (context + identity), any surface | diff --git a/docs/trusted-match/identity-match-implementation.mdx b/docs/trusted-match/identity-match-implementation.mdx index b12ff54dff..d7245110ef 100644 --- a/docs/trusted-match/identity-match-implementation.mdx +++ b/docs/trusted-match/identity-match-implementation.mdx @@ -21,26 +21,24 @@ The reference data model on this page is **valkey-backed and log-based**, matchi ## fcap_keys label model -A frequency cap is identified by a tag of the form `tenant:dimension:value`: +A frequency cap is identified by a tag of the form `dimension:value`: ``` -buyer-acme:campaign:42 -buyer-acme:campaign_group:7 -buyer-acme:advertiser:13 -buyer-acme:creative:8 +campaign:42 +campaign_group:7 +advertiser:13 +creative:8 ``` Packages declare which `fcap_keys` they belong to; exposure log entries record which keys the impression counts toward; policies (window, max count) are attached per-key. 
``` -package 2342: fcap_keys ["buyer-acme:campaign:42", - "buyer-acme:campaign_group:7", - "buyer-acme:advertiser:13"] -policy "buyer-acme:campaign:42": {window_sec: 60, max_count: 5} -policy "buyer-acme:advertiser:13": {window_sec: 86400, max_count: 20} +package 2342: fcap_keys ["campaign:42", "campaign_group:7", "advertiser:13"] +policy "campaign:42": {window_sec: 60, max_count: 5} +policy "advertiser:13": {window_sec: 86400, max_count: 20} ``` -**Tenant prefix is required.** Multi-tenant buyer-agent fleets host multiple advertiser orgs. Without a tenant prefix, `campaign:42` collides on shared state between tenants and tenant A learns B's reach by watching counters or log entries. SDKs MUST refuse keys without a tenant prefix. +Multi-tenant operators adopt a tenant prefix (`buyer-acme:campaign:42`) as a deployment convention to prevent key collisions across advertiser orgs on shared state — this is an operator-level choice, not a protocol requirement. **Charset constraint.** Each segment matches `[a-zA-Z0-9_-]+` so the `:` delimiter is unambiguous. URL-bearing or otherwise colon-bearing values must be hashed or shortened before use as a value segment. @@ -94,7 +92,7 @@ value: [ Each entry records: - `impression_id` — generated by the impression handler at TMPX decode (UUID, ~16-20 bytes serialized). Used for cross-identity dedup at read time. Same value written to every identity log for one impression. -- `fcap_keys[]` — the labels this impression counts toward (e.g. `["buyer-acme:campaign:42", "buyer-acme:advertiser:13"]`). +- `fcap_keys[]` — the labels this impression counts toward (e.g. `["campaign:42", "advertiser:13"]`). - `timestamp` — unix seconds when the impression occurred. 
### User profile (per identity, optional) @@ -114,7 +112,7 @@ type: STRING (JSON-encoded PackageIdentityConfig) key: package:identity:{package_id} value: { target_segments: ["seg_a", "seg_b"], - fcap_keys: ["buyer-acme:campaign:42", "buyer-acme:advertiser:13"], + fcap_keys: ["campaign:42", "advertiser:13"], active: true, updated_at: } @@ -183,10 +181,10 @@ writeExposure(log, fcap_keys, store_context) -> { ok } Plus the buyer-side management plane: ``` -upsertAudience(audience_id, members, opts) // wraps sync_audiences add/remove deltas +upsertAudience(audience_id, members, opts) // wraps sync_audiences add/remove deltas upsertPackage(seller_agent_url, package_id, fcap_keys, audience_ids, opts) -upsertFcapPolicy(fcap_key, {window_sec, max_count}) -inspectExposures(uid_type, user_token, fcap_key?) // test helper; returns matching log entries +upsertFcapPolicy(fcap_key, {window_sec, max_count}) // e.g. "campaign:42" +inspectExposures(uid_type, user_token, fcap_key?) // test helper; returns matching log entries ``` Plus HPKE encrypt/decrypt as net-new SDK primitives (X25519 KEM, ChaCha20-Poly1305, HKDF-SHA256 per RFC 9180 `mode_base`). The encrypt path is needed by the IdentityMatch service emitting TMPX; decrypt by the impression handler invoking `decodeTmpx`. @@ -323,7 +321,7 @@ Production benchmarks tracked as a rollout-plan deliverable. Five scenarios mapping to the conformance invariants. SDK-driven integration tests can run these against a live valkey + IdentityMatch service. Scenarios use the `fcap_keys` label model documented above; the reference impl is mid-generalization from scalar `package_id`/`campaign_id` to arbitrary fcap_keys. -All scenarios assume `serve_window_sec = 60` (default), `tenant = "buyer-acme"`, `package = "pkg-42"`, `seller_agent_url = "https://seller-a.example"`. +All scenarios assume `serve_window_sec = 60` (default), `package = "pkg-42"`, `seller_agent_url = "https://seller-a.example"`. ### 1. 
Per-key cap trips after N exposures @@ -332,16 +330,16 @@ All scenarios assume `serve_window_sec = 60` (default), `tenant = "buyer-acme"`, ``` SET package:identity:pkg-42 = { target_segments: ["seg_test"], - fcap_keys: ["buyer-acme:campaign:42"], + fcap_keys: ["campaign:42"], active: true } -SET fcap_policy:buyer-acme:campaign:42 = { window_sec: 86400, max_count: 5, active: true } +SET fcap_policy:campaign:42 = { window_sec: 86400, max_count: 5, active: true } SET user:profile: = { segments: { "seg_test": 1.0 } } ``` **Step 1** — wire call: `identity_match_request {identities: [{rampid, abc}], package_ids: [pkg-42]}` → expect `eligible_package_ids: [pkg-42]`, `serve_window_sec: 60`, `tmpx: `. -**Step 2** — buyer-internal, 5 impressions: for each, decode TMPX, generate `impression_id`, write entry to user's exposure log with `fcap_keys: ["buyer-acme:campaign:42"]`. After 5 impressions the log contains 5 entries matching that key in the current window. +**Step 2** — buyer-internal, 5 impressions: for each, decode TMPX, generate `impression_id`, write entry to user's exposure log with `fcap_keys: ["campaign:42"]`. After 5 impressions the log contains 5 entries matching that key in the current window. **Step 3** — wire call: same `identity_match_request`. Eligibility scans the log, counts 5 matching entries, compares to `max_count: 5` → cap tripped → `eligible_package_ids: []`. @@ -353,9 +351,9 @@ User has two resolved identities (`rampid:abc` and `id5:def`). Setup as Scenario ``` user:exposures: = [ - { impression_id: "imp-001", fcap_keys: [...campaign:42], ts: ... }, - { impression_id: "imp-002", fcap_keys: [...campaign:42], ts: ... }, - { impression_id: "imp-003", fcap_keys: [...campaign:42], ts: ... } + { impression_id: "imp-001", fcap_keys: ["campaign:42"], ts: ... }, + { impression_id: "imp-002", fcap_keys: ["campaign:42"], ts: ... }, + { impression_id: "imp-003", fcap_keys: ["campaign:42"], ts: ... 
} ] user:exposures: = [ same three entries ] ``` @@ -394,16 +392,16 @@ SET user:profile: = { segments: { } } Setup: two packages on different sellers, both mapped to the same `advertiser:13` cap: ``` -SET package:identity:pkg-A = { fcap_keys: ["buyer-acme:advertiser:13"], active: true } -SET package:identity:pkg-B = { fcap_keys: ["buyer-acme:advertiser:13"], active: true } -SET fcap_policy:buyer-acme:advertiser:13 = { window_sec: 86400, max_count: 10, active: true } +SET package:identity:pkg-A = { fcap_keys: ["advertiser:13"], active: true } +SET package:identity:pkg-B = { fcap_keys: ["advertiser:13"], active: true } +SET fcap_policy:advertiser:13 = { window_sec: 86400, max_count: 10, active: true } ``` **Step 1** — wire call from Seller A: `package_ids: [pkg-A]` → eligible. -**Step 2** — buyer-internal, 10 impressions on pkg-A, each entry's `fcap_keys` includes `buyer-acme:advertiser:13`. +**Step 2** — buyer-internal, 10 impressions on pkg-A, each entry's `fcap_keys` includes `advertiser:13`. -**Step 3** — wire call from Seller B: `package_ids: [pkg-B]`. Eligibility scans the user's log, counts entries matching `buyer-acme:advertiser:13` within window: 10 ≥ max_count → `eligible_package_ids: []`. +**Step 3** — wire call from Seller B: `package_ids: [pkg-B]`. Eligibility scans the user's log, counts entries matching `advertiser:13` within window: 10 ≥ max_count → `eligible_package_ids: []`. The advertiser-level cap enforces across sellers because the `fcap_key` is shared. No cross-seller coordination needed; the buyer agent is the single source of truth. 
diff --git a/docs/trusted-match/specification.mdx b/docs/trusted-match/specification.mdx index 40359f618a..5169aed477 100644 --- a/docs/trusted-match/specification.mdx +++ b/docs/trusted-match/specification.mdx @@ -7,7 +7,7 @@ description: Authoritative message type definitions, field tables, privacy requi # Trusted Match Protocol Specification -**Experimental.** The Trusted Match Protocol is part of AdCP 3.0 as an experimental surface — it may change between 3.x releases with at least 6 weeks' notice. Sellers implementing TMP MUST declare `trusted_match.core` in `experimental_features`. See [experimental status](/docs/reference/experimental-status) for the full contract. +**Experimental.** The Trusted Match Protocol is part of AdCP 3.0 as an experimental surface — it may change between 3.x releases with at least 6 weeks' notice. Sellers implementing TMP MUST declare `trusted_match.core` in `experimental_features`. See [experimental status](/docs/reference/experimental-status) for the full contract. Until 3.0.0 GA, TMP is pre-launch: fields on this surface may be changed or removed without a deprecation cycle or notice window. This is the authoritative reference for the Trusted Match Protocol (TMP). For conceptual introductions, see the [overview](/docs/trusted-match/) and [core concepts](/docs/trusted-match/context-and-identity). @@ -24,7 +24,7 @@ Specific areas expected to evolve include TMPX exposure tokens, country-partitio | **Offer** | A buyer's response to a context match request. Ranges from simple activation (package_id only) to rich proposals with brand, price, summary, and creative manifest. | | **Available package** | A package from an active media buy that is eligible for evaluation on a given placement. Package metadata — including the originating seller agent — is synced at media buy time. See [Package Sync](#package-sync). | | **Seller agent** | The buyer-side agent that sold the package into a publisher.
Identified by the agent URL declared in the publisher's `adagents.json` `authorized_agents[].url`. Every `AvailablePackage` is bound to exactly one seller agent at sync time. | -| **Eligibility** | List of eligible package IDs returned by Identity Match, plus a TTL caching contract. The buyer computes eligibility from frequency caps, audience membership, and other signals; the reasons are opaque to the publisher. | +| **Eligibility** | List of eligible package IDs returned by Identity Match, plus a serve-window throttle. The buyer computes eligibility from frequency caps, audience membership, and other signals; the reasons are opaque to the publisher. | | **Artifact** | A typed content reference associated with a publisher property (article URL, episode EIDR, show Gracenote ID, music ISRC, product GTIN, conversation turn). Each artifact has a `type` and `value`. Referenced in context match requests. | | **Temporal decorrelation** | Random delay introduced between Context Match and Identity Match requests to prevent timing-based correlation. | @@ -195,15 +195,14 @@ Each entry in `identities` is an `{user_token, uid_type}` pair: ### IdentityMatchResponse -Returned by the buyer agent. A list of eligible package IDs with a caching TTL. +Returned by the buyer agent. A list of eligible package IDs with a serve-window throttle. | Field | Type | Required | Description | |---|---|---|---| | `type` | string | Yes | `"identity_match_response"`. Message type discriminator for deserialization. | | `request_id` | string | Yes | Echo of the request's `request_id`. | | `eligible_package_ids` | List\ | Yes | Package IDs the user is eligible for. Packages not listed are ineligible. | -| `serve_window_sec` | integer | Yes (additive — see deprecation note) | Per-package single-shot fcap window, in seconds. Range: 1–300. Default: 60. 
After serving the user one impression on each eligible package within this window, the publisher MUST re-query Identity Match before serving from those packages again. This is **not** a router response cache TTL — it is a buyer-asserted serve throttle. Multi-impression frequency caps are handled separately by buyer-side exposure records, updated out-of-band via TMPX impression callbacks. | -| `ttl_sec` | integer | Yes (deprecated) | DEPRECATED — use `serve_window_sec`. Originally documented as a router response cache TTL but operationally functioned as a per-package serve throttle, conflating two distinct concerns. Senders during the deprecation window SHOULD populate both `ttl_sec` and `serve_window_sec` with the same value; receivers SHOULD prefer `serve_window_sec` when both are present. Removed in a future 3.0.x release ≥ 6 weeks after the deprecation notice published 2026-04-26 (earliest landing 2026-06-07). | +| `serve_window_sec` | integer | Yes | Per-package single-shot fcap window, in seconds. Range: 1–300. Default: 60. After serving the user one impression on each eligible package within this window, the publisher MUST re-query Identity Match before serving from those packages again. This is **not** a router response cache TTL — it is a buyer-asserted serve throttle. Multi-impression frequency caps are handled separately by buyer-side exposure records, updated out-of-band via TMPX impression callbacks. | | `tmpx` | string | No | HPKE-encrypted exposure token containing resolved user identity tokens. The publisher substitutes this into creative tracking URLs as `{TMPX}`. The buyer's impression pixel receives the token, enabling real-time per-user frequency state updates. Wire format: `kid.base64url_nopad(ciphertext)` (unpadded, no `=` characters). Publishers MUST treat this value as opaque pass-through data. | The response includes eligible package IDs, a serve-window throttle, and an optional `tmpx` field. 
The TMPX token is an HPKE-encrypted exposure token that flows through creative tracking URLs to the buyer's impression pixel, enabling real-time per-user frequency state updates without exposing user identity to the publisher. The buyer computes eligibility from whatever identity signals they have (frequency caps, audience membership, purchase history) and returns only the packages that pass. The publisher does not need to know why a package was excluded — just which packages are eligible. @@ -659,7 +658,7 @@ Context Match responses are cacheable because the same packages are evaluated fo - Providers MAY include a `cache_ttl` field (integer, seconds) in Context Match responses to override the default. Routers MUST respect this value when present. - Identity Match responses are bound by `serve_window_sec` (per-package single-shot fcap, max 300s, default 60s). Routers MAY apply an internal deduplication cache keyed on `{identities_hash, provider_id, package_ids_hash, consent_hash}`, where `identities_hash` is the SHA-256 of the canonical `identities` bytes defined in [Identity Match signed fields](#identity-match-signed-fields) (computed over the per-provider filtered subset); `package_ids_hash` is SHA-256 over the JCS serialization of the sorted `package_ids` array; `consent_hash` is SHA-256 over the JCS serialization of the request's `consent` object (or JCS `null` when the field is absent — this distinguishes "consent unknown" from an explicit-empty consent object). JCS framing prevents delimiter-injection: raw consent strings or package IDs containing `|`, `,`, or `\n` cannot collide two distinct inputs. Including the identity set ensures that adding or removing tokens produces a distinct cache entry. Including the package list hash ensures cached responses are invalidated when the active package set changes (e.g., a new media buy activates). Including the consent hash prevents eligibility decisions taken under one consent state from being served under another. 
The publisher's binding contract is the serve-window throttle, not the router's internal cache window. - When a provider's targeting configuration changes (new packages, updated targeting rules), the provider SHOULD return `"cache_ttl": 0` (Context Match) or `"serve_window_sec": 1` (Identity Match) until the change has propagated, then resume normal values. -- `cache_ttl` (Context Match) has a schema-enforced maximum of 86400 seconds. `serve_window_sec` is bounded at 300 seconds — longer windows make per-package fcap too coarse for typical campaigns, shorter than the IdentityMatch round-trip wastes the throttle. The deprecated `ttl_sec` field retains its existing 86400 maximum until removal in a future 3.0.x release ≥ 6 weeks after the 2026-04-26 deprecation notice. +- `cache_ttl` (Context Match) has a schema-enforced maximum of 86400 seconds. `serve_window_sec` is bounded at 300 seconds — longer windows make per-package fcap too coarse for typical campaigns, shorter than the IdentityMatch round-trip wastes the throttle. ## Conformance Levels diff --git a/specs/identitymatch-fcap-architecture.md b/specs/identitymatch-fcap-architecture.md index 6b6862de08..0b2c1c315e 100644 --- a/specs/identitymatch-fcap-architecture.md +++ b/specs/identitymatch-fcap-architecture.md @@ -1,13 +1,13 @@ # IdentityMatch & Frequency Capping — Architecture Spec **Status**: landed (architecture decisions). Implementation guidance promoted to `docs/trusted-match/`. -**Target release**: 3.0.1 (additive wire change), then deprecation removal in a 3.0.x ≥ 6 weeks after. +**Target release**: 3.0.1 (additive wire change). **Branch**: `bokelley/idmatch-design` **PR**: [#3359](https://github.com/adcontextprotocol/adcp/pull/3359) This spec captures the architecture decisions behind the buyer-side IdentityMatch surface in TMP. 
It is a **design-history document**, not an implementation reference — the authoritative implementation guidance lives in: -- [`docs/trusted-match/specification.mdx`](../docs/trusted-match/specification.mdx) — wire spec (normative): `serve_window_sec` field, `ttl_sec` deprecation, conformance invariants for IdentityMatch eligibility, TMPX binary format. +- [`docs/trusted-match/specification.mdx`](../docs/trusted-match/specification.mdx) — wire spec (normative): `serve_window_sec` field, conformance invariants for IdentityMatch eligibility, TMPX binary format. - [`docs/trusted-match/identity-match-implementation.mdx`](../docs/trusted-match/identity-match-implementation.mdx) — implementation guidance (non-normative): `fcap_keys` label model, reference valkey data model, merge rules, SDK primitives, pluggable store interfaces, production topology, conformance scenarios. - [`docs/trusted-match/buyer-guide.mdx`](../docs/trusted-match/buyer-guide.mdx) — buyer-agent integration walkthrough; updated for `serve_window_sec` semantic. - [`docs/trusted-match/migration-from-axe.mdx`](../docs/trusted-match/migration-from-axe.mdx) — adds OpenRTB 2.6 `User.eids` cross-walk for buyers bridging from OpenRTB-shaped pipelines. @@ -40,7 +40,7 @@ The protocol describes **what** the service must compute, not **how** it stores ### 2. `fcap_keys[]` as a label model, not hierarchy -`tenant:dimension:value` (e.g. `buyer-acme:campaign:42`, `buyer-acme:advertiser:13`). Tenant prefix required to prevent cross-tenant counter pollution in multi-tenant fleets. Charset constraint `[a-zA-Z0-9_-]+` per segment for unambiguous parsing. Buyers choose dimensions; the protocol does not enumerate them. See [implementation guide § fcap_keys label model](../docs/trusted-match/identity-match-implementation.mdx#fcap_keys-label-model). +`dimension:value` (e.g. `campaign:42`, `advertiser:13`). Multi-tenant operators adopt a tenant prefix as a deployment convention (e.g. 
`buyer-acme:campaign:42`) — not a protocol requirement. Charset constraint `[a-zA-Z0-9_-]+` per segment for unambiguous parsing. Buyers choose dimensions; the protocol does not enumerate them. See [implementation guide § fcap_keys label model](../docs/trusted-match/identity-match-implementation.mdx#fcap_keys-label-model). ### 3. Cross-identity dedup via `impression_id`, not merge rules @@ -54,7 +54,7 @@ The original `ttl_sec` field was documented as a router cache TTL but operationa Replacement: `serve_window_sec` (1–300, default 60) with the corrected semantic — *after serving the user one impression on each eligible package within this window, the publisher MUST re-query Identity Match before serving from those packages again.* -`ttl_sec` is deprecated. 6-week notice published 2026-04-26; removal in a 3.0.x release ≥ 2026-06-07. During the window, senders SHOULD populate both fields with the same value; receivers SHOULD prefer `serve_window_sec`. +`ttl_sec` is removed. No deprecation window: TMP is pre-launch (experimental, pre-3.0.0 GA) and not subject to deprecation cycles. The field is not present in the 3.0.1 schema. ### 5. Two composable SDK primitives for impression handling, not one @@ -105,7 +105,7 @@ These came out of pre-merge review. Each warrants a focused follow-up rather tha ### What this PR landed -- Wire spec change (additive): `serve_window_sec` field on `identity-match-response.json`, `ttl_sec` deprecation notice in `CHANGELOG.md`. +- Wire spec change: `serve_window_sec` field added to `identity-match-response.json` and made required; `ttl_sec` removed (pre-launch, no deprecation cycle needed). - Doc updates to `docs/trusted-match/specification.mdx`, `buyer-guide.mdx`, `migration-from-axe.mdx`. - New page: `docs/trusted-match/identity-match-implementation.mdx` (implementation guide). - This architecture-rationale doc. 
diff --git a/static/schemas/source/tmp/identity-match-response.json b/static/schemas/source/tmp/identity-match-response.json index eb1825d7c1..0bb95e6433 100644 --- a/static/schemas/source/tmp/identity-match-response.json +++ b/static/schemas/source/tmp/identity-match-response.json @@ -29,12 +29,6 @@ "maximum": 300, "default": 60 }, - "ttl_sec": { - "type": "integer", - "description": "DEPRECATED — use serve_window_sec. Originally documented as a router response cache TTL but operationally functioned as a per-package serve throttle, conflating two distinct concerns (response caching vs frequency capping). Senders during the deprecation window SHOULD populate ttl_sec and serve_window_sec with the same value; receivers SHOULD prefer serve_window_sec when both are present. Removed in a future 3.0.x release per the experimental-status contract — see CHANGELOG and specs/identitymatch-fcap-architecture.md.", - "minimum": 0, - "maximum": 86400 - }, "tmpx": { "type": "string", "description": "HPKE-encrypted exposure token containing the resolved user identity tokens. The publisher substitutes this into creative tracking URLs as {TMPX}. The buyer's impression pixel receives the token at serve time, enabling real-time per-user frequency state updates. Wire format: kid.base64url_nopad(ciphertext) — unpadded base64url per RFC 4648 section 5 (no = characters). Publishers MUST treat this value as opaque pass-through data." @@ -44,7 +38,7 @@ "type", "request_id", "eligible_package_ids", - "ttl_sec" + "serve_window_sec" ], "additionalProperties": true }