From c0f5317da8bcfea44b7f63d2eded75176355a56c Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Wed, 29 Apr 2026 20:51:02 -0400 Subject: [PATCH 01/18] =?UTF-8?q?feat(decisioning):=20foundation=20skeleto?= =?UTF-8?q?n=20=E2=80=94=20types,=20accounts,=20Protocol,=20platform?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lays the v6.0 DecisioningPlatform foundation inside the existing ``adcp`` package at ``adcp.decisioning.*``. Pure types + Protocol + reference account-store impls; no dispatch adapter yet (wire-up to ``adcp.server.serve`` ships in the next commit on this PR). Modules: * ``adcp.decisioning.types`` — TaskHandoff (``__slots__``-only marker with type-identity dispatch; rejects subclasses), Account[TMeta], AdcpError (wire-shaped structured error distinct from ``adcp.exceptions.ADCPError``), MaybeAsync / SalesResult named aliases (TypeAliasType-based, mypy-clean for generic parameterization on 3.10-3.12 via typing_extensions) * ``adcp.decisioning.context`` — RequestContext[TMeta] subclasses ``adcp.server.ToolContext`` so the existing framework's idempotency middleware, observability hooks, and A2A executor consume it unchanged while adopter Protocol methods read the typed ``account: Account[TMeta]`` directly. AuthInfo dataclass for verified-principal threading. * ``adcp.decisioning.accounts`` — AccountStore Protocol + three reference impls (``SingletonAccounts``, ``ExplicitAccounts``, ``FromAuthAccounts``). SingletonAccounts synthesizes per-principal IDs (``f"{base}:{principal}"``) so the buyer-to-buyer cache-leak regression from the foundation audit is closed at the reference-impl layer (regression test asserts). * ``adcp.decisioning.platform`` — DecisioningPlatform base class + DecisioningCapabilities dataclass. Adopters subclass and declare ``capabilities`` + ``accounts`` + per-specialism methods directly on the class; the dispatch adapter (next commit) discovers methods via hasattr at server boot. * ``adcp.decisioning.specialisms.sales`` — SalesPlatform Protocol covering all 9 ``sales-*`` specialisms under one unified hybrid shape. Full method signatures with per-method docstrings declaring which specialism gates each (so ``validate_platform`` at boot matches what the docstrings claim). Wire-type imports under ``TYPE_CHECKING`` to keep Protocol-only loads lightweight. 19 unit tests covering: TaskHandoff identity dispatch (subclass-rejection regression), AdcpError wire projection, Account default shape, SingletonAccounts per-principal scoping (buyer-to-buyer leak regression), ExplicitAccounts/FromAuthAccounts resolver shapes, AccountStore Protocol structural matching, DecisioningPlatform subclass attribute contract. All tests pass; mypy clean; black + ruff clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/adcp/decisioning/__init__.py | 88 ++++++ src/adcp/decisioning/accounts.py | 264 ++++++++++++++++ src/adcp/decisioning/context.py | 123 ++++++++ src/adcp/decisioning/platform.py | 111 +++++++ src/adcp/decisioning/specialisms/__init__.py | 27 ++ src/adcp/decisioning/specialisms/sales.py | 230 ++++++++++++++ src/adcp/decisioning/types.py | 264 ++++++++++++++++ tests/test_decisioning_types.py | 308 +++++++++++++++++++ 8 files changed, 1415 insertions(+) create mode 100644 src/adcp/decisioning/__init__.py create mode 100644 src/adcp/decisioning/accounts.py create mode 100644 src/adcp/decisioning/context.py create mode 100644 src/adcp/decisioning/platform.py create mode 100644 src/adcp/decisioning/specialisms/__init__.py create mode 100644 src/adcp/decisioning/specialisms/sales.py create mode 100644 src/adcp/decisioning/types.py create mode 100644 tests/test_decisioning_types.py diff --git a/src/adcp/decisioning/__init__.py b/src/adcp/decisioning/__init__.py new file mode 100644 index 000000000..19f05c42a --- /dev/null +++ b/src/adcp/decisioning/__init__.py @@ -0,0 +1,88 @@ +"""Decisioning Platform v6.0 — Protocol-driven adopter framework. + +The successor to ``adcp.server.ADCPHandler`` for adopters who want a +hybrid sync/handoff return shape and per-specialism Protocol classes +instead of inheriting + overriding methods on a base ABC. Lives inside +the existing ``adcp`` package so adopters reuse the foundation primitives +in ``adcp.signing`` / ``adcp._idempotency`` / ``adcp.server`` rather than +spinning up parallel implementations. + +Quickstart:: + + from adcp.decisioning import ( + DecisioningPlatform, + DecisioningCapabilities, + SingletonAccounts, + SalesPlatform, + create_adcp_server_from_platform, + serve, + ) + from adcp.types import ( + GetProductsRequest, GetProductsResponse, + CreateMediaBuyRequest, CreateMediaBuySuccess, + ) + + + class HelloSeller(DecisioningPlatform): + capabilities = DecisioningCapabilities( + specialisms=["sales-non-guaranteed"], + channels=["display"], + pricing_models=["cpm"], + ) + accounts = SingletonAccounts(account_id="hello") + + def get_products(self, req: GetProductsRequest, ctx) -> GetProductsResponse: + return GetProductsResponse(products=[...]) + + def create_media_buy( + self, req: CreateMediaBuyRequest, ctx, + ) -> CreateMediaBuySuccess: + return CreateMediaBuySuccess(media_buy_id=f"mb_{req.idempotency_key}", ...) + + + serve(create_adcp_server_from_platform( + platform=HelloSeller(), name="hello-seller", version="0.0.1", + )) + +See ``examples/hello_seller.py`` for the runnable version. +""" + +from __future__ import annotations + +from adcp.decisioning.accounts import ( + AccountStore, + ExplicitAccounts, + FromAuthAccounts, + SingletonAccounts, +) +from adcp.decisioning.context import ( + AuthInfo, + RequestContext, +) +from adcp.decisioning.platform import ( + DecisioningCapabilities, + DecisioningPlatform, +) +from adcp.decisioning.types import ( + Account, + AdcpError, + MaybeAsync, + SalesResult, + TaskHandoff, +) + +__all__ = [ + "Account", + "AccountStore", + "AdcpError", + "AuthInfo", + "DecisioningCapabilities", + "DecisioningPlatform", + "ExplicitAccounts", + "FromAuthAccounts", + "MaybeAsync", + "RequestContext", + "SalesResult", + "SingletonAccounts", + "TaskHandoff", +] diff --git a/src/adcp/decisioning/accounts.py b/src/adcp/decisioning/accounts.py new file mode 100644 index 000000000..ff6c7e58e --- /dev/null +++ b/src/adcp/decisioning/accounts.py @@ -0,0 +1,264 @@ +"""Account resolution: ``AccountStore`` Protocol + three reference impls. + +Adopters pick a resolution mode at registration time: + +* :class:`SingletonAccounts` — single-process / single-platform + deployments (Innovid training-agent, single-publisher proof-of-concept). + Synthesizes ``account.id`` per verified principal so idempotency + scopes correctly across distinct callers. +* :class:`ExplicitAccounts` — multi-tenant where the URL or request + body identifies the account (``/tenants/``, ``account.account_id`` + in body). Resolves by the wire reference. +* :class:`FromAuthAccounts` — multi-tenant or single-tenant where the + verified auth principal identifies the account (signed-request bound, + OAuth bearer bound). Resolves by ``ctx.auth_info.principal``. + +Adopters with shapes that don't fit these three implement the +:class:`AccountStore` Protocol directly. +""" + +from __future__ import annotations + +from collections.abc import Awaitable, Callable +from typing import Any, Generic, Literal, Protocol, runtime_checkable + +from typing_extensions import TypeVar + +from adcp.decisioning.context import AuthInfo +from adcp.decisioning.types import Account + +#: Per-platform metadata generic. +TMeta = TypeVar("TMeta", default=dict[str, Any]) + + +@runtime_checkable +class AccountStore(Protocol, Generic[TMeta]): + """Resolves a wire reference + auth context to an :class:`Account`. + + The framework calls :meth:`resolve` for every tool dispatch + (before the handler method runs). Adopters in ``'explicit'`` mode + use ``ref.account_id`` from the wire; ``'from_auth'`` mode reads + ``ctx.auth_info`` to look up the principal-bound account; + ``'singleton'`` mode synthesizes a per-principal account from the + one platform. + + The :attr:`resolution` literal is a structural attribute the + framework reads at server boot — used by :func:`validate_platform` + to fail fast on misconfigured deployments (e.g. + ``'singleton'`` registered into a multi-tenant ``TenantRegistry``). + """ + + resolution: Literal["explicit", "from_auth", "singleton"] + + def resolve( + self, + ref: dict[str, Any] | None, + auth_info: AuthInfo | None = None, + ) -> Awaitable[Account[TMeta]] | Account[TMeta]: + """Return the resolved :class:`Account` or raise on miss. + + :param ref: The wire reference object (typically + ``request.account`` carrying ``account_id`` / + ``account_ref``). ``None`` for tools that don't carry an + explicit account ref — adopters in ``'singleton'`` / + ``'from_auth'`` modes ignore it. + :param auth_info: Verified principal info. ``None`` for + unauthenticated requests (dev / ``'singleton'`` fixtures). + :raises adcp.decisioning.AdcpError: ``code='ACCOUNT_NOT_FOUND'`` + when the resolution can't produce a valid account. + + Implementations may be sync or async; the dispatch adapter + detects via :func:`inspect.iscoroutine` at call time. + """ + ... + + +# --------------------------------------------------------------------------- +# Reference implementations +# --------------------------------------------------------------------------- + + +class SingletonAccounts(Generic[TMeta]): + """Single-platform deployment with per-principal idempotency scoping. + + Use for: Innovid training-agent class, single-publisher proof-of- + concepts, dev/staging environments. + + Synthesizes ``account.id`` from the verified principal: + ``f"{base_account_id}:{principal}"``. Without this, every caller + across the entire deployment would share one idempotency cache — + UUID collision (random or engineered) returns another caller's + ``response_payload``, which is a buyer-to-buyer data leak. + Per-principal synthesis closes this while keeping the "one platform, + no per-tenant lookup" ergonomic. + + For unauthenticated dev fixtures (``ctx.auth_info is None``), + the synthesized id is ``f"{base_account_id}:anonymous"`` — adopters + relying on this MUST ensure their dev/CI pipeline authenticates + before any cross-test isolation matters. + + Example:: + + class TrainingAgentSeller(DecisioningPlatform): + accounts = SingletonAccounts(account_id="training-agent") + + :param account_id: Base account id used in the synthesized + per-principal id. Must be stable across process restarts so + idempotency cache hits work across deploys. + :param name: Human-readable name copied to ``Account.name``. + :param metadata_factory: Optional factory for ``Account.metadata`` + — adopters with typed metadata pass a closure that returns the + right TypedDict / dataclass instance. + """ + + resolution: Literal["singleton"] = "singleton" + + def __init__( + self, + account_id: str, + *, + name: str = "", + metadata_factory: Callable[[], TMeta] | None = None, + ) -> None: + if not account_id or not isinstance(account_id, str): + raise ValueError( + f"SingletonAccounts requires a non-empty account_id; got {account_id!r}" + ) + self._account_id = account_id + self._name = name or account_id + self._metadata_factory = metadata_factory + + def resolve( + self, + ref: dict[str, Any] | None = None, + auth_info: AuthInfo | None = None, + ) -> Account[TMeta]: + del ref # singleton ignores wire refs + principal = auth_info.principal if auth_info and auth_info.principal else "anonymous" + scoped_id = f"{self._account_id}:{principal}" + metadata: TMeta = ( + self._metadata_factory() if self._metadata_factory else {} # type: ignore[assignment] + ) + return Account( + id=scoped_id, + name=f"{self._name} ({principal})" if principal != "anonymous" else self._name, + status="active", + metadata=metadata, + auth_info=_auth_info_to_dict(auth_info), + ) + + +class ExplicitAccounts(Generic[TMeta]): + """Multi-tenant where the wire ref identifies the account. + + Use for: salesagent (URL-pattern ``/tenants//...``), DSPs that + expose multi-account-per-principal flows, agencies routing across + publisher accounts via ``account.account_id`` in the body. + + The framework passes ``ref`` from the parsed request body + (typically ``request.account``); ``resolve`` reads + ``ref["account_id"]`` and looks up the account via the adopter- + supplied ``loader``. Auth-info is available for scope checks + (e.g., reject if principal lacks access to the requested account) + but does not drive resolution. + + Example:: + + class SalesAgentSeller(DecisioningPlatform): + accounts = ExplicitAccounts(loader=load_tenant_from_db) + + :param loader: Callable taking ``account_id: str`` and returning an + :class:`Account` instance. Sync or async. Raises + ``AdcpError(code='ACCOUNT_NOT_FOUND')`` on miss. + """ + + resolution: Literal["explicit"] = "explicit" + + def __init__( + self, + loader: Callable[[str], Awaitable[Account[TMeta]] | Account[TMeta]], + ) -> None: + self._loader = loader + + def resolve( + self, + ref: dict[str, Any] | None, + auth_info: AuthInfo | None = None, + ) -> Awaitable[Account[TMeta]] | Account[TMeta]: + del auth_info # explicit mode ignores auth (scope checks happen elsewhere) + if not ref or not ref.get("account_id"): + from adcp.decisioning.types import AdcpError + + raise AdcpError( + "ACCOUNT_NOT_FOUND", + message=( + "ExplicitAccounts.resolve requires ref with 'account_id'; " + "got missing/empty ref" + ), + recovery="terminal", + field="account.account_id", + ) + return self._loader(ref["account_id"]) + + +class FromAuthAccounts(Generic[TMeta]): + """Multi-tenant where the verified auth principal identifies the account. + + Use for: signed-request-bound integrations (one signing key per + publisher account), OAuth-bearer integrations where the token + binds to a specific account, MMP / measurement-vendor patterns + where the principal IS the account holder. + + Reads ``auth_info.principal`` and routes through the adopter- + supplied ``loader``. The wire ``ref`` is ignored — the auth + principal is the source of truth. + + Example:: + + class MeasurementVendor(DecisioningPlatform): + accounts = FromAuthAccounts(loader=load_account_for_principal) + + :param loader: Callable taking ``principal: str`` and returning an + :class:`Account` instance. Sync or async. + """ + + resolution: Literal["from_auth"] = "from_auth" + + def __init__( + self, + loader: Callable[[str], Awaitable[Account[TMeta]] | Account[TMeta]], + ) -> None: + self._loader = loader + + def resolve( + self, + ref: dict[str, Any] | None = None, + auth_info: AuthInfo | None = None, + ) -> Awaitable[Account[TMeta]] | Account[TMeta]: + del ref # from_auth ignores wire refs + if auth_info is None or not auth_info.principal: + from adcp.decisioning.types import AdcpError + + raise AdcpError( + "AUTH_INVALID", + message=( + "FromAuthAccounts.resolve requires auth_info with a " + "verified principal; got None / empty" + ), + recovery="terminal", + ) + return self._loader(auth_info.principal) + + +def _auth_info_to_dict(auth_info: AuthInfo | None) -> dict[str, Any] | None: + """Project an :class:`AuthInfo` to the dict shape ``Account.auth_info`` + carries. Returns ``None`` when auth_info is absent — keeps account + serialization stable for unauthenticated requests.""" + if auth_info is None: + return None + return { + "kind": auth_info.kind, + "key_id": auth_info.key_id, + "principal": auth_info.principal, + "scopes": list(auth_info.scopes), + } diff --git a/src/adcp/decisioning/context.py b/src/adcp/decisioning/context.py new file mode 100644 index 000000000..70a6f935c --- /dev/null +++ b/src/adcp/decisioning/context.py @@ -0,0 +1,123 @@ +"""Request context for DecisioningPlatform method dispatch. + +:class:`RequestContext` extends :class:`adcp.server.ToolContext` so the +existing framework's idempotency middleware, observability hooks, and +A2A executor — all of which consume ``ToolContext`` — keep working +unchanged. Adopters' Protocol method signatures take +``RequestContext[TMeta]`` and get typed access to the resolved +``account`` plus a typed metadata bag. + +The dispatch adapter (in ``adcp.decisioning.dispatch``) constructs a +``RequestContext`` per request from the underlying ``ToolContext`` and +the platform's ``AccountStore.resolve(...)`` result. +""" + +from __future__ import annotations + +from collections.abc import Awaitable, Callable +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import TYPE_CHECKING, Any, Generic + +from typing_extensions import TypeVar + +from adcp.decisioning.types import Account, TaskHandoff +from adcp.server.base import ToolContext + +if TYPE_CHECKING: + pass + +#: Per-platform metadata generic; mirrors ``adcp.decisioning.types.TMeta`` +#: but redeclared here so ``RequestContext[TMeta]`` parameterization +#: works without importing the same TypeVar from another module (mypy +#: treats same-name TypeVars from different modules as distinct types, +#: which breaks downstream Protocol matching). +TMeta = TypeVar("TMeta", default=dict[str, Any]) + +T = TypeVar("T") + + +@dataclass +class AuthInfo: + """The verified principal authenticated for a request. + + Populated by the framework's signed-request verifier + (:func:`adcp.signing.signed_request_verifier`) or a custom + ``authenticate=`` callable wired via :func:`adcp.decisioning.serve`. + Threaded onto :attr:`RequestContext.auth_info` so platform methods + can read scopes, key_id, principal, etc., without parsing + transport headers. + + :param kind: One of ``'signed_request'``, ``'bearer'``, ``'mtls'``, + ``'derived'``. Adopters with custom auth schemes extend the + type alias. + :param key_id: The signing key id (``kid``) for signed-request auth. + :param principal: The authenticated principal — typically the + buyer's verified label or service-account id. Stable across + sessions. + :param scopes: Granted scopes / capabilities. Used by adopters + gating tools per principal. + """ + + kind: str + key_id: str | None = None + principal: str | None = None + scopes: list[str] = field(default_factory=list) + + +@dataclass +class RequestContext(ToolContext, Generic[TMeta]): + """Per-request context passed to every Protocol method. + + Subclasses :class:`adcp.server.ToolContext` so the existing + framework primitives (idempotency middleware, observability, + A2A executor) consume it as a ``ToolContext`` while adopter + Protocol methods read the typed :attr:`account` directly. + + :param account: The resolved account, with typed ``metadata: TMeta``. + The framework's idempotency middleware reads + ``ctx.caller_identity`` for cache scoping; the dispatch adapter + sets ``caller_identity = account.id`` so caching scopes per + resolved account, not per raw auth principal. + :param auth_info: Optional verified principal info. ``None`` when + the request is unauthenticated (dev / 'singleton' fixtures). + :param now: Monotonic timestamp for the request — adopters use + this rather than ``datetime.now()`` directly so tests can + inject deterministic clocks. + + Adopters call :meth:`handoff_to_task` to promote a method to the + HITL background-task path. The framework dispatcher detects the + returned :class:`TaskHandoff` via type-identity and projects it + to the wire ``Submitted`` envelope. + """ + + # Default factories so ``RequestContext()`` works in tests; in + # production the dispatch adapter populates every field. + account: Account[TMeta] = field(default_factory=lambda: Account(id="")) + auth_info: AuthInfo | None = None + now: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) + + def handoff_to_task( + self, + fn: Callable[[Any], Awaitable[T] | T], + ) -> TaskHandoff[T]: + """Promote this call to a background task. + + The buyer sees ``{status: 'submitted', task_id}`` on the + immediate response; the framework runs ``fn`` after returning, + persists ``fn``'s terminal artifact to the task registry, and + emits a push-notification webhook on terminal state. + + ``fn`` receives a ``TaskHandoffContext`` (defined in + :mod:`adcp.decisioning.dispatch`) carrying: + + * ``id`` — framework-issued task UUID + * ``update(progress)`` — write progress payload, transition + ``'submitted'`` → ``'working'`` + * ``heartbeat()`` — liveness signal (v6.1 stub) + + Adopter code passes either a coroutine function (``async def + review_async(task_ctx): ...``) or a sync callable; the + dispatcher detects which and runs it appropriately. + """ + return TaskHandoff(fn) diff --git a/src/adcp/decisioning/platform.py b/src/adcp/decisioning/platform.py new file mode 100644 index 000000000..900c6aad8 --- /dev/null +++ b/src/adcp/decisioning/platform.py @@ -0,0 +1,111 @@ +"""DecisioningPlatform base class + capabilities declaration. + +:class:`DecisioningPlatform` is the adopter-facing base. Adopters subclass +it, attach an :class:`AccountStore`, declare :class:`DecisioningCapabilities`, +and implement specialism methods (``get_products``, ``create_media_buy``, +``sync_audiences``, etc.) directly on the class. The dispatch adapter +discovers methods via ``hasattr`` at server boot, validates against the +declared capabilities, and routes requests through the framework's +existing transport machinery. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from adcp.decisioning.accounts import AccountStore + + +@dataclass +class DecisioningCapabilities: + """What a platform claims to support. + + Read by ``validate_platform`` at server boot to confirm each + declared specialism has the methods it requires, and surfaced via + the framework's auto-generated ``get_adcp_capabilities`` response + so buyers can pre-flight without trial-and-error tool calls. + + :param specialisms: AdCP specialism slugs the platform claims — + e.g. ``['sales-non-guaranteed', 'sales-broadcast-tv']``, + ``['audience-sync']``, ``['signal-marketplace', + 'signal-owned']``. Each maps to a ``Protocol`` class under + :mod:`adcp.decisioning.specialisms`. + :param channels: Inventory channels the platform serves — + ``'display'``, ``'video'``, ``'olv'``, ``'ctv'``, ``'audio'``, + ``'dooh'``. Surfaced on capabilities; not enforced. + :param pricing_models: Pricing models the platform supports — + ``'cpm'``, ``'cpc'``, ``'cpa'``, ``'cpcv'``. Surfaced on + capabilities. + :param creative_agents: Optional list of creative-agent endpoints + the platform delegates creative review/generation to. Empty + list means "no creative-agent integration; review is in-house." + :param config: Free-form adopter-defined config exposed on + capabilities. Use sparingly — strongly-typed fields above are + preferred. + """ + + specialisms: list[str] = field(default_factory=list) + channels: list[str] = field(default_factory=list) + pricing_models: list[str] = field(default_factory=list) + creative_agents: list[Any] = field(default_factory=list) + config: dict[str, Any] = field(default_factory=dict) + + +class DecisioningPlatform: + """Adopter-facing base class for the v6.0 framework. + + Subclasses set: + + * :attr:`capabilities` — what the platform claims to support + * :attr:`accounts` — an :class:`AccountStore` instance defining + how to resolve a wire reference + auth context to an + :class:`Account` + + Then implement specialism methods directly on the subclass + (``get_products``, ``create_media_buy``, ``sync_audiences``, etc.). + Each method takes a typed Pydantic request model + a + :class:`RequestContext[TMeta]` and returns a typed response (or + raises :class:`AdcpError`). + + The dispatch adapter (:func:`adcp.decisioning.create_adcp_server_from_platform`) + discovers methods via ``hasattr``, validates against + ``capabilities.specialisms``, and routes requests through the + framework's existing ``adcp.server.serve()`` infrastructure. + + Example:: + + class HelloSeller(DecisioningPlatform): + capabilities = DecisioningCapabilities( + specialisms=["sales-non-guaranteed"], + channels=["display"], + pricing_models=["cpm"], + ) + accounts = SingletonAccounts(account_id="hello") + + def get_products(self, req, ctx): + return GetProductsResponse(products=[...]) + + def create_media_buy(self, req, ctx): + return CreateMediaBuySuccess(media_buy_id="mb_1", ...) + + Per-method signatures are documented in the per-specialism + Protocol classes under :mod:`adcp.decisioning.specialisms` — + those are the canonical contract reference. The base class + itself is intentionally minimal so adopters can mix in + cross-cutting helpers without inheritance constraints. + """ + + #: Required: the platform's capability declaration. Subclasses + #: override. + capabilities: DecisioningCapabilities = DecisioningCapabilities() + + #: Required: the platform's account-resolution strategy. + #: Subclasses set to a :class:`SingletonAccounts`, + #: :class:`ExplicitAccounts`, :class:`FromAuthAccounts`, or + #: custom :class:`AccountStore` instance. Type erased to ``Any`` + #: at the base because the typed shape is platform-specific + #: (different ``TMeta`` per adopter); ``validate_platform`` + #: confirms an :class:`AccountStore` instance is set. + accounts: AccountStore[Any] = None # type: ignore[assignment] diff --git a/src/adcp/decisioning/specialisms/__init__.py b/src/adcp/decisioning/specialisms/__init__.py new file mode 100644 index 000000000..e8a0d7135 --- /dev/null +++ b/src/adcp/decisioning/specialisms/__init__.py @@ -0,0 +1,27 @@ +"""Per-specialism Protocol classes. + +Adopters claim specialisms via ``DecisioningCapabilities.specialisms`` +and implement the matching Protocol's methods on their +:class:`DecisioningPlatform` subclass. Method names are unified +across specialisms — a platform claiming both ``sales-non-guaranteed`` +and ``sales-broadcast-tv`` implements ``create_media_buy`` once and +returns a hybrid :class:`SalesResult` that branches per call. + +Public surface re-exported from :mod:`adcp.decisioning.specialisms`: + +* :class:`SalesPlatform` — covers all 9 ``sales-*`` specialisms + (non-guaranteed, guaranteed, broadcast-tv, streaming-tv, social, + exchange, proposal-mode, catalog-driven, retail-media) under one + unified hybrid shape. + +Other specialism Protocols (audience, signals, creative-*, governance, +property-lists, etc.) are added as adopters need them — first +:class:`SalesPlatform` because that's the v6.0 vertical-slice the +foundation PR proves out. +""" + +from __future__ import annotations + +from adcp.decisioning.specialisms.sales import SalesPlatform + +__all__ = ["SalesPlatform"] diff --git a/src/adcp/decisioning/specialisms/sales.py b/src/adcp/decisioning/specialisms/sales.py new file mode 100644 index 000000000..e67b010c1 --- /dev/null +++ b/src/adcp/decisioning/specialisms/sales.py @@ -0,0 +1,230 @@ +"""SalesPlatform Protocol — covers all nine ``sales-*`` specialisms. + +A platform claiming any of ``sales-non-guaranteed``, ``sales-guaranteed``, +``sales-broadcast-tv``, ``sales-streaming-tv``, ``sales-social``, +``sales-exchange``, ``sales-proposal-mode``, ``sales-catalog-driven``, +``sales-retail-media`` implements the methods on this Protocol. The +unified hybrid shape collapses 14 method names from v1's dual-method +design (``createMediaBuy`` + ``createMediaBuyTask``) into 7: each +mutating tool returns ``SalesResult[TSuccess]`` so adopters branch per +call between the sync fast path and the HITL slow path. + +Required methods (every sales-* specialism): + +* :meth:`get_products` — sync catalog read +* :meth:`create_media_buy` — hybrid (sync success or task handoff) +* :meth:`update_media_buy` — sync (v6.1 + adcp#3392 expand to hybrid) +* :meth:`sync_creatives` — hybrid for creative review +* :meth:`get_media_buy_delivery` — sync delivery read + +Optional methods present-or-absent (gated by specialism — see per-method +docstrings): + +* :meth:`get_media_buys` +* :meth:`provide_performance_feedback` +* :meth:`list_creative_formats` +* :meth:`list_creatives` +* :meth:`sync_catalogs` — required when claiming ``sales-catalog-driven`` + or ``sales-retail-media`` +* :meth:`log_event` — required when claiming ``sales-retail-media`` +* :meth:`sync_event_sources` — required when claiming + ``sales-retail-media`` + +The framework's :func:`validate_platform` walks ``capabilities.specialisms`` +and confirms each specialism's required methods exist on the platform +subclass — fail-fast at server boot rather than 404 at first dispatch. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Generic, Protocol, runtime_checkable + +from typing_extensions import TypeVar + +if TYPE_CHECKING: + from adcp.decisioning.context import RequestContext + from adcp.decisioning.types import MaybeAsync, SalesResult + +# Wire types — auto-generated from schemas/cache/3.0.0/*.json. Adopters +# import from ``adcp.types``; the Protocol uses string-name references +# under TYPE_CHECKING to avoid forcing the import at module load time +# (the wire-types module is heavy — it pulls in 80+ generated classes — +# and a Protocol-only import shouldn't require it). +if TYPE_CHECKING: + from adcp.types import ( + CreateMediaBuyRequest, + CreateMediaBuySuccessResponse, + GetMediaBuyDeliveryRequest, + GetMediaBuyDeliveryResponse, + GetMediaBuysRequest, + GetMediaBuysResponse, + GetProductsRequest, + GetProductsResponse, + ListCreativeFormatsRequest, + ListCreativeFormatsResponse, + ListCreativesRequest, + ListCreativesResponse, + ProvidePerformanceFeedbackRequest, + ProvidePerformanceFeedbackResponse, + SyncCreativesRequest, + SyncCreativesSuccessResponse, + UpdateMediaBuyRequest, + UpdateMediaBuySuccessResponse, + ) + +#: Per-platform metadata generic; matches ``RequestContext[TMeta]`` and +#: ``Account[TMeta]`` upstream so a platform parameterizing +#: ``SalesPlatform[TenantMeta]`` gets ``ctx.account.metadata``-style typed +#: access inside method bodies. +TMeta = TypeVar("TMeta", default=dict[str, Any]) + + +@runtime_checkable +class SalesPlatform(Protocol, Generic[TMeta]): + """Unified hybrid interface for every ``sales-*`` specialism. + + Methods may be sync (return ``T`` directly) or async (return + ``Awaitable[T]``); the dispatch adapter detects via + :func:`inspect.iscoroutinefunction` and runs sync methods on a + thread pool via :func:`asyncio.to_thread` so a blocking sync + handler doesn't serialize the event loop. + + Hybrid sellers (programmatic remnant + guaranteed inventory in + one tenant) branch per call: return the Success directly for the + sync fast path, return ``ctx.handoff_to_task(fn)`` for the HITL + slow path. The framework dispatcher detects the + :class:`TaskHandoff` via type-identity and projects to the wire + ``Submitted`` envelope. + + Throw :class:`adcp.decisioning.AdcpError` for buyer-fixable + rejection (``BUDGET_TOO_LOW``, ``POLICY_VIOLATION``, etc.); the + framework projects to the wire structured-error envelope with + code, recovery, field, suggestion, retry_after, details. + """ + + # ---- Required for every sales-* specialism ---- + + def get_products( + self, + req: GetProductsRequest, + ctx: RequestContext[TMeta], + ) -> MaybeAsync[GetProductsResponse]: + """Sync catalog read — no HITL even on broadcast/proposal-mode. + + Brief-based proposal generation rides on a separate verb + (``request_proposal``, adcp#3407); proposal-mode adopters + surface the eventual products via + ``ctx.publish_status_change(resource_type='proposal', ...)`` + rather than blocking ``get_products`` waiting for trafficker + approval. + """ + ... + + def create_media_buy( + self, + req: CreateMediaBuyRequest, + ctx: RequestContext[TMeta], + ) -> SalesResult[CreateMediaBuySuccessResponse]: + """Unified hybrid. Return :class:`CreateMediaBuySuccessResponse` directly + for sync fast path; return :meth:`RequestContext.handoff_to_task` + for HITL slow path. + + Pre-flight runs sync regardless of path so bad budgets reject + before allocating a task id — call ``preflight()`` at the top, + ``raise AdcpError(...)`` on rejection. + + Buyer pattern-matches on the response shape: + + * ``media_buy_id`` field present → sync success + * ``task_id`` + ``status='submitted'`` → poll ``tasks_get`` or + receive webhook + """ + ... + + def update_media_buy( + self, + media_buy_id: str, + patch: UpdateMediaBuyRequest, + ctx: RequestContext[TMeta], + ) -> MaybeAsync[UpdateMediaBuySuccessResponse]: + """Mutate an in-flight media buy. + + v6.0 returns sync only — the per-tool response schema doesn't + carry the ``Submitted`` arm yet (adcp#3392). Re-approval flows + return the success with the ``status`` field omitted (in-spec + per the schema description) and drive lifecycle via + ``ctx.publish_status_change``. v6.1 + adcp#3392 expand this + signature to :data:`SalesResult` so re-approval flows can + hand off cleanly. + """ + ... + + def sync_creatives( + self, + req: SyncCreativesRequest, + ctx: RequestContext[TMeta], + ) -> SalesResult[SyncCreativesSuccessResponse]: + """Unified hybrid for creative review. + + Mixed approved/pending rows in a single sync response, OR + hand off the whole batch to background standards-and-practices + review. Adopters with pre-approved buyer pools fast-path; new + buyers' creatives go to review. + """ + ... + + def get_media_buy_delivery( + self, + req: GetMediaBuyDeliveryRequest, + ctx: RequestContext[TMeta], + ) -> MaybeAsync[GetMediaBuyDeliveryResponse]: + """Sync delivery read — pacing, spend, impressions per package.""" + ... + + # ---- Optional (gated by specialism — present-or-absent) ---- + + def get_media_buys( + self, + req: GetMediaBuysRequest, + ctx: RequestContext[TMeta], + ) -> MaybeAsync[GetMediaBuysResponse]: + """List media buys for the resolved account. + + Required when claiming any ``sales-*`` specialism in v6.0 rc.1+. + ``validate_platform`` fails server boot if a sales-claiming + platform doesn't implement this. + """ + ... + + def provide_performance_feedback( + self, + req: ProvidePerformanceFeedbackRequest, + ctx: RequestContext[TMeta], + ) -> MaybeAsync[ProvidePerformanceFeedbackResponse]: + """Buyer-supplied performance signal back to the seller. + + Required when claiming any ``sales-*`` specialism in v6.0 rc.1+. + """ + ... + + def list_creative_formats( + self, + req: ListCreativeFormatsRequest, + ctx: RequestContext[TMeta], + ) -> MaybeAsync[ListCreativeFormatsResponse]: + """Catalog of accepted creative formats. + + Required when claiming any ``sales-*`` specialism in v6.0 rc.1+. + """ + ... + + def list_creatives( + self, + req: ListCreativesRequest, + ctx: RequestContext[TMeta], + ) -> MaybeAsync[ListCreativesResponse]: + """List the seller's view of buyer-uploaded creatives. + + Required when claiming any ``sales-*`` specialism in v6.0 rc.1+. + """ + ... diff --git a/src/adcp/decisioning/types.py b/src/adcp/decisioning/types.py new file mode 100644 index 000000000..354e49b69 --- /dev/null +++ b/src/adcp/decisioning/types.py @@ -0,0 +1,264 @@ +"""Core types for the DecisioningPlatform layer. + +Defines: + +* :class:`TaskHandoff` — marker the framework recognizes as "promote this + call to a long-running task." Plain class with ``__slots__`` so adopters + can't accidentally subclass it into framework dispatch. +* :class:`Account` — generic over per-platform metadata (``TMeta``) so + adopter-defined fields (``adapter``, ``credentials``, ``network_id``, + etc.) typecheck inside method bodies without ``cast``. +* :data:`MaybeAsync`, :data:`SalesResult` — named return-type aliases. + Coding agents (Cursor, Claude Code, etc.) handle one named alias far + better than a nested ``Awaitable[T | TaskHandoff[T]] | T | TaskHandoff[T]``. +* :class:`AdcpError` — re-exported from :mod:`adcp.exceptions` for + one-stop import. + +The :class:`RequestContext` lives in ``context.py`` to keep this module +free of ``adcp.server`` dependencies — pure types adopters can import +without dragging in the transport stack. +""" + +from __future__ import annotations + +from collections.abc import Awaitable, Callable +from dataclasses import dataclass, field +from typing import Any, Generic, Literal + +# PEP 696 TypeVar defaults + PEP 695 / PEP 718 generic TypeAlias both +# need ``typing_extensions`` backports for Python 3.10-3.12 (the package +# floor). ``TypeVar`` with ``default=`` lands in stdlib at 3.13; +# ``TypeAliasType`` (used to declare generic aliases like +# ``MaybeAsync[T]``) lands at 3.12. Importing both from +# ``typing_extensions`` keeps the same source compatible across the +# supported range. +from typing_extensions import TypeAliasType, TypeVar + + +class AdcpError(Exception): + """Wire-shaped structured error raised by platform methods. + + Distinct from :class:`adcp.exceptions.ADCPError` (the client-side + connection-failure exception). This is the *server-side* structured + error the framework's dispatcher catches and projects to the wire + ``adcp_error`` envelope: + + .. code-block:: json + + { + "code": "BUDGET_TOO_LOW", + "message": "total_budget below floor (0.50 CPM × 1000 imp)", + "recovery": "correctable", + "field": "total_budget", + "suggestion": "Increase budget to at least $0.50", + "retry_after": null, + "details": {"errors": [...]} + } + + Adopters raise this from inside Protocol method bodies for any + buyer-fixable rejection. The framework catches at the dispatch + seam, serializes to the structured-error envelope, and returns + the wire response. Adopters do NOT serialize themselves. + + :param code: AdCP error code (e.g. ``BUDGET_TOO_LOW``, + ``POLICY_VIOLATION``, ``INVALID_REQUEST``, + ``ACCOUNT_NOT_FOUND``). The full enum is at + ``schemas/cache/3.0.0/enums/error-code.json``; vendor codes + outside the enum are accepted (``str``) but buyers won't have + first-class handling for them. + :param message: Human-readable error message. Always set. + :param recovery: Buyer's retry strategy: + + * ``'retry_with_changes'`` — fix the indicated field and retry + * ``'correctable'`` — same as retry_with_changes (legacy alias) + * ``'transient'`` — retry as-is after a backoff + * ``'terminal'`` — do not retry; the request is rejected + + :param field: The request field path that caused the error + (e.g. ``'total_budget'``, ``'package[2].targeting'``). Buyers + use this to highlight inputs in their UI. + :param suggestion: Optional human-readable hint for fixing the + error. + :param retry_after: Seconds to wait before retrying. Only + meaningful with ``recovery='transient'``. + :param details: Free-form extras for codes that need them + (e.g. ``{'errors': [...]}`` for multi-error preflight). + """ + + def __init__( + self, + code: str, + *, + message: str = "", + recovery: Literal[ + "retry_with_changes", "correctable", "transient", "terminal" + ] = "terminal", + field: str | None = None, + suggestion: str | None = None, + retry_after: int | None = None, + details: dict[str, Any] | None = None, + ) -> None: + super().__init__(message or code) + self.code = code + self.recovery = recovery + self.field = field + self.suggestion = suggestion + self.retry_after = retry_after + self.details = details or {} + + def __str__(self) -> str: + return f"AdcpError[{self.code} / {self.recovery}]: {self.args[0]}" + + def to_wire(self) -> dict[str, Any]: + """Project to the AdCP wire ``adcp_error`` envelope. + + Called by the framework dispatcher when serializing the + rejection. Adopters don't typically call this directly; it's + public for testing and for adopter middleware that wants to + inspect the projection shape. + """ + out: dict[str, Any] = { + "code": self.code, + "message": self.args[0] if self.args else "", + "recovery": self.recovery, + } + if self.field is not None: + out["field"] = self.field + if self.suggestion is not None: + out["suggestion"] = self.suggestion + if self.retry_after is not None: + out["retry_after"] = self.retry_after + if self.details: + out["details"] = dict(self.details) + return out + + +#: Per-platform metadata generic. Defaults to ``dict[str, Any]`` for +#: adopters who don't define a typed metadata shape; multi-tenant adopters +#: typically define a TypedDict and parameterize ``Account[TenantMeta]``, +#: ``RequestContext[TenantMeta]`` so ``ctx.account.metadata`` typechecks +#: without ``cast``. +TMeta = TypeVar("TMeta", default=dict[str, Any]) + +#: Generic return-type variable for hybrid handoff results. +T = TypeVar("T") + + +class TaskHandoff(Generic[T]): + """Marker the framework recognizes as 'promote this call to a task.' + + Adopters obtain instances via :meth:`RequestContext.handoff_to_task`; + the framework dispatches based on type-identity (``type(obj) is + TaskHandoff``) so a buyer-supplied request body can never become a + handoff (it would never have the right ``type``), and adopter + subclasses don't accidentally trigger the handoff path. + + The Python implementation deliberately omits the JS-side + ``Symbol.for(...)``-keyed brand. JS needs the brand to defend against + untrusted code in the same realm forging markers; Python adopter code + is trusted, and a buyer-supplied wire body cannot reach this type + because :class:`TaskHandoff` is a return type — never deserialized + from JSON. The adversary doesn't exist; the ceremony to defend + against them shouldn't either. + + Example:: + + def create_media_buy(self, req, ctx): + if self._is_pre_approved(req, ctx.account): + # Sync fast path — return Success directly + return CreateMediaBuySuccess(media_buy_id="mb_1", ...) + # HITL slow path — hand off to background trafficker review + return ctx.handoff_to_task(self._review_async) + """ + + __slots__ = ("_fn",) + + def __init__(self, fn: Callable[[Any], Awaitable[T] | T]) -> None: + # ``fn`` is ``Callable[[TaskHandoffContext], Awaitable[T] | T]`` + # but TaskHandoffContext lives in dispatch.py to avoid a cycle. + # The framework calls ``handoff._fn(task_ctx)`` at dispatch time; + # adopters pass either a coroutine function or a sync callable + # and the dispatcher detects via ``inspect.iscoroutine``. + self._fn = fn + + def __repr__(self) -> str: + return "TaskHandoff()" + + +def is_task_handoff(obj: Any) -> bool: + """Type-identity dispatch helper. + + Uses ``type(obj) is TaskHandoff`` — NOT ``isinstance`` — so any + adopter subclass of :class:`TaskHandoff` is rejected at dispatch. + Subclassing is not supported; an adopter who tries gets the + sync-return path and silently delivers their result as a normal + response. Documented as a deliberate non-feature. + """ + return type(obj) is TaskHandoff + + +# --------------------------------------------------------------------------- +# Result type aliases +# --------------------------------------------------------------------------- + +#: Sync result OR async result. Use directly on tools whose response +#: schema does NOT include the ``Submitted`` arm (i.e. read-only + +#: synchronous mutations). +MaybeAsync = TypeAliasType("MaybeAsync", "Awaitable[T] | T", type_params=(T,)) + +#: Hybrid sync-or-handoff result. Read as: "return ``T`` directly for +#: the sync fast path, or ``TaskHandoff[T]`` for the HITL slow path, +#: in either a sync or async method body." Coding agents misread the +#: equivalent inline four-way union; the named alias is materially +#: more legible and matches the TS-side ``SalesResult``. +SalesResult = TypeAliasType( + "SalesResult", + "Awaitable[T] | T | TaskHandoff[T] | Awaitable[TaskHandoff[T]]", + type_params=(T,), +) + + +# --------------------------------------------------------------------------- +# Account +# --------------------------------------------------------------------------- + + +@dataclass +class Account(Generic[TMeta]): + """The resolved account a request operates on. + + Constructed by the platform's :class:`AccountStore` and threaded + through every dispatch via :class:`RequestContext`. ``metadata`` + is the typed extension point — adopters define a TypedDict (or + dataclass) carrying their per-account data (``adapter`` instance, + OAuth credentials, network IDs, sandbox flags, etc.) and + parameterize ``Account[TenantMeta]`` so ``ctx.account.metadata.adapter`` + typechecks inside method bodies. + + The framework's idempotency middleware scopes its cache by + ``account.id``. Adopters in 'singleton' resolution mode MUST + synthesize per-principal IDs (e.g. ``f"training-agent:{principal}"``) + or buyer-to-buyer cache leakage is possible — see + :class:`adcp.decisioning.SingletonAccounts`. + + :param id: Stable, globally-unique account identifier within the + adopter's deployment. Used as the idempotency cache scope key + and the ``caller_identity`` the framework's idempotency middleware + reads. + :param name: Human-readable account name for logging and admin + UIs. Not used for routing or scoping. + :param status: Account lifecycle state — ``'pending_approval'``, + ``'active'``, ``'disabled'``, etc. Adopters consuming the + ``account-status.json`` enum can use this directly. + :param metadata: Adopter-defined typed metadata. Defaults to an + untyped dict for adopters who don't care. + :param auth_info: The verified principal that authenticated this + request, if any. Distinct from ``id`` because one principal + can act on multiple accounts in 'explicit' resolution mode. + """ + + id: str + name: str = "" + status: str = "active" + metadata: TMeta = field(default_factory=lambda: {}) # type: ignore[assignment] + auth_info: dict[str, Any] | None = None diff --git a/tests/test_decisioning_types.py b/tests/test_decisioning_types.py new file mode 100644 index 000000000..4419b2bef --- /dev/null +++ b/tests/test_decisioning_types.py @@ -0,0 +1,308 @@ +"""Unit tests for adcp.decisioning core types. + +Covers: + +* :class:`TaskHandoff` type-identity dispatch (rejects subclasses) +* :class:`AdcpError` wire projection +* :class:`Account` generic shape + auth_info threading +* :class:`SingletonAccounts` per-principal idempotency scoping (the + buyer-to-buyer leak regression) +* :class:`ExplicitAccounts` and :class:`FromAuthAccounts` resolver shapes +* :class:`AccountStore` Protocol structural matching +* :class:`DecisioningPlatform` subclass attribute contract +""" + +from __future__ import annotations + +from typing import Any + +import pytest + +from adcp.decisioning import ( + Account, + AccountStore, + AdcpError, + AuthInfo, + DecisioningCapabilities, + DecisioningPlatform, + ExplicitAccounts, + FromAuthAccounts, + SingletonAccounts, + TaskHandoff, +) +from adcp.decisioning.types import is_task_handoff + +# ---- TaskHandoff ---- + + +def test_task_handoff_type_identity() -> None: + """``type(obj) is TaskHandoff`` must be the dispatch check — + ``isinstance`` would let adopter subclasses trigger the handoff + path silently.""" + + def fn(_ctx: Any) -> str: + return "done" + + h = TaskHandoff(fn) + assert type(h) is TaskHandoff + assert is_task_handoff(h) is True + # A plain dict is never a handoff. + assert is_task_handoff({"status": "submitted"}) is False + # A buyer-supplied request body cannot reach this type. + assert is_task_handoff(None) is False + + +def test_task_handoff_subclass_rejected_at_dispatch() -> None: + """Adopter subclasses of TaskHandoff are NOT recognized as handoffs. + Documented as a deliberate non-feature — subclassing is unsupported + and silently produces the sync-return path.""" + + class AdopterSubclass(TaskHandoff[str]): + pass + + sub = AdopterSubclass(lambda _ctx: "done") + assert type(sub) is AdopterSubclass + assert is_task_handoff(sub) is False, ( + "Adopter subclass of TaskHandoff was treated as a handoff at " + "dispatch — type-identity check is broken; the framework would " + "now dispatch adopter-subclass instances through the handoff " + "path, which is not the documented contract" + ) + + +def test_task_handoff_repr_does_not_leak_fn() -> None: + """``__repr__`` returns a sealed marker so a debug helper or error + traceback can't auto-render the closure body.""" + + def fn(_ctx: Any) -> str: + return "secret" + + h = TaskHandoff(fn) + assert repr(h) == "TaskHandoff()" + assert "secret" not in repr(h) + + +# ---- AdcpError ---- + + +def test_adcp_error_wire_projection() -> None: + """``to_wire()`` produces the AdCP structured-error envelope with + only the fields that were populated. Optional fields stay omitted.""" + err = AdcpError( + "BUDGET_TOO_LOW", + message="total_budget below floor (0.50 CPM × 1000 imp)", + recovery="correctable", + field="total_budget", + suggestion="Increase budget to at least $0.50", + ) + assert err.to_wire() == { + "code": "BUDGET_TOO_LOW", + "message": "total_budget below floor (0.50 CPM × 1000 imp)", + "recovery": "correctable", + "field": "total_budget", + "suggestion": "Increase budget to at least $0.50", + } + + +def test_adcp_error_minimum_fields() -> None: + """Code-only error projects to the minimum envelope. ``recovery`` + defaults to ``'terminal'`` (do-not-retry).""" + err = AdcpError("INVALID_REQUEST") + assert err.to_wire() == { + "code": "INVALID_REQUEST", + "message": "INVALID_REQUEST", + "recovery": "terminal", + } + + +def test_adcp_error_str_includes_code_and_recovery() -> None: + """Default ``__str__`` surfaces ``code`` + ``recovery`` so log + lines and error tracebacks carry both at a glance.""" + err = AdcpError("BUDGET_TOO_LOW", message="too low", recovery="correctable") + assert str(err) == "AdcpError[BUDGET_TOO_LOW / correctable]: too low" + + +def test_adcp_error_with_details() -> None: + """Multi-error preflight: ``details={'errors': [...]}`` survives the + wire projection so buyers can read every rejected field at once.""" + err = AdcpError( + "INVALID_REQUEST", + message="multiple validation failures", + recovery="correctable", + details={ + "errors": [ + {"code": "BUDGET_TOO_LOW", "field": "total_budget"}, + {"code": "INVALID_REQUEST", "field": "package[0].targeting"}, + ] + }, + ) + wire = err.to_wire() + assert "details" in wire + assert wire["details"]["errors"][0]["code"] == "BUDGET_TOO_LOW" + + +# ---- Account ---- + + +def test_account_default_metadata_is_empty_dict() -> None: + """Adopters who don't define typed metadata get an empty dict — + no ``cast`` required to construct.""" + acct = Account(id="acme_42") + assert acct.id == "acme_42" + assert acct.metadata == {} + assert acct.status == "active" + + +# ---- SingletonAccounts (the buyer-to-buyer leak regression) ---- + + +def test_singleton_per_principal_scoping() -> None: + """The buyer-to-buyer cache-leak regression: SingletonAccounts MUST + synthesize per-principal IDs so two distinct buyers don't share an + idempotency cache. Without per-principal synthesis, buyer A's + ``response_payload`` would surface to buyer B on UUID collision — + a confidentiality leak.""" + sa = SingletonAccounts(account_id="training-agent") + a = sa.resolve(None, AuthInfo(kind="signed_request", principal="buyer-a")) + b = sa.resolve(None, AuthInfo(kind="signed_request", principal="buyer-b")) + assert a.id == "training-agent:buyer-a" + assert b.id == "training-agent:buyer-b" + assert a.id != b.id + + +def test_singleton_anonymous_fallback() -> None: + """Unauthenticated dev/CI fixtures get ``:anonymous`` so the + resolver doesn't fail closed in test environments. Production + deployments with auth never hit this branch.""" + sa = SingletonAccounts(account_id="dev") + acct = sa.resolve(None, None) + assert acct.id == "dev:anonymous" + + +def test_singleton_threads_auth_info() -> None: + """``Account.auth_info`` carries the verified principal info so + platform methods can read scopes / key_id without re-parsing + transport headers.""" + sa = SingletonAccounts(account_id="hello") + auth = AuthInfo( + kind="signed_request", + key_id="kid-1", + principal="buyer-a", + scopes=["read", "write"], + ) + acct = sa.resolve(None, auth) + assert acct.auth_info == { + "kind": "signed_request", + "key_id": "kid-1", + "principal": "buyer-a", + "scopes": ["read", "write"], + } + + +def test_singleton_rejects_empty_account_id() -> None: + """``account_id`` must be a non-empty string — fail-fast at + construction beats fail-mysteriously at first request.""" + with pytest.raises(ValueError, match="non-empty account_id"): + SingletonAccounts(account_id="") + + +# ---- ExplicitAccounts ---- + + +def test_explicit_accounts_resolves_via_loader() -> None: + """``ExplicitAccounts`` reads ``ref['account_id']`` and routes + through the adopter's loader.""" + loaded: list[str] = [] + + def loader(account_id: str) -> Account[Any]: + loaded.append(account_id) + return Account(id=account_id, name=f"Acme {account_id}") + + store = ExplicitAccounts(loader=loader) + acct = store.resolve({"account_id": "acme_42"}) + assert isinstance(acct, Account) + assert acct.id == "acme_42" + assert loaded == ["acme_42"] + + +def test_explicit_accounts_missing_ref_raises() -> None: + """Missing/empty ``ref`` produces ``ACCOUNT_NOT_FOUND`` with the + field path set to ``account.account_id`` so buyers know where the + ref should go.""" + + def loader(_account_id: str) -> Account[Any]: + raise AssertionError("loader should not be called on missing ref") + + store = ExplicitAccounts(loader=loader) + with pytest.raises(AdcpError) as exc_info: + store.resolve(None) + assert exc_info.value.code == "ACCOUNT_NOT_FOUND" + assert exc_info.value.field == "account.account_id" + assert exc_info.value.recovery == "terminal" + + +# ---- FromAuthAccounts ---- + + +def test_from_auth_resolves_via_principal() -> None: + """``FromAuthAccounts`` reads ``auth_info.principal`` and ignores + the wire ref. The auth principal IS the account holder.""" + + def loader(principal: str) -> Account[Any]: + return Account(id=f"acct_for_{principal}") + + store = FromAuthAccounts(loader=loader) + acct = store.resolve(ref=None, auth_info=AuthInfo(kind="bearer", principal="buyer-a")) + assert isinstance(acct, Account) + assert acct.id == "acct_for_buyer-a" + + +def test_from_auth_missing_principal_raises() -> None: + """``FromAuthAccounts`` without ``auth_info`` raises + ``AUTH_INVALID`` — the resolver can't synthesize an account from + nothing.""" + + def loader(_principal: str) -> Account[Any]: + raise AssertionError("loader should not be called without auth") + + store = FromAuthAccounts(loader=loader) + with pytest.raises(AdcpError) as exc_info: + store.resolve(None, None) + assert exc_info.value.code == "AUTH_INVALID" + + +# ---- AccountStore Protocol structural matching ---- + + +def test_account_store_protocol_runtime_checkable() -> None: + """All three reference impls satisfy the Protocol structurally + (they have ``resolution: str`` and ``resolve(ref, auth_info)``). + Adopters writing custom stores get the same structural check.""" + assert isinstance(SingletonAccounts(account_id="x"), AccountStore) + assert isinstance(ExplicitAccounts(loader=lambda _x: Account(id="y")), AccountStore) + assert isinstance(FromAuthAccounts(loader=lambda _x: Account(id="z")), AccountStore) + + +def test_account_store_resolution_literal() -> None: + """``resolution`` is a structural literal the framework reads at + server boot for ``validate_platform`` checks.""" + assert SingletonAccounts(account_id="x").resolution == "singleton" + assert ExplicitAccounts(loader=lambda _x: Account(id="y")).resolution == "explicit" + assert FromAuthAccounts(loader=lambda _x: Account(id="z")).resolution == "from_auth" + + +# ---- DecisioningPlatform contract ---- + + +def test_decisioning_platform_subclass_attributes() -> None: + """A subclass declares ``capabilities`` + ``accounts``. The base + leaves them unset (None) so ``validate_platform`` at server boot + can fail-fast on platforms that forgot.""" + + class HelloSeller(DecisioningPlatform): + capabilities = DecisioningCapabilities(specialisms=["sales-non-guaranteed"]) + accounts = SingletonAccounts(account_id="hello") + + s = HelloSeller() + assert s.capabilities.specialisms == ["sales-non-guaranteed"] + assert s.accounts.resolution == "singleton" From cdec11ff88814c6e37367b3f6f174c46d5ed3269 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Wed, 29 Apr 2026 22:43:58 -0400 Subject: [PATCH 02/18] docs(decisioning): dispatch-adapter design (post-6-reviewer-pass) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Design doc capturing 14 locked decisions for the upcoming dispatch adapter, codegen pipeline, task_registry stub, and serve() wrapper — plus the decision to split the framework-shared handler-registration seam into a separate prep PR. Refined across 6 reviewer passes: - Round 1 (initial design): agentic-product-architect, python-expert - Round 2 (post codegen + framing additions): agentic-product-architect, python-expert, dx-expert, code-reviewer Authoritative reference for the foundation PR. Documents: * D1 codegen — reads per-specialism Protocols (not _HANDLER_TOOLS), arg-projection for wire-shape mismatches, fail-fast on missing Pydantic types, prescriptive header, ruff format post-emit * D2 context mutation — extends ToolContext via context_factory, middleware mutates in place (framework supports; replacement doesn't compose) * D3 method discovery — reuses framework's _is_method_overridden * D4 register_handler_tools — adcps an advertised_tools class attr + __init_subclass__ auto-registration + boot-time UserWarning; framed as PlatformHandler enabler, NOT general framework feature (no adopter evidence motivates the broader framing); split as a prep PR * D5 sync-method dispatch — explicit ThreadPoolExecutor + explicit contextvars.copy_context (run_in_executor doesn't auto-snapshot) * D6 TaskHandoff routing — async via create_task (snapshots contextvars for free); sync via run_in_executor + explicit copy. Awaitable-returning sync callables explicitly unsupported * D7 TaskRegistry — Protocol shape pinned with per-method contract docstrings; in-memory stub ships in foundation * D8 dual public API — adcp.decisioning.serve wrapper + seam * D9 caller_identity = account.id — semantic shift documented; metadata["adcp_decisioning.auth_principal"] retains raw principal * D10 idempotency ordering — wrapper builds correctly, runtime assert dropped (had a slice bug; document invariant instead) * D11 __init_subclass__ validator — fails class-definition without capabilities/accounts; BaseModel MRO conflict noted * D12 get_adcp_capabilities — synthesized from platform.capabilities * D13 vertical-slice example + integration test * D14 _invoke_platform_method contract pinned; REQUIRED_METHODS_PER_SPECIALISM.get tolerates unknown specialisms (forward-compat with v6.1+ specs) File plan splits into 2 PRs: - Prep PR: ~175 lines (framework handler-registration seam) - Foundation PR: ~2100 lines (adcp.decisioning.* + 1500 already committed in 4a2f8aae) Co-Authored-By: Claude Opus 4.7 (1M context) --- .../decisioning-platform-dispatch-design.md | 755 ++++++++++++++++++ 1 file changed, 755 insertions(+) create mode 100644 docs/proposals/decisioning-platform-dispatch-design.md diff --git a/docs/proposals/decisioning-platform-dispatch-design.md b/docs/proposals/decisioning-platform-dispatch-design.md new file mode 100644 index 000000000..9077f2dcb --- /dev/null +++ b/docs/proposals/decisioning-platform-dispatch-design.md @@ -0,0 +1,755 @@ +# DecisioningPlatform dispatch design (post-review) + +Pre-implementation reference for the `adcp.decisioning.{handler, dispatch, +serve, task_registry}` modules. Synthesizes 6 reviewer passes: + +* **Round 1** (initial design): agentic-product-architect, python-expert +* **Round 2** (post-codegen-and-framing additions): agentic-product-architect + (framing), python-expert (codegen mechanics), dx-expert (handler + registration UX), code-reviewer (consistency) + +Authoritative through D14. Tracks "things deferred" for v6.1 and beyond. + +## Decisions + +### D1. Explicit `PlatformHandler` class — codegen from per-specialism Protocols + +**Decision:** generate `src/adcp/decisioning/handler.py` from the +per-specialism Protocol classes (`SalesPlatform` and the future 11) via +`scripts/generate_decisioning_handler.py`. Don't hand-write; don't +synthesize at runtime via `type()`; don't read from `_HANDLER_TOOLS` as +the primary input. + +**Rationale:** runtime synthesis breaks IDE go-to-definition, traceback +frames are unreadable, mypy types every shim as `Any`, pickling fails. +Hand-writing 25 typed shims is tedious and drifts when AdCP adds tools. +Codegen keeps the file regenerable and correct. + +**Codegen source of truth: the per-specialism Protocols**, not +`_HANDLER_TOOLS`. The Protocols already encode exactly what codegen +needs: + +* method name (`create_media_buy`) +* typed `req` annotation (`CreateMediaBuyRequest`) +* typed return (`SalesResult[CreateMediaBuySuccessResponse]` vs + `MaybeAsync[GetProductsResponse]`) +* handoff-shape signal (`SalesResult[T]` means hybrid, `MaybeAsync[T]` + means non-hybrid) + +`_HANDLER_TOOLS` reduces to "the full tool list" and adds nothing — +it's consumed only for the `register_handler_tools(...)` call the +generated module emits at import time. Use +`typing.get_type_hints(SalesPlatform.create_media_buy, localns={...})` +per method, then `get_origin` / `get_args` to peel +`MaybeAsync` / `SalesResult` / `Awaitable` wrappers. + +**Wire-shape ≠ Python-signature edge cases.** Some Protocol methods +take more than `(req, ctx)`. Example: `update_media_buy(media_buy_id, +patch, ctx)` is three positional args; the wire tool takes one JSON +object. **Codegen needs a per-method "arg-projection" lookup** for the +handful of tools where wire-shape differs from Python-method-shape. +Path: shim accepts `params: UpdateMediaBuyRequest`, dispatch helper +splits to `(media_buy_id, patch, ctx)` before calling the platform +method. Preserve the Protocol surface as adopters see it. + +**Wire-name → Python-name mapping.** Add a `_WIRE_TO_PYTHON: dict[str, +str]` constant in the generator, default identity. Generator MUST fail +loudly if a wire name isn't a valid Python identifier (the `si_*` +namespace pattern is already non-uniform; this catches future drift). + +**Shim return type: Success response only.** Drop the +`| dict[str, Any]` fallback. Wire-envelope projection (TaskHandoff → +`Submitted` envelope, AdcpError → structured-error envelope) happens +in dispatch AFTER the shim returns. The shim signature is +`-> CreateMediaBuySuccessResponse`, full stop. Cleaner public API, +better IDE completion, no defeated typing. + +**Generator must fail-fast at codegen time** if a Protocol method +references a Pydantic Request type that doesn't exist in `adcp.types`. +Don't emit `params: Any` fallback — refuse to generate. CI regen-drift +catches contributor errors AFTER push; codegen-time fail-fast catches +them before commit. + +**`get_adcp_capabilities` is a hand-templated special case** in the +codegen script — it's not a generic shim because it reads +`self._platform.capabilities` rather than delegating. Generated +alongside the generic shim template. + +**`register_handler_tools` call** emitted at module level by codegen, +using `_HANDLER_TOOLS` as the source for the union of tool names +`PlatformHandler` covers (since it covers all specialisms). + +**Generator output is `ruff format`'ed** post-emit (mirrors +`scripts/generate_registry_types.py:196`); also run `ruff check --fix` +for `from __future__ import annotations` ordering and unused-import +cleanup. Don't add to black-exclude — generated-but-formatted is +reviewer-friendly. + +**Header comment is prescriptive**, not just timestamp: + +``` +# DO NOT EDIT — regenerated from scripts/generate_decisioning_handler.py +# Run `python scripts/sync_schemas.py` (or the explicit codegen step) +# after modifying _HANDLER_TOOLS, adcp.types, or any specialism Protocol. +# Source: src/adcp/decisioning/specialisms/{sales,...}.py +``` + +**Wire generator into the build pipeline AFTER `generate_types.py`**, +NOT inside `scripts/sync_schemas.py`. The `sync_schemas.py` script +only fetches the protocol bundle; `generate_types.py` produces the +Pydantic types the codegen depends on. Add the new step as +`scripts/generate_decisioning_handler.py`, called after Pydantic regen +in whatever invocation glue the project uses (`Makefile` / +pre-commit / etc.). + +**CI regen-drift check** mirrors `tests/test_mcp_schema_drift.py` +(483-line precedent — reuse the regen-into-tempdir + textual-diff +helper). Diff-and-fail, NOT auto-write. Auto-write loses the explicit +commit signal. One combined check is fine — drift in any artifact is +equally a problem. + +**Don't make `PlatformHandler` generic over `TMeta`.** Concrete base +typed as `DecisioningPlatform`; method bodies cast/narrow as needed. +Generic-over-`TMeta` complicates codegen for no DX win. + +**Shim shape:** + +```python +class PlatformHandler(ADCPHandler): + def __init__(self, platform: DecisioningPlatform) -> None: + super().__init__() + self._platform = platform + + async def create_media_buy( + self, params: CreateMediaBuyRequest, context: ToolContext | None = None, + ) -> CreateMediaBuySuccessResponse | dict[str, Any]: + return await _invoke_platform_method( + self._platform, "create_media_buy", params, context, + ) +``` + +Per-method `params` is the typed Pydantic class (not `dict`) so the +framework's `create_tool_caller` path validates inbound JSON against the +typed model before the shim runs. Adopters who want the typed +`RequestContext[TMeta]` get it via `assert isinstance(context, RequestContext)` +inside their platform method body — the runtime check is cheap and +narrows for mypy on adopter side. + +### D2. Context mutation, not replacement + +**Decision:** mutate the existing `RequestContext` in place. Don't try to +swap context objects through `call_next`. + +**Rationale:** the framework's `_dispatch_with_middleware` +(`serve.py:223-260`) closes over a single `context` and forwards it via +`call_next` which takes zero args. The framework explicitly rejects +context replacement (comment at `serve.py:111` "Middleware cannot mutate +what the next layer sees by mutating params"). The supported pattern — +seen in `helpers.py:268-336 resolve_account_into_context` — is in-place +mutation. + +**Wiring:** + +1. `adcp.decisioning.serve` passes `context_factory=lambda req_meta: RequestContext()` + to `adcp.server.serve`. Per-call the framework calls the factory and + gets a `RequestContext` instance (a `ToolContext` subclass) instead of + a plain `ToolContext`. +2. `decisioning_dispatch_middleware` mutates fields on the existing + context: `context.account = resolved`, + `context.caller_identity = resolved.id`, `context.auth_info = auth`, + `context.now = datetime.now(...)`. +3. `call_next()` runs the rest of the middleware chain + handler shim. + The shim and the platform method see the populated `RequestContext`. +4. Inside the shim: `assert isinstance(context, RequestContext)` for + mypy narrowing; pass to platform method. + +### D3. Method discovery — reuse `_is_method_overridden` + +**Decision:** reuse `mcp_tools.py:1336 _is_method_overridden`. Add +`DecisioningPlatform` and the per-specialism Protocol class names to the +existing `_SDK_BASE_CLASS_NAMES` set so the helper recognizes them as +"base, not override" sources. + +**Rationale:** `hasattr` matches inherited Protocol stubs (returning +`...`) and silently passes validation for classes that didn't actually +implement a required method. The existing helper does +`__func__`-identity comparison against the SDK base set — exactly the +right check. + +**Validation walk** (in `validate_platform`): + +```python +def validate_platform(platform: DecisioningPlatform) -> None: + missing: list[tuple[str, str]] = [] + for specialism in platform.capabilities.specialisms: + for method_name in REQUIRED_METHODS_PER_SPECIALISM[specialism]: + if not _is_method_overridden(platform, method_name): + missing.append((specialism, method_name)) + if missing: + raise AdcpError( + "INVALID_REQUEST", + message=( + "DecisioningPlatform claims specialisms but is missing " + f"required methods: {missing}. Implement on your subclass " + "or remove the specialism from capabilities." + ), + recovery="terminal", + details={"missing": [{"specialism": s, "method": m} for s, m in missing]}, + ) +``` + +### D4. `_HANDLER_TOOLS` allowlist — `advertised_tools` class attribute + public registration seam + +**Decision:** ship two complementary surfaces: + +1. **`ADCPHandler.__init_subclass__` reads an `advertised_tools: set[str]` + class attribute and auto-registers** if set. This is the path codegen + emits for `PlatformHandler`; it's what coding agents will reach for + without prompting. +2. **`register_handler_tools(handler_name: str, tools: set[str]) -> None`** + stays as the explicit escape hatch for adopters who can't (or + won't) set the class attribute (e.g., they construct the handler + class dynamically). + +Plus a third change: **`adcp.server.serve()` emits a `UserWarning` at +boot** when an `ADCPHandler` subclass isn't in `_HANDLER_TOOLS`, doesn't +set `advertised_tools`, and doesn't have `advertise_all=True` set. The +silent-fallback (today's behavior at `mcp_tools.py:1466`'s `else` +branch) is the load-bearing DX bug. + +**`register_handler_tools` semantics:** +* Re-registering the same `(handler_name, tools)` set: no-op + DEBUG + log +* Conflicting re-registration (same name, different tools): + `ValueError` with both sets in the message +* Unknown tool names: `ValueError` at registration time with the + closest-match suggestion (mirrors the init-time assertion at + `mcp_tools.py:1027-1029`): + ``` + ValueError: register_handler_tools("MyAgent", ...) references unknown + tool 'fake_tool_name'. Did you mean 'sync_creatives'? Valid tool + names: see adcp.types.ADCP_TOOL_DEFINITIONS. + ``` + +**Frame as a `PlatformHandler` enabler, not a "general framework +feature."** Both reviewer passes pushed back on the original framing. +Searching the codebase confirms: every `class … (ADCPHandler)` outside +tests is in `examples/`, and every one uses a built-in handler base or +accepts the discovery-only fallback. There is **no GitHub issue, no +adopter pattern, no sample code that motivates "general framework +feature."** The honest framing: "this is the registration mechanism +`PlatformHandler` uses; it happens to be a clean public seam for the +narrow case of custom `ADCPHandler` subclasses that implement a +non-standard tool subset." + +**Documentation placement** — extend the existing `tools/list reflects +your overrides` paragraph at `docs/handler-authoring.md:47-56`. Don't +add a new top-level section near the bottom; it'll be missed. Lead the +new prose with: "*You probably don't need this.* If you inherit from +a framework handler class (`SalesHandler`, `GovernanceHandler`, etc.), +tool filtering is already correct. Read on only if you're writing a +custom `ADCPHandler` subclass that implements a non-standard subset +of tools." + +**Worked example:** a hypothetical `ReadOnlyAnalyticsHandler(ADCPHandler)` +implementing only `get_media_buy_delivery` + `get_media_buys` — the +minimum case that demonstrates value via subset-of-existing-spec, not +"composition of two specialisms" (which reads as "this is composition, +why would I need a registration call"). + +**Add to `docs/handler-authoring.md` "What not to build" (line 817):** +"Don't pass `advertise_all=True` as a workaround for missing +registration." Today `advertise_all` is positioned as a legitimate +escape hatch and adopters reach for it; this stops that. + +**Decisioning's use:** codegen emits +`class PlatformHandler(ADCPHandler): advertised_tools = {…}` — +`__init_subclass__` registers automatically at import time. Per- +instance, the framework's existing `_is_method_overridden` filter +then trims to the methods the platform actually overrode. + +**Each `specialisms/*.py`** exports a `TOOLS: set[str]` constant +(`SalesPlatform.TOOLS = {"get_products", "create_media_buy", ...}`). +Codegen unions these into `advertised_tools` on `PlatformHandler`. + +**Land in foundation PR? Reversed: split as a prep PR.** Both reviewer +passes recommended splitting because framework-shared code deserves a +different review lens than decisioning-specific code. ~150-line prep PR +(`__init_subclass__` + `register_handler_tools` + UserWarning + tests + +docs subsection) lets the framework-feature framing get scrutinized on +its own merits and shrinks the foundation PR's review surface. +Reviewer's exact words: "splitting *this* piece is the highest-leverage +split available because it's the one piece that touches framework- +shared code." + +### D5. Sync-method dispatch — explicit executor + contextvars + +**Decision:** allocate a `ThreadPoolExecutor` in `adcp.decisioning.serve`. +Pass it explicitly via `loop.run_in_executor(executor, ctx_snapshot.run, ...)`. +Don't `set_default_executor` (process-global side effect). + +```python +ctx_snapshot = contextvars.copy_context() +result = await loop.run_in_executor( + self._executor, + functools.partial(ctx_snapshot.run, method, req, ctx), +) +``` + +**Detection:** `asyncio.iscoroutinefunction`, not `inspect.iscoroutinefunction` +(the latter doesn't unwrap `functools.partial` until 3.12). + +### D6. TaskHandoff — `asyncio.create_task` already snapshots contextvars; sync path needs explicit copy + +**Decision:** routing detected via `asyncio.iscoroutinefunction(fn)` +only. + +* Async handoff fn (`async def`): `asyncio.create_task(_runner())`. + Don't manually `copy_context` — `create_task` does it internally + (CPython 3.7+). +* Sync handoff fn: route through `loop.run_in_executor(executor, + ctx_snapshot.run, fn, handoff_ctx)` with **explicit + `contextvars.copy_context()` snapshot at the dispatch site** (D5 + pattern). Without the explicit snapshot, the sync body loses the + request's tracing IDs / tenant IDs. +* **`Awaitable`-returning sync callables (coroutine factories not + declared `async def`) are unsupported** and rejected at registration + time. Adopters who want this either declare `async def` or wrap + manually. Document explicitly to avoid the silent-routing bug. + +```python +async def _project_handoff(handoff: TaskHandoff[T], ctx: RequestContext, registry, executor) -> dict: + task_id = await registry.issue(account_id=ctx.account.id, skill_name=ctx._skill) + handoff_ctx = TaskHandoffContext(id=task_id, _registry=registry) + + if asyncio.iscoroutinefunction(handoff._fn): + # create_task copies contextvars internally; the background + # task sees the request's tracing IDs / tenant ID for free. + asyncio.create_task(_run_handoff_async(handoff._fn, handoff_ctx, registry, task_id)) + else: + # run_in_executor does NOT snapshot contextvars — capture explicitly. + ctx_snapshot = contextvars.copy_context() + loop = asyncio.get_running_loop() + asyncio.create_task(_run_handoff_sync_via_executor( + handoff._fn, handoff_ctx, registry, task_id, executor, ctx_snapshot, loop, + )) + + return {"task_id": task_id, "status": "submitted", "task_type": ctx._skill, ...} +``` + +`_run_handoff_sync_via_executor` body: + +```python +async def _run_handoff_sync_via_executor(fn, handoff_ctx, registry, task_id, executor, ctx_snapshot, loop): + try: + result = await loop.run_in_executor( + executor, functools.partial(ctx_snapshot.run, fn, handoff_ctx), + ) + await registry.complete(task_id, result=_serialize(result)) + except AdcpError as e: + await registry.fail(task_id, error=e.to_wire()) + except Exception as e: + await registry.fail(task_id, error={"code": "INTERNAL_ERROR", "message": str(e), "recovery": "terminal"}) +``` + +### D7. TaskHandoff in scope — `InMemoryTaskRegistry` stub with pinned shape contracts + +**Decision:** ship the `TaskRegistry` Protocol + an +`InMemoryTaskRegistry` stub (~100 lines) in the foundation PR. Don't +defer to v6.1. + +**Rationale:** `SalesPlatform.create_media_buy` returns `SalesResult[T]` +— if `TaskHandoff` raises `NotImplementedError` on first use, the +hybrid headline feature is broken on day one. + +**Pinned Protocol shape** (all five methods carry contract docstrings, +not just types): + +```python +class TaskRegistry(Protocol): + async def issue(self, *, account_id: str, skill_name: str) -> str: + """Allocate a new task_id, persist `(account_id, skill_name, + status='submitted', created_at=now)`. Return the task_id.""" + + async def update( + self, task_id: str, *, status: str, progress: dict[str, Any] | None = None, + ) -> None: + """Transition the task. ``status`` is from + ``schemas/cache/3.0.0/enums/task-status.json``. ``progress`` is + adopter-defined JSON the buyer can poll via ``tasks/get``.""" + + async def complete(self, task_id: str, *, result: dict[str, Any]) -> None: + """Mark terminal-success. ``result`` MUST be the JSON-serialized + spec response payload (e.g., + ``CreateMediaBuySuccessResponse.model_dump(mode='json')``). + Buyer's ``tasks/get`` returns this verbatim.""" + + async def fail(self, task_id: str, *, error: dict[str, Any]) -> None: + """Mark terminal-failure. ``error`` MUST be the + ``AdcpError.to_wire()`` shape: + ``{code, message, recovery, [field], [suggestion], + [retry_after], [details]}``.""" + + async def get( + self, task_id: str, *, account_id: str, + ) -> dict[str, Any] | None: + """Account-scoped lookup. Cross-tenant probes (probing a + task_id that doesn't belong to the requesting account) + MUST return None, not raise. Returned shape: + ``{task_id, account_id, skill_name, status, progress, result, + error, created_at, updated_at, completed_at}``. + Missing fields are JSON-null; ``progress`` is the most-recent + update; ``result`` is set only when ``status == 'completed'``; + ``error`` is set only when ``status in {'failed', 'rejected'}``.""" +``` + +**`InMemoryTaskRegistry`** stores rows in a `dict[str, TaskRecord]`, +keyed by `task_id`. `get(task_id, account_id)` returns None when the +row's `account_id` doesn't match (account-scoped invariant). Lost on +process restart. + +Document loudly: "in-memory; lost on restart; production deployments +swap in `SqlAlchemyTaskRegistry` (v6.1)." + +### D8. Public API — both `serve()` wrapper and seam + +**Decision:** export both `adcp.decisioning.serve(platform, ...)` (wrapper) +and `adcp.decisioning.create_adcp_server_from_platform(platform) -> (handler, middleware, context_factory)` (seam). + +**Rationale:** wrapper covers 90% of adopters; seam is required for +adopter middleware composition + test ergonomics. Wrapper docstring +points at the seam for advanced cases. + +### D9. `caller_identity = account.id` — semantic shift acknowledged + +**Decision:** + +- Set `context.caller_identity = account.id` in dispatch middleware. + This is the layering correction — idempotency cache scopes per + resolved account, not per raw auth principal. +- Document the invariant on `Account.id`: "MUST be unique across the + deployment's full account-resolution surface; collisions silently + leak responses across accounts." +- Set `context.metadata["adcp_decisioning.auth_principal"] = auth_info.principal` + so observability middleware that wants the original auth principal + can read it. +- Log at DEBUG: `dispatched skill=%s account_id=%s caller_identity_pre=%s` + for grep-on-leak-report. + +### D10. Idempotency middleware ordering — wrapper builds correctly; runtime assert dropped + +**Decision:** `decisioning_dispatch_middleware` MUST run before any +idempotency middleware. The **wrapper-`serve()` builds the list in the +right order**; no runtime assert is needed. + +```python +def serve(platform, *, middleware=None, ...): + # Dispatch outermost — sets caller_identity = account.id BEFORE + # idempotency reads it for cache scoping. + composed = [decisioning_dispatch_middleware(platform)] + if middleware: + composed.extend(middleware) + adcp.server.serve(handler, middleware=composed, ...) +``` + +**Earlier draft tried to runtime-assert ordering** when adopters pass +their own composed list to `create_adcp_server_from_platform` (the +seam). The assertion logic was buggy +(`composed[len(composed):]` slices end-of-list — always empty) and +fixing it adds runtime cost for a deploy-time bug. **Drop the runtime +assert.** Document the ordering invariant on +`create_adcp_server_from_platform` instead: "the returned middleware +list MUST run outermost in your composed serve(middleware=...) list, +or idempotency cache scoping breaks." Adopters using the wrapper +(`adcp.decisioning.serve`) get the right order automatically; adopters +using the seam read the docs. + +### D11. `__init_subclass__` — fail-fast on missing `accounts`/`capabilities` + +**Decision:** + +```python +class DecisioningPlatform: + def __init_subclass__(cls, **kwargs): + super().__init_subclass__(**kwargs) + if "capabilities" not in cls.__dict__: + raise TypeError( + f"{cls.__name__} must declare a `capabilities = DecisioningCapabilities(...)` " + "attribute. See adcp.decisioning.DecisioningCapabilities." + ) + if "accounts" not in cls.__dict__ or cls.accounts is None: + raise TypeError( + f"{cls.__name__} must declare an `accounts = ...` attribute " + "(SingletonAccounts, ExplicitAccounts, FromAuthAccounts, or a " + "custom AccountStore impl). See adcp.decisioning.AccountStore." + ) +``` + +Fail-fast at class-definition time beats fail-mysteriously at first +request. + +**Pydantic-`BaseModel` MRO conflict footgun.** Add a one-line note to +the `DecisioningPlatform` docstring: "Don't inherit from Pydantic +`BaseModel`; metaclass conflicts. Use a `pydantic.dataclass` field +or wrap a `BaseModel` instance if you need validation on adopter +state." The validator only inspects `cls.__dict__` (not MRO) so it +won't trip MRO walking, but combining `BaseModel` + `DecisioningPlatform` +fails at class-creation due to incompatible metaclasses. + +### D12. `get_adcp_capabilities` — synthesized from `platform.capabilities` + +**Decision:** the `PlatformHandler` synthesizes the `get_adcp_capabilities` +response from `platform.capabilities` so adopters don't implement it. +Always-advertised per `_PROTOCOL_TOOLS`. + +```python +async def get_adcp_capabilities( + self, params: GetAdcpCapabilitiesRequest, context: ToolContext | None = None, +) -> GetAdcpCapabilitiesResponse: + caps = self._platform.capabilities + return GetAdcpCapabilitiesResponse( + adcp_version=ADCP_VERSION, + specialisms=caps.specialisms, + channels=caps.channels, + pricing_models=caps.pricing_models, + creative_agents=caps.creative_agents, + config=caps.config, + # ... whatever else the schema requires + ) +``` + +### D13. Vertical-slice example: `examples/hello_seller.py` + integration test + +**Decision:** ship a runnable single-file example +(`examples/hello_seller.py`) plus an integration test +(`tests/test_hello_seller_integration.py`) that boots the example via +ASGI transport, makes an MCP `tools/call` to a sync `get_products`, +asserts the response round-trips. Plus async-handoff path coverage and +AdcpError wire-projection coverage. + +**Rationale:** the foundation PR's value claim is "the seams compose +end-to-end." Without a working example the claim is unverified. +Integration test is the seam-composition regression guard for every +subsequent change. + +### D14. `_invoke_platform_method` contract + `REQUIRED_METHODS_PER_SPECIALISM` tolerance + +**Decision:** spell out two helpers the file plan listed without a +backing decision. + +**`_invoke_platform_method(platform, method_name, params, ctx)` contract:** + +```python +async def _invoke_platform_method( + platform: DecisioningPlatform, + method_name: str, + params: BaseModel, # the typed Pydantic request, already validated + ctx: RequestContext, +) -> BaseModel | dict[str, Any]: + """Invoke a platform method, projecting hybrid returns. + + Returns: + - A typed Pydantic response on the sync path. The caller + (the shim) returns it as-is; the framework's existing + ``model_dump`` codepath serializes to the wire. + - A dict on the TaskHandoff path: the projected ``Submitted`` + envelope ``{task_id, status, task_type, ...}`` ready for + serialization. + + Raises: + AdcpError: re-raised from the platform method body. The dispatch + middleware catches at the outer wrapper and projects to the + wire structured-error envelope. + AdcpError("INTERNAL_ERROR", recovery="terminal"): wraps any + non-AdcpError exception so the wire response never leaks a + stack trace. Adopter logs the original exception via the + framework's observability hooks. + """ + method = getattr(platform, method_name) + if asyncio.iscoroutinefunction(method): + result = await method(params, ctx) # plus arg-projection if needed (D1) + else: + ctx_snapshot = contextvars.copy_context() + loop = asyncio.get_running_loop() + result = await loop.run_in_executor( + _executor, functools.partial(ctx_snapshot.run, method, params, ctx), + ) + + if type(result) is TaskHandoff: + return await _project_handoff(result, ctx, _registry, _executor) + return result +``` + +**`REQUIRED_METHODS_PER_SPECIALISM` lookup tolerance:** + +```python +def validate_platform(platform: DecisioningPlatform) -> None: + missing: list[tuple[str, str]] = [] + for specialism in platform.capabilities.specialisms: + # Tolerate unknown specialisms (forward-compat with v6.1+ specs) + # — log at debug, don't KeyError. validate_platform's job is to + # catch missing methods for KNOWN specialisms; unknown ones get + # a pass-through with a debug log so spec evolution doesn't + # break server boot. + required = REQUIRED_METHODS_PER_SPECIALISM.get(specialism) + if required is None: + logger.debug( + "validate_platform: specialism %r is not known to this " + "framework version; skipping required-method check. " + "Upgrade adcp-server if buyers expect this specialism.", + specialism, + ) + continue + for method_name in required: + if not _is_method_overridden(platform, method_name): + missing.append((specialism, method_name)) + if missing: + raise AdcpError( + "INVALID_REQUEST", + message=( + "DecisioningPlatform claims specialisms but is missing " + f"required methods: {missing}. Implement on your subclass " + "or remove the specialism from capabilities." + ), + recovery="terminal", + details={"missing": [{"specialism": s, "method": m} for s, m in missing]}, + ) +``` + +**Rationale:** unknown specialisms shouldn't break server boot — +they just mean the deployment is on a framework version that predates +spec evolution. The buyer pays for that with a `tools/list` that +doesn't include the new specialism's tools, which is the right +fail-soft behavior. + +## File plan + +**Two PRs**, splitting the framework-shared code from the +decisioning-specific code per reviewer recommendation. + +### Prep PR: framework handler-registration seam + +| File | Lines (est) | Notes | +|---|---|---| +| `adcp/server/base.py` | +20 | `ADCPHandler.__init_subclass__` reads `advertised_tools: set[str]` class attr, calls `register_handler_tools(cls.__name__, advertised_tools)` if set. | +| `adcp/server/mcp_tools.py` | +30 | New `register_handler_tools(handler_name, tools) -> None` public seam. Idempotent on equal input, raises `ValueError` on conflicting input or unknown tool names (with closest-match suggestion). | +| `adcp/server/serve.py` | +15 | Boot-time `UserWarning` when handler subclass isn't in `_HANDLER_TOOLS`, has no `advertised_tools`, and no `advertise_all=True`. Closes the silent-fallback DX bug. | +| `docs/handler-authoring.md` | +30 | Subsection extending lines 47-56 for the narrow custom-`ADCPHandler`-subclass case. Worked example: `ReadOnlyAnalyticsHandler` advertising 2 of 9 sales tools. "What not to build" line 817 gains "Don't use `advertise_all=True` as a workaround for missing registration." | +| `tests/test_register_handler_tools.py` | ~80 | Idempotent re-registration; conflict detection; unknown-tool validation; `__init_subclass__` auto-registration; UserWarning on missing registration. | + +**Prep PR total:** ~175 lines. Lands as `feat(server):` (additive +public surface — minor bump). + +### Foundation PR: `adcp.decisioning.*` + +| File | Lines (est) | Notes | +|---|---|---| +| `scripts/generate_decisioning_handler.py` | ~200 | Codegen script: walks per-specialism Protocols via `typing.get_type_hints`, emits `handler.py` with typed shims. `_WIRE_TO_PYTHON` map + arg-projection for `update_media_buy`-shape tools. Fail-fast on missing Pydantic types. Post-emit `ruff format` + `ruff check --fix`. Wired AFTER `generate_types.py`, NOT inside `sync_schemas.py`. | +| `adcp/decisioning/handler.py` | ~250 (generated) | `PlatformHandler(ADCPHandler)` with one typed shim per spec tool. Hand-templated `get_adcp_capabilities` synthesis special-case. `advertised_tools = {…full union…}` class attr (auto-registered via prep-PR's `__init_subclass__`). Prescriptive `# DO NOT EDIT` header. | +| `adcp/decisioning/dispatch.py` | ~350 | `decisioning_dispatch_middleware`, `_invoke_platform_method`, `validate_platform` (with tolerant `REQUIRED_METHODS_PER_SPECIALISM.get`), executor lifecycle (allocate in `serve()`, shutdown via existing framework hook), `_project_handoff` (sync needs explicit `copy_context`; async gets it free from `create_task`). | +| `adcp/decisioning/task_registry.py` | ~150 | `TaskRegistry` Protocol with pinned shape contracts (D7) + `InMemoryTaskRegistry` stub + `TaskHandoffContext` (consumed by handoff fns; carries `id` + `update(progress)` + `heartbeat()` stub). | +| `adcp/decisioning/serve.py` | ~150 | Wrapper around `adcp.server.serve`. Builds handler + middleware + context_factory (returns `RequestContext`, NOT `ToolContext`) + executor. `create_adcp_server_from_platform` seam returns `(handler, middleware, context_factory)` 3-tuple. | +| `adcp/decisioning/specialisms/sales.py` | (existing, +10) | Add `TOOLS: set[str]` constant. | +| `adcp/decisioning/platform.py` | (existing, +25) | Add `__init_subclass__` validator (D11) + `BaseModel` MRO-conflict docstring note. | +| `examples/hello_seller.py` | ~50 | Runnable single-file example exercising the full vertical slice (D13). | +| `tests/test_decisioning_dispatch.py` | ~450 | Middleware-mutation correctness; D9 `caller_identity = account.id` + `metadata["adcp_decisioning.auth_principal"]` retains raw principal; AdcpError catch + wire projection (including from sync executor branch); TaskHandoff projection (sync + async paths); sync handoff body sees ContextVar set in request scope (D6 sync-context propagation regression); validate_platform fail-fast; tolerant unknown-specialism path; `_invoke_platform_method` contract (D14). | +| `tests/test_decisioning_task_registry.py` | ~100 | `TaskRegistry` Protocol shape; `InMemoryTaskRegistry` issue/update/complete/fail; account-scoped `get` returns None on cross-tenant probe; concurrent issue (no task_id collision). | +| `tests/test_decisioning_platform_validation.py` | ~50 | D11: platform without `capabilities` fails at class definition; platform without `accounts` fails at class definition; valid platform passes. | +| `tests/test_decisioning_capabilities_synthesis.py` | ~80 | D12 unit test: synthesized `get_adcp_capabilities` response matches `platform.capabilities` field-for-field. Cheaper than driving via integration test. | +| `tests/test_decisioning_handler_codegen.py` | ~80 | Regen-drift: regen `handler.py` into tempdir, `git diff --exit-code`. Mirrors `tests/test_mcp_schema_drift.py` pattern. Codegen-time fail-fast on missing Pydantic Request type. | +| `tests/test_hello_seller_integration.py` | ~150 | End-to-end: boot example via ASGI, MCP `tools/call` hits sync `get_products`, response round-trips. Async handoff path: `create_media_buy` returns `TaskHandoff`, Submitted envelope serializes correctly, registry has the task. AdcpError path: hostile budget rejected with structured-error envelope. | + +**Foundation PR total:** ~2100 lines (~250 generated, ~700 tests). +After prep PR + this lands: ~3500 lines on top of 1500-line foundation +skeleton already committed. + +## Things deferred (track separately) + +- **`SqlAlchemyTaskRegistry`** — v6.1; replaces `InMemoryTaskRegistry` + without changing dispatch. Track in foundation-audit follow-ups. +- **`A2aTaskDelivery` for A2A buyers** — currently TaskHandoff projects + to MCP `Submitted` envelope only. A2A delivery wraps the same payload + in `Task` + `TaskStatusUpdateEvent`. Add when first A2A adopter needs + it; same Protocol surface. +- **`tenant_registry`** — multi-tenant primitive composing + `serve(factory=...)`. Out of foundation-PR scope; v6.1. +- **`status_changes` (DbBackedStatusChangeBus)** — adopters with + audit-relevant status transitions need this; in-memory bus ships + with foundation, durable bus is v6.1. +- **`delivery` module** — McpWebhookDelivery + A2aTaskDelivery composed + on top of `adcp.webhook_sender` + `adcp.server.a2a_server`. Add when + TaskHandoff projection moves from in-memory stub to real persistence. +- **Other 11 specialism Protocols** — only `SalesPlatform` ships in the + foundation PR (the vertical slice); others template-and-fan-out + after merge. +- **Hand-written → codegen for `PlatformHandler` shims** — ~600 lines + is auto-generatable from `_HANDLER_TOOLS` + spec response types. + Hand-written for foundation; codegen task is a separate PR. + +## Round-2 review changelog + +Items the round-2 reviewers (agentic-product, python-expert, dx-expert, +code-reviewer) revised or strengthened from the round-1 design: + +* **D1 codegen source-of-truth changed** from `_HANDLER_TOOLS` to + per-specialism Protocols (Protocols carry typed Pydantic Request + + return-shape signal; `_HANDLER_TOOLS` reduces to "every spec tool" + and adds nothing). +* **D1 wire-shape ≠ Python-signature edge case** added (e.g. + `update_media_buy(media_buy_id, patch, ctx)`); arg-projection lookup + required. +* **D1 shim return type narrowed** to Success-only; dropped + `| dict[str, Any]` fallback. Wire projection happens in dispatch + AFTER shim returns. +* **D1 codegen pipeline ordering corrected** — runs after + `generate_types.py`, NOT inside `sync_schemas.py`. Doc previously + conflated the two. +* **D1 generator must fail-fast** on missing Pydantic Request type + (don't emit `Any` fallback). +* **D1 prescriptive header comment** required (not just timestamp); + CI uses `tests/test_mcp_schema_drift.py` precedent. +* **D4 framing reversed** from "general framework feature" to + "`PlatformHandler` enabler that happens to be a clean public seam." + Search of the codebase: every existing custom `ADCPHandler` subclass + is in `examples/`; none would benefit from the seam beyond what + `advertise_all=True` provides today. Don't oversell the framing. +* **D4 surface upgraded** with `ADCPHandler.__init_subclass__` reading + `advertised_tools: set[str]` class attribute — codegen emits this + on `PlatformHandler`; coding agents will pattern-match the + registration without prompting. Explicit + `register_handler_tools(...)` call stays as the escape hatch. +* **D4 `serve()` UserWarning** at boot when handler subclass is in + none of `_HANDLER_TOOLS` / `advertised_tools` / `advertise_all=True`. + Closes the silent-fallback discoverability gap (today's + `mcp_tools.py:1466` else branch). +* **D4 doc placement corrected** — extends the existing `tools/list + reflects your overrides` paragraph at `handler-authoring.md:47-56`, + NOT a new top-level section. +* **D4 split into prep PR** (reversed from the "land in foundation" + call). Framework-shared code deserves a different review lens than + decisioning-specific code; the prep PR is ~175 lines and lets the + framework-feature framing get scrutinized on its own merits. +* **D6 `Awaitable`-returning sync callable case** added — coroutine + factories not declared `async def` are unsupported and rejected at + registration; document explicitly. +* **D6 sync-handoff contextvars** require explicit + `contextvars.copy_context()` snapshot at the dispatch site + (`run_in_executor` does NOT auto-snapshot, unlike `to_thread`). +* **D7 TaskRegistry Protocol shape pinned** — every method has a + contract docstring spelling out arg/return types and account-scoping + invariants. `complete(result)` MUST be JSON-serialized spec response; + `fail(error)` MUST be `AdcpError.to_wire()` shape; cross-tenant `get` + returns None. +* **D10 broken assertion dropped** — runtime ordering check had a slice + bug (`composed[len(composed):]` is always empty); the wrapper builds + the right order anyway. Document the invariant on + `create_adcp_server_from_platform` instead. +* **D11 `BaseModel` MRO conflict footgun** documented — adopters can't + inherit from both `DecisioningPlatform` and Pydantic `BaseModel`. +* **D13 added** — vertical-slice example + integration test as + first-class deliverables (previously implicit in the file plan). +* **D14 added** — `_invoke_platform_method` contract pinned; + `REQUIRED_METHODS_PER_SPECIALISM.get(s, set())` made tolerant of + unknown specialisms (forward-compat with v6.1+ specs). +* **File plan split** into prep PR + foundation PR. Total grew from + ~1900 to ~2275 lines (extra tests for round-2-surfaced cases). From 4069dc74505c56d1c2264fbc0a1aa0d04d645f3c Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Wed, 29 Apr 2026 23:15:28 -0400 Subject: [PATCH 03/18] docs(decisioning): apply round-3 user feedback to dispatch design Round-3 review on PR #316 surfaced eight items, all resolved in-place on D1 / D5 / D9 / D13 / D14 plus added cross-tenant + arg-projection regression tests in the file plan. Highlights: - D9: cache scope key composed as (account_store qualname, account.id) for structural cross-store isolation instead of relying on adopter Account.id-uniqueness discipline. RequestContext.auth_principal added as a typed attribute (caller_identity now correctly names the cache scope key, not the auth principal). - D14: unknown specialisms emit UserWarning at boot (not DEBUG) so typos like sales-non-guarateed surface in CI without breaking v6.1+ forward-compat tolerance. - D1: drift error message names the regen command verbatim; arg-projection emits explicit kwargs (not **unpack) so Pydantic field renames trip a NameError at codegen time. - D5: serve() exposes executor= / thread_pool_size= knobs (mutually exclusive) with a documented default of min(32, cpu+4) and thread_name_prefix; framework owns lifecycle for default pools, operator owns lifecycle for BYO. - D13: examples split into hello_seller.py (sync) and hello_seller_async_handoff.py (hybrid + AdcpError round-trip). - File plan: added test_decisioning_task_registry_cross_tenant.py hostile-probe regression and test_hello_seller_async_handoff_integration.py; extended dispatch test to cover composite caller_identity, auth_principal, UserWarning, kwargs path. Foundation total ~2475 lines. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../decisioning-platform-dispatch-design.md | 342 ++++++++++++++++-- 1 file changed, 303 insertions(+), 39 deletions(-) diff --git a/docs/proposals/decisioning-platform-dispatch-design.md b/docs/proposals/decisioning-platform-dispatch-design.md index 9077f2dcb..5d30e5339 100644 --- a/docs/proposals/decisioning-platform-dispatch-design.md +++ b/docs/proposals/decisioning-platform-dispatch-design.md @@ -1,12 +1,16 @@ # DecisioningPlatform dispatch design (post-review) Pre-implementation reference for the `adcp.decisioning.{handler, dispatch, -serve, task_registry}` modules. Synthesizes 6 reviewer passes: +serve, task_registry}` modules. Synthesizes 7 reviewer passes: * **Round 1** (initial design): agentic-product-architect, python-expert * **Round 2** (post-codegen-and-framing additions): agentic-product-architect (framing), python-expert (codegen mechanics), dx-expert (handler registration UX), code-reviewer (consistency) +* **Round 3** (post-design-doc-published, on PR #316): user feedback on + Account.id leak boundary, cross-tenant probe regression, validation + noise, codegen DX, executor configurability, field-name semantics, + example coverage, kwarg unpacking Authoritative through D14. Tracks "things deferred" for v6.1 and beyond. @@ -52,6 +56,34 @@ Path: shim accepts `params: UpdateMediaBuyRequest`, dispatch helper splits to `(media_buy_id, patch, ctx)` before calling the platform method. Preserve the Protocol surface as adopters see it. +**Arg-projection MUST emit explicit kwargs, not positional**, so +adopters refactoring Protocol method signatures don't silently break +the shim. The codegen produces: + +```python +# Generated arg-projection lookup — kwargs only +ARG_PROJECTION: dict[str, Callable[[BaseModel], dict[str, Any]]] = { + "update_media_buy": lambda req: { + "media_buy_id": req.media_buy_id, + "patch": req, # the full request minus media_buy_id, modeled per spec + }, + # ... other arg-projecting tools +} + +# Inside _invoke_platform_method: +projector = ARG_PROJECTION.get(method_name) +if projector is not None: + method_kwargs = projector(params) + method_kwargs["ctx"] = ctx + result = await _call(method, **method_kwargs) +else: + result = await _call(method, params, ctx) +``` + +If an adopter refactors `update_media_buy(self, media_buy_id, patch, +ctx)` to `(self, *, media_buy_id, patch, ctx)`, the kwargs path keeps +working; positional dispatch would silently break. + **Wire-name → Python-name mapping.** Add a `_WIRE_TO_PYTHON: dict[str, str]` constant in the generator, default identity. Generator MUST fail loudly if a wire name isn't a valid Python identifier (the `si_*` @@ -108,6 +140,23 @@ helper). Diff-and-fail, NOT auto-write. Auto-write loses the explicit commit signal. One combined check is fine — drift in any artifact is equally a problem. +**Drift error message MUST be prescriptive.** A generic +`git diff --exit-code` failure forces every contributor to learn the +regen story from scratch. The pytest assertion message names the +exact regen command verbatim: + +``` +AssertionError: src/adcp/decisioning/handler.py is out of sync with the +per-specialism Protocols. Run: + + uv run python scripts/generate_decisioning_handler.py + +then commit the result. Drift detected in: +``` + +Mirror the precedent at `tests/test_mcp_schema_drift.py` (which uses +the same prescriptive shape). + **Don't make `PlatformHandler` generic over `TMeta`.** Concrete base typed as `DecisioningPlatform`; method bodies cast/narrow as needed. Generic-over-`TMeta` complicates codegen for no DX win. @@ -282,7 +331,7 @@ Reviewer's exact words: "splitting *this* piece is the highest-leverage split available because it's the one piece that touches framework- shared code." -### D5. Sync-method dispatch — explicit executor + contextvars +### D5. Sync-method dispatch — explicit executor + contextvars + configurable **Decision:** allocate a `ThreadPoolExecutor` in `adcp.decisioning.serve`. Pass it explicitly via `loop.run_in_executor(executor, ctx_snapshot.run, ...)`. @@ -299,6 +348,45 @@ result = await loop.run_in_executor( **Detection:** `asyncio.iscoroutinefunction`, not `inspect.iscoroutinefunction` (the latter doesn't unwrap `functools.partial` until 3.12). +**Configurable on `serve()` — three knobs, mutually exclusive:** + +```python +def serve( + platform: DecisioningPlatform, + *, + executor: ThreadPoolExecutor | None = None, # custom executor (operator escape hatch) + thread_pool_size: int | None = None, # size the default executor + # ... other kwargs +) -> None: + if executor is not None and thread_pool_size is not None: + raise ValueError( + "Pass either executor= or thread_pool_size=, not both. " + "thread_pool_size sizes the default executor; executor= is for " + "operators who need a vetted threadpool (e.g., audit-instrumented)." + ) + if executor is None: + # Default: min(32, cpu+4) — fine for hello-world, surprises adopters + # under load. thread_pool_size= bumps the ceiling for high-fanout + # sync deployments (salesagent's Flask + sync DB drivers profile). + size = thread_pool_size if thread_pool_size is not None else min(32, (os.cpu_count() or 1) + 4) + executor = ThreadPoolExecutor(max_workers=size, thread_name_prefix="adcp-decisioning") + # ... wire executor into dispatch middleware +``` + +**Default surprises adopters under load.** `ThreadPoolExecutor()` with +no args defaults to `min(32, cpu+4)` per Python 3.13 stdlib. That's +fine for local dev / hello-world; production deployments running +salesagent-style sync DB drivers will saturate the pool quickly. +Document on `thread_pool_size`: "Bump for high-fanout sync deployments +(SQLAlchemy + Flask + per-request sessions). For async-everywhere +deployments, the default is fine." + +**Lifecycle:** `executor.shutdown(wait=True)` registered via the +existing framework shutdown hook so it cleans up on graceful exit. +Operator-supplied executors are NOT shut down by the framework — the +operator owns the lifecycle on their side (matches the +`WebhookSender(client=...)` operator-trust contract from PR #297). + ### D6. TaskHandoff — `asyncio.create_task` already snapshots contextvars; sync path needs explicit copy **Decision:** routing detected via `asyncio.iscoroutinefunction(fn)` @@ -420,21 +508,81 @@ and `adcp.decisioning.create_adcp_server_from_platform(platform) -> (handler, mi adopter middleware composition + test ergonomics. Wrapper docstring points at the seam for advanced cases. -### D9. `caller_identity = account.id` — semantic shift acknowledged +### D9. Account-scoped cache key — structural isolation, not adopter discipline -**Decision:** +**Decision:** stop treating `Account.id` uniqueness as adopter +responsibility. The failure mode is silent cross-tenant data leakage +through the idempotency cache; documentation alone is too hands-off +for a security boundary. -- Set `context.caller_identity = account.id` in dispatch middleware. - This is the layering correction — idempotency cache scopes per - resolved account, not per raw auth principal. -- Document the invariant on `Account.id`: "MUST be unique across the - deployment's full account-resolution surface; collisions silently - leak responses across accounts." -- Set `context.metadata["adcp_decisioning.auth_principal"] = auth_info.principal` - so observability middleware that wants the original auth principal - can read it. -- Log at DEBUG: `dispatched skill=%s account_id=%s caller_identity_pre=%s` - for grep-on-leak-report. +**Compose the cache scope key from `(account_store qualname, +account.id)`**, not `account.id` alone. Two adopters using different +`AccountStore` impls — or the same impl with colliding `account.id` +values across deployments sharing infra — cannot cross-leak through +the framework's cache. + +```python +# Inside decisioning_dispatch_middleware: +account = await _maybe_await(platform.accounts.resolve(ref, auth_info)) +store_qualname = type(platform.accounts).__qualname__ +context.caller_identity = f"{store_qualname}:{account.id}" +context.account = account # typed access (D2) +context.auth_principal = auth_info.principal if auth_info else None +context.metadata["adcp_decisioning.auth_principal"] = auth_info.principal if auth_info else None +context.metadata["adcp_decisioning.account_store"] = store_qualname +``` + +**What this prevents:** + +* Cross-store leakage: `SingletonAccounts(account_id="hello")` + resolving to `account.id="hello:buyer-a"` and `ExplicitAccounts` + resolving (via a buggy loader) to `account.id="hello:buyer-a"` + produce different scope keys (`SingletonAccounts:hello:buyer-a` + vs `ExplicitAccounts:hello:buyer-a`). Cache hits cannot cross. +* Within-store collision (one adopter, identical `account.id` for + two distinct accounts) is still an adopter bug at + `AccountStore.resolve`. The framework can't structurally prevent + this case without a runtime registry that costs more than it buys. + +**Why not a runtime uniqueness registry:** distributed registries are +hard to implement correctly across processes, require coordination, +and don't help when the same store class is used by cooperating +processes with different account spaces. The composite scope key +gets the same protection at zero coordination cost. + +**Belt-and-suspenders defense in depth:** + +* `Account.id` docstring: "MUST be unique within the adopter's + deployment surface. Best practice: prefix with a deployment-stable + namespace (`f'acme-prod-{tenant_id}'`) rather than raw tenant + slugs. The framework composes the idempotency cache scope key as + `(AccountStore.__qualname__, account.id)`, so cross-store + collisions are structurally blocked; within-store collisions are + the adopter's responsibility." +* DEBUG log line on every dispatch: + `dispatched skill=%s scope_key=%s account_store=%s`. Operators + investigating a leak report grep across account-store boundaries. + +**Field-name clarification (round-3 concern that `caller_identity` +now misleads):** `caller_identity` carries the composite scope key +(framework-internal, read by `IdempotencyStore`). Adopter platform +methods that want the auth principal read **`ctx.auth_principal`** +(typed `str | None` attribute on `RequestContext`); adopter +middleware that consumes the raw `ToolContext` reads +`ctx.metadata["adcp_decisioning.auth_principal"]` (string key for +non-decisioning code paths). + +**`RequestContext` schema gains `auth_principal`** as a typed +attribute alongside `account: Account[TMeta]`: + +```python +@dataclass +class RequestContext(ToolContext, Generic[TMeta]): + account: Account[TMeta] = field(default_factory=lambda: Account(id="")) + auth_info: AuthInfo | None = None + auth_principal: str | None = None # ← NEW: typed access for adopter methods + now: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) +``` ### D10. Idempotency middleware ordering — wrapper builds correctly; runtime assert dropped @@ -518,19 +666,48 @@ async def get_adcp_capabilities( ) ``` -### D13. Vertical-slice example: `examples/hello_seller.py` + integration test +### D13. Vertical-slice examples: two runnable files + integration tests + +**Decision:** ship **two** runnable single-file examples plus matching +integration tests. The TaskHandoff projection (D6) is the most novel +piece of the foundation and the highest-risk for adopter +mis-implementation; covering it via a single integration test inside +`hello_seller.py` is too thin a guard. + +**`examples/hello_seller.py`** — sync flow only. Demonstrates: + +* `DecisioningPlatform` subclass with `capabilities` + `accounts` +* `get_products` sync read returning typed `GetProductsResponse` +* `create_media_buy` sync success returning typed + `CreateMediaBuySuccessResponse` +* `serve()` boot -**Decision:** ship a runnable single-file example -(`examples/hello_seller.py`) plus an integration test -(`tests/test_hello_seller_integration.py`) that boots the example via -ASGI transport, makes an MCP `tools/call` to a sync `get_products`, -asserts the response round-trips. Plus async-handoff path coverage and -AdcpError wire-projection coverage. +**`examples/hello_seller_async_handoff.py`** — hybrid flow. +Demonstrates: + +* `create_media_buy` returns `ctx.handoff_to_task(self._review)` for + unfamiliar buyers, sync success for pre-approved +* The `_review` async handoff fn updates progress mid-flight, then + completes +* Buyer can poll `tasks/get` and see `Submitted` → `Working` → + `Completed` lifecycle +* `AdcpError` raise from inside the platform method gets projected to + the wire `adcp_error` envelope + +**Two integration tests:** + +* `tests/test_hello_seller_integration.py` — boots + `hello_seller.py` via ASGI, MCP `tools/call` round-trip +* `tests/test_hello_seller_async_handoff_integration.py` — boots the + handoff example, exercises the full `Submitted` envelope + serialization, registry has the task, terminal-completion path + surfaces via `tasks/get` **Rationale:** the foundation PR's value claim is "the seams compose -end-to-end." Without a working example the claim is unverified. -Integration test is the seam-composition regression guard for every -subsequent change. +end-to-end." Without working examples the claim is unverified. Two +examples instead of one because TaskHandoff is the headline novel +feature; one example exercising both sync + handoff would mix +concerns and be harder for adopters to read as a template. ### D14. `_invoke_platform_method` contract + `REQUIRED_METHODS_PER_SPECIALISM` tolerance @@ -587,17 +764,21 @@ def validate_platform(platform: DecisioningPlatform) -> None: missing: list[tuple[str, str]] = [] for specialism in platform.capabilities.specialisms: # Tolerate unknown specialisms (forward-compat with v6.1+ specs) - # — log at debug, don't KeyError. validate_platform's job is to - # catch missing methods for KNOWN specialisms; unknown ones get - # a pass-through with a debug log so spec evolution doesn't - # break server boot. + # — but UserWarning, not DEBUG. A typo like "sales-non-guarateed" + # (missing 'n') silently disables required-method checking + # otherwise. UserWarning gets the forward-compat benefit AND + # catches typos at server boot. Same severity as the + # missing-handler-registration UserWarning in D4. required = REQUIRED_METHODS_PER_SPECIALISM.get(specialism) if required is None: - logger.debug( - "validate_platform: specialism %r is not known to this " - "framework version; skipping required-method check. " - "Upgrade adcp-server if buyers expect this specialism.", - specialism, + warnings.warn( + f"DecisioningPlatform claims unknown specialism {specialism!r}. " + "Either this is a typo (compare against the AdCP 3.0 specialism " + f"enum: {sorted(REQUIRED_METHODS_PER_SPECIALISM.keys())}), " + "or your framework version predates the spec. Required-method " + "validation is skipped for this specialism.", + UserWarning, + stacklevel=3, ) continue for method_name in required: @@ -651,13 +832,16 @@ public surface — minor bump). | `adcp/decisioning/serve.py` | ~150 | Wrapper around `adcp.server.serve`. Builds handler + middleware + context_factory (returns `RequestContext`, NOT `ToolContext`) + executor. `create_adcp_server_from_platform` seam returns `(handler, middleware, context_factory)` 3-tuple. | | `adcp/decisioning/specialisms/sales.py` | (existing, +10) | Add `TOOLS: set[str]` constant. | | `adcp/decisioning/platform.py` | (existing, +25) | Add `__init_subclass__` validator (D11) + `BaseModel` MRO-conflict docstring note. | -| `examples/hello_seller.py` | ~50 | Runnable single-file example exercising the full vertical slice (D13). | -| `tests/test_decisioning_dispatch.py` | ~450 | Middleware-mutation correctness; D9 `caller_identity = account.id` + `metadata["adcp_decisioning.auth_principal"]` retains raw principal; AdcpError catch + wire projection (including from sync executor branch); TaskHandoff projection (sync + async paths); sync handoff body sees ContextVar set in request scope (D6 sync-context propagation regression); validate_platform fail-fast; tolerant unknown-specialism path; `_invoke_platform_method` contract (D14). | -| `tests/test_decisioning_task_registry.py` | ~100 | `TaskRegistry` Protocol shape; `InMemoryTaskRegistry` issue/update/complete/fail; account-scoped `get` returns None on cross-tenant probe; concurrent issue (no task_id collision). | +| `examples/hello_seller.py` | ~50 | Sync flow vertical slice (D13). | +| `examples/hello_seller_async_handoff.py` | ~80 | Hybrid flow vertical slice — TaskHandoff projection + Submitted envelope round-trip + AdcpError path (D13). | +| `tests/test_decisioning_dispatch.py` | ~500 | Middleware-mutation correctness; D9 composite `caller_identity = f"{store_qualname}:{account.id}"` (cross-store leak regression); D9 `auth_principal` typed attribute population; AdcpError catch + wire projection (including from sync executor branch); TaskHandoff projection (sync + async paths); sync handoff body sees ContextVar set in request scope (D6 sync-context propagation regression); validate_platform fail-fast; D14 unknown-specialism `UserWarning` (typo regression); `_invoke_platform_method` contract (D14); arg-projection kwargs path (D1 — verifies `update_media_buy` shim refactor-safety). | +| `tests/test_decisioning_task_registry.py` | ~100 | `TaskRegistry` Protocol shape; `InMemoryTaskRegistry` issue/update/complete/fail; concurrent issue (no task_id collision). | +| `tests/test_decisioning_task_registry_cross_tenant.py` | ~80 | **Hostile-probe regression (round-3 finding):** account A creates a task; account B with different `account_id` probes for it via `get(task_id=A's_id, account_id=B)`; expect None, NOT raw_record. Adopter regressing to `if not found: return raw_record` would surface in production without this test. Plus: `complete()` then cross-tenant `get` still returns None; `fail()` then cross-tenant `get` still returns None. | | `tests/test_decisioning_platform_validation.py` | ~50 | D11: platform without `capabilities` fails at class definition; platform without `accounts` fails at class definition; valid platform passes. | | `tests/test_decisioning_capabilities_synthesis.py` | ~80 | D12 unit test: synthesized `get_adcp_capabilities` response matches `platform.capabilities` field-for-field. Cheaper than driving via integration test. | -| `tests/test_decisioning_handler_codegen.py` | ~80 | Regen-drift: regen `handler.py` into tempdir, `git diff --exit-code`. Mirrors `tests/test_mcp_schema_drift.py` pattern. Codegen-time fail-fast on missing Pydantic Request type. | -| `tests/test_hello_seller_integration.py` | ~150 | End-to-end: boot example via ASGI, MCP `tools/call` hits sync `get_products`, response round-trips. Async handoff path: `create_media_buy` returns `TaskHandoff`, Submitted envelope serializes correctly, registry has the task. AdcpError path: hostile budget rejected with structured-error envelope. | +| `tests/test_decisioning_handler_codegen.py` | ~80 | Regen-drift: regen `handler.py` into tempdir, `git diff --exit-code`. Mirrors `tests/test_mcp_schema_drift.py` pattern. **Drift error message asserts the prescriptive form** (round-3 finding) — names `uv run python scripts/generate_decisioning_handler.py` verbatim. Codegen-time fail-fast on missing Pydantic Request type. | +| `tests/test_hello_seller_integration.py` | ~150 | End-to-end sync: boot example via ASGI, MCP `tools/call` hits sync `get_products` + sync `create_media_buy`, response round-trips. AdcpError path: hostile budget rejected with structured-error envelope. | +| `tests/test_hello_seller_async_handoff_integration.py` | ~180 | End-to-end hybrid: boot the handoff example, MCP `tools/call` to `create_media_buy` returns `TaskHandoff`, Submitted envelope serializes correctly, `tasks/get` returns Submitted → Working → Completed lifecycle, registry has the terminal artifact. | **Foundation PR total:** ~2100 lines (~250 generated, ~700 tests). After prep PR + this lands: ~3500 lines on top of 1500-line foundation @@ -753,3 +937,83 @@ code-reviewer) revised or strengthened from the round-1 design: unknown specialisms (forward-compat with v6.1+ specs). * **File plan split** into prep PR + foundation PR. Total grew from ~1900 to ~2275 lines (extra tests for round-2-surfaced cases). + +## Round-3 review changelog + +User feedback on the published design doc (PR #316). Eight items in +priority order; all resolved by tightening D1 / D5 / D9 / D13 / D14 +and adding cross-tenant + arg-projection regression tests. + +* **D9 (Item 1) — Account.id uniqueness elevated to a framework-enforced + security boundary.** Round-2 left global uniqueness as adopter + responsibility; one buggy `AccountStore` would silently leak + idempotency-cache entries across stores. Cache scope key composed as + `f"{account_store.__class__.__qualname__}:{account.id}"` so two stores + collision-prone on `id` alone (e.g. `SingletonAccounts(account_id="x")` + vs. `ExplicitAccounts` returning `Account(id="x")`) get structural + isolation. The framework enforces; adopters can't downgrade. +* **D9 (Item 6) — `RequestContext.auth_principal` typed attribute.** + `caller_identity = account.id` is correct *semantically* but the + middleware-facing field name now misleads (it's the cache scope key, + not the auth principal). Added typed `auth_principal: str | None` on + `RequestContext` (sourced from `AuthInfo.principal` when present) so + middleware reading "who authenticated this request" has a + load-bearing field name. +* **D14 (Item 3) — Unknown specialisms now `UserWarning`, not DEBUG.** + Round-2 made `REQUIRED_METHODS_PER_SPECIALISM.get(s, set())` tolerant + for forward-compat. But typos like `sales-non-guarateed` (missing 'n') + silently pass tolerance and reach buyers as a no-method platform. + `UserWarning` at boot catches typos in CI without breaking + v6.1+ forward compat (warnings are non-fatal and logged once per + specialism per process). +* **D1 (Item 4) — Codegen drift error is prescriptive.** + `tests/test_decisioning_codegen_drift.py` failure message names the + exact command (`uv run python scripts/generate_decisioning_handler.py`) + and links the rationale (`docs/proposals/decisioning-platform-dispatch-design.md#d1`). + CI failures should tell a contributor *what to type next*, not just + *what's wrong*. +* **D1 (Item 8) — Arg-projection emits explicit kwargs.** `**kwargs` + unpack would silently swallow Pydantic field renames. The generator + emits the kwargs by name (`platform.update_media_buy(media_buy_id=req.media_buy_id, patch=req, ctx=ctx)`) + so a future Pydantic field rename trips a `NameError` at codegen time + rather than a runtime KeyError post-deploy. +* **D5 (Item 5) — `ThreadPoolExecutor` configurability.** Three knobs + on `create_adcp_server_from_platform`: + + * `executor=` — bring-your-own (instrumentation, custom pool) + * `thread_pool_size=int` — convenience override + * default — `ThreadPoolExecutor(max_workers=min(32, os.cpu_count() * 4))` + with `thread_name_prefix="adcp-decisioning-"` + + `executor` and `thread_pool_size` are mutually exclusive (raises + `ValueError` at server construction). Lifecycle: framework-owned + pools shut down via the existing serve-loop teardown hook; BYO pools + are the adopter's responsibility (documented). +* **D13 (Item 7) — Two example files, not one.** Original plan had a + single `examples/hello_seller.py` covering the sync path. Added + `examples/hello_seller_async_handoff.py` exercising: + + * The hybrid `SalesResult[T]` return shape (sync fast path *or* + `ctx.handoff_to_task(fn)`) + * `AdcpError(code='BUDGET_TOO_LOW', recovery='correctable', + field='total_budget')` raise-and-catch round-trip through the + dispatcher + + Two examples make the hybrid pattern concrete; one example would + bury the harder case in commentary. +* **File plan additions for items 1, 2, 3, 6, 8:** + + * `tests/test_decisioning_task_registry_cross_tenant.py` — hostile + probe regression: account A creates task `t_xyz`, account B calls + `tasks_get(task_id="t_xyz")`, must get 404 not B's view of A's + task. (Item 2.) + * `tests/test_hello_seller_async_handoff_integration.py` — wire-shape + assertions for both hybrid arms + AdcpError envelope. (Item 7.) + * `tests/test_decisioning_dispatch.py` extended with: composite + `caller_identity` cache-scope-key construction (Item 1), + `auth_principal` attribute population from `AuthInfo` (Item 6), + UserWarning emission for unknown specialism (Item 3), arg-projection + explicit-kwargs path including Pydantic field-rename simulation + (Item 8). + + Foundation PR total grew from ~2275 to ~2475 lines. From 21a93e12cfcc699328741f1d02d1531946595329 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Thu, 30 Apr 2026 05:40:43 -0400 Subject: [PATCH 04/18] docs(decisioning): apply round-4 cross-language feedback to dispatch design MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round-4 review pass synthesizes (a) the TS team's review of the parallel @adcp/client port (PR #1005, EmmaLouise2018), (b) the TS team's decisioning-platform-python-port-v2.md RFC, and (c) Yahoo's ask for typed framework-owned state threading on RequestContext. Guiding principle ported from the TS port: "make it impossible for an implementer to screw up via typing." Python can't match TS's compile-time RequiredPlatformsFor gate, but per-method typed surfaces, runtime validate_platform fail-fast, and Protocol structural matching close most of the gap. Highlights: - D15 NEW: typed RequestContext sub-readers (state + resolve). - StateReader (sync) — find_by_object, find_proposal_by_id, governance_context, workflow_steps. Lets platforms read prior workflow context without re-querying their own DB. - ResourceResolver (async) — property_list, collection_list, creative_format. Framework-mediated cache + validation. - Surface ships in v6.0 with no-op stub backings; impls fill in for v6.1 (same gating as TS side). Locks the typed contract so adopters write the right shape from day one. - Round-4 changelog covers 8 cross-language items applied: - D14 enum coverage (Emma #6) - D7+serve() prod gate on InMemoryTaskRegistry (Emma #8) - Dispatch AdcpError projection consistency (Emma #10) - D6 sync-handoff register-before-cleanup race (Emma #11) - validate_platform catches validator throws (Emma #16) - Per-server status-change bus, not module-level singleton (Emma #17) - AdcpError ACCOUNT_NOT_FOUND semantic narrowing (Emma #18) - CI lint: examples can't reach into src/ (Emma #5) - Bugs structurally avoided in our hybrid SalesResult[T] design documented (Emma #2, #3, #13, design concern #14) — worth calling out in foundation PR description; the framework-design choice gets the credit. - File plan additions: state.py, resolve.py, context.py extensions for D15; four new test files for Round-4 regressions. Foundation PR total grew from ~2475 to ~2965 lines. - Items deferred to follow-up PRs: ErrorCode Literal codegen (Emma #19), workflow-step/proposal/governance backing store (D15 v6.1), tasks/get wire surface. - TS-only items (no Python equivalent) explicitly enumerated. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../decisioning-platform-dispatch-design.md | 323 +++++++++++++++++- 1 file changed, 319 insertions(+), 4 deletions(-) diff --git a/docs/proposals/decisioning-platform-dispatch-design.md b/docs/proposals/decisioning-platform-dispatch-design.md index 5d30e5339..d7e49e46a 100644 --- a/docs/proposals/decisioning-platform-dispatch-design.md +++ b/docs/proposals/decisioning-platform-dispatch-design.md @@ -1,7 +1,7 @@ # DecisioningPlatform dispatch design (post-review) Pre-implementation reference for the `adcp.decisioning.{handler, dispatch, -serve, task_registry}` modules. Synthesizes 7 reviewer passes: +serve, task_registry}` modules. Synthesizes 8 reviewer passes: * **Round 1** (initial design): agentic-product-architect, python-expert * **Round 2** (post-codegen-and-framing additions): agentic-product-architect @@ -11,8 +11,14 @@ serve, task_registry}` modules. Synthesizes 7 reviewer passes: Account.id leak boundary, cross-tenant probe regression, validation noise, codegen DX, executor configurability, field-name semantics, example coverage, kwarg unpacking +* **Round 4** (cross-language: TS team review of the parallel `@adcp/client` + port + the TS team's "Python port v2" RFC + Yahoo's ask for typed + metadata threading): RequestContext typed sub-readers (`state` / + `resolve`), validate_platform tightening, `AdcpError` projection + consistency, ErrorCode codegen, in-memory task gate, per-server + status-change bus, examples-import lint -Authoritative through D14. Tracks "things deferred" for v6.1 and beyond. +Authoritative through D15. Tracks "things deferred" for v6.1 and beyond. ## Decisions @@ -803,6 +809,146 @@ spec evolution. The buyer pays for that with a `tools/list` that doesn't include the new specialism's tools, which is the right fail-soft behavior. +### D15. `RequestContext` typed sub-readers — `state` and `resolve` + +**Decision:** widen `RequestContext[TMeta]` from +`{account, auth_info, now, handoff_to_task}` to add two typed +framework-owned sub-readers: + +```python +@dataclass +class RequestContext(ToolContext, Generic[TMeta]): + account: Account[TMeta] + auth_info: AuthInfo | None = None + auth_principal: str | None = None # round-3 D9 + now: datetime = field(default_factory=...) + + state: StateReader = field(default_factory=...) # NEW (Round 4) + resolve: ResourceResolver = field(default_factory=...) # NEW (Round 4) + + def handoff_to_task(self, fn) -> TaskHandoff[T]: ... +``` + +`StateReader` exposes sync reads on framework-owned in-flight workflow +state (no DB hit on the platform side): + +```python +class StateReader(Protocol): + def find_by_object( + self, + type: WorkflowObjectType, # 'media_buy' | 'creative' | 'product' | 'plan' | 'audience' | 'rights_grant' | 'task' + id: str, + ) -> Sequence[WorkflowStep]: + """Chronological steps that touched this object on this account.""" + ... + + def find_proposal_by_id(self, proposal_id: str) -> Proposal | None: + """Resolve a proposal_id threaded across get_products → refine + → create_media_buy without platform code.""" + ... + + def governance_context(self) -> GovernanceContextJWS | None: + """Currently in-flight verified governance context (JWS string) + or None for non-governance flows. Framework verifies signature, + plan-binding, seller-binding, phase-binding before exposure; + platform code can trust the value.""" + ... + + def workflow_steps(self) -> Sequence[WorkflowStep]: + """All chronological steps for this request's account. + Audit-read shape.""" + ... +``` + +`ResourceResolver` exposes async framework-mediated fetches with cache ++ validation built-in: + +```python +class ResourceResolver(Protocol): + async def property_list(self, list_id: str) -> PropertyList: + """Validates the id against the seller's declared lists before + returning.""" + ... + + async def collection_list(self, list_id: str) -> CollectionList: ... + + async def creative_format( + self, + format_id: FormatReferenceStructuredObject, + ) -> Format: + """Routes through ``capabilities.creative_agents`` declaration + with a 1h cache; self-hosted formats hit the local + CreativePlatform.list_formats(). Returns the resolved Format + with full asset slot definitions.""" + ... +``` + +**Why this matters (Yahoo's ask):** without these readers, every +platform method that needs prior workflow context (e.g., +`update_media_buy` checking what creative state the media buy is in, +`refine_products` reading proposal context, `get_media_buy_delivery` +reading governance bindings) has to re-query the platform's own DB, +duplicating state the framework already owns and re-validating +references the framework already validated. The TS-side approach +gives platforms typed read-only views and Yahoo specifically asked +for parity in the Python SDK. + +**Why typed sub-readers, not flat `ctx.workflow_steps()` / +`ctx.property_list(...)` methods:** the namespacing is +load-bearing for adopter mental model. `state.*` = sync, "what does +the framework know"; `resolve.*` = async, "fetch + validate". Coding +agents pattern-match the namespace. Flattening loses that. + +**Why `Protocol`-typed sub-readers, not concrete classes:** lets +adopters substitute test doubles in unit tests via dataclass replacement +(`replace(ctx, state=fake_state_reader)`). Concrete classes would +force monkey-patching. + +**v6.0 ship scope:** ship the `Protocol`-typed surface in the +foundation PR with default impls that return empty / raise +`NotImplementedError("landing in v6.1")`. Do NOT block foundation on +the workflow-step backing store (that's a v6.1 concern — same gating +as the TS side, where the round-2 review explicitly says "landing in +rc.1"). The Protocol shape locks the contract; impls fill in. + +**Field ordering in `RequestContext`:** `state` and `resolve` come +AFTER `account` / `auth_info` / `now` (existing fields) so existing +test fixtures and downstream code that constructs `RequestContext` +positionally don't break. New fields use `field(default_factory=...)` +defaults pointing at no-op stub implementations: + +```python +class _NotYetWiredStateReader: + def find_by_object(self, type, id): + return () + def find_proposal_by_id(self, proposal_id): + return None + def governance_context(self): + return None + def workflow_steps(self): + return () + +class _NotYetWiredResolver: + async def property_list(self, list_id): + raise NotImplementedError( + "ResourceResolver.property_list landing in v6.1 — " + "see docs/proposals/decisioning-platform-dispatch-design.md#d15" + ) + # ... etc +``` + +The stubs let foundation-PR examples and tests construct +`RequestContext()` without wiring a backend; production deployments +get the v6.1 backing store when it lands. + +**Rationale for shipping the surface now even with stub backings:** +adopters write platform method bodies that read `ctx.state.*` and +`ctx.resolve.*`. If the surface lands in v6.1 instead of v6.0, +every adopter's method bodies need to be rewritten to thread state +through `ctx.account.metadata` (or worse, through their own +re-implementation of the workflow store). Locking the typed surface +in v6.0 lets adopters write the right shape from day one. + ## File plan **Two PRs**, splitting the framework-shared code from the @@ -830,6 +976,9 @@ public surface — minor bump). | `adcp/decisioning/dispatch.py` | ~350 | `decisioning_dispatch_middleware`, `_invoke_platform_method`, `validate_platform` (with tolerant `REQUIRED_METHODS_PER_SPECIALISM.get`), executor lifecycle (allocate in `serve()`, shutdown via existing framework hook), `_project_handoff` (sync needs explicit `copy_context`; async gets it free from `create_task`). | | `adcp/decisioning/task_registry.py` | ~150 | `TaskRegistry` Protocol with pinned shape contracts (D7) + `InMemoryTaskRegistry` stub + `TaskHandoffContext` (consumed by handoff fns; carries `id` + `update(progress)` + `heartbeat()` stub). | | `adcp/decisioning/serve.py` | ~150 | Wrapper around `adcp.server.serve`. Builds handler + middleware + context_factory (returns `RequestContext`, NOT `ToolContext`) + executor. `create_adcp_server_from_platform` seam returns `(handler, middleware, context_factory)` 3-tuple. | +| `adcp/decisioning/state.py` | ~80 | **D15** — `StateReader` Protocol + `_NotYetWiredStateReader` no-op default + `WorkflowStep` / `WorkflowObjectType` / `Proposal` / `GovernanceContextJWS` types. | +| `adcp/decisioning/resolve.py` | ~80 | **D15** — `ResourceResolver` Protocol + `_NotYetWiredResolver` raise-with-pointer default + `PropertyList` / `CollectionList` / `Format` typed return types (re-exported from `adcp.types`). | +| `adcp/decisioning/context.py` | (existing, +30) | **D15** — add `state: StateReader` and `resolve: ResourceResolver` fields with stub defaults. Round-3: `auth_principal: str \| None` typed attribute. | | `adcp/decisioning/specialisms/sales.py` | (existing, +10) | Add `TOOLS: set[str]` constant. | | `adcp/decisioning/platform.py` | (existing, +25) | Add `__init_subclass__` validator (D11) + `BaseModel` MRO-conflict docstring note. | | `examples/hello_seller.py` | ~50 | Sync flow vertical slice (D13). | @@ -842,9 +991,13 @@ public surface — minor bump). | `tests/test_decisioning_handler_codegen.py` | ~80 | Regen-drift: regen `handler.py` into tempdir, `git diff --exit-code`. Mirrors `tests/test_mcp_schema_drift.py` pattern. **Drift error message asserts the prescriptive form** (round-3 finding) — names `uv run python scripts/generate_decisioning_handler.py` verbatim. Codegen-time fail-fast on missing Pydantic Request type. | | `tests/test_hello_seller_integration.py` | ~150 | End-to-end sync: boot example via ASGI, MCP `tools/call` hits sync `get_products` + sync `create_media_buy`, response round-trips. AdcpError path: hostile budget rejected with structured-error envelope. | | `tests/test_hello_seller_async_handoff_integration.py` | ~180 | End-to-end hybrid: boot the handoff example, MCP `tools/call` to `create_media_buy` returns `TaskHandoff`, Submitted envelope serializes correctly, `tasks/get` returns Submitted → Working → Completed lifecycle, registry has the terminal artifact. | +| `tests/test_decisioning_context_state_resolve.py` | ~120 | **D15** — `StateReader` Protocol structural match (custom impl satisfies); `ResourceResolver` Protocol structural match; default `_NotYetWiredStateReader` returns empty sequences (NOT raise — adopters reading optimistic state shouldn't crash); default `_NotYetWiredResolver.property_list()` raises `NotImplementedError` with the design-doc anchor in the message; substituting test doubles via `dataclasses.replace(ctx, state=fake)` works (round-trip regression). | +| `tests/test_decisioning_validate_platform_strict.py` | ~120 | **Round-4 (Emma #6 + #16):** specialism enum-coverage check (declaring a known specialism that has no `REQUIRED_METHODS_PER_SPECIALISM` entry must NOT silently pass — must fail server boot pointing at the spec drift); validator throws are caught and surface as `AdcpError("INVALID_REQUEST", ...)` rather than crashing the server boot. | +| `tests/test_decisioning_in_memory_registry_prod_gate.py` | ~80 | **Round-4 (Emma #8):** `serve()` + `InMemoryTaskRegistry` + `production` env raises `AdcpError` unless `ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1` set. Sales-broadcast-tv adopter forced into HITL path is the regression case. | +| `tests/test_decisioning_status_change_isolation.py` | ~80 | **Round-4 (Emma #17):** two `serve()` instances in the same process route their own `publish_status_change` events to per-instance subscribers, NOT a module-level singleton. Concurrent test files don't clobber each other's bus. | -**Foundation PR total:** ~2100 lines (~250 generated, ~700 tests). -After prep PR + this lands: ~3500 lines on top of 1500-line foundation +**Foundation PR total:** ~2475 lines (~250 generated, ~1100 tests). +After prep PR + this lands: ~3850 lines on top of 1500-line foundation skeleton already committed. ## Things deferred (track separately) @@ -1017,3 +1170,165 @@ and adding cross-tenant + arg-projection regression tests. (Item 8). Foundation PR total grew from ~2275 to ~2475 lines. + +## Round-4 review changelog + +Cross-language review pass — synthesizes (a) the TS team's review of +the parallel TypeScript port (`adcontextprotocol/adcp-client` PR #1005, +EmmaLouise2018 round-1), (b) the TS team's `decisioning-platform-python-port-v2.md` +RFC for what the Python SDK should ship, and (c) Yahoo's specific ask +for typed metadata + framework-owned state threading on +`RequestContext`. + +**Guiding principle the TS port adopted, ported here:** "make it +impossible for an implementer to screw up via typing." Python can't +match TS's compile-time `RequiredPlatformsFor` gate, but per-method +typed surfaces, runtime `validate_platform` boot-time checks, typed +`RequestContext` sub-readers, and `Protocol` structural matching close +most of the gap. Where TS got compile-time enforcement we get +boot-time fail-fast; where TS got "buyer-supplied data can't reach +this type" we get the same property via dispatch type-identity. + +### What's structurally avoided in our Python design + +The TS team's round-1 review surfaced bugs that are **structurally +unrepresentable in our hybrid `SalesResult[T]` design**: + +* **Emma #2 — `validatePlatform` allows "neither defined" path + → runtime crash.** Python uses one method per tool returning + `SalesResult[T]`, not dual `create_media_buy` + `create_media_buy_task`. + No "both defined" or "neither defined" failure modes exist. +* **Emma #3 — Missing `*Task` arms for 4 of 6 Submitted-bearing + tools.** Same reason — every mutating tool is hybrid via + `SalesResult[T]`. Python's structural confirmation: schemas show + Submitted arms on `update_media_buy`, `get_products`, `build_creative`, + `sync_catalogs` (in addition to `create_media_buy` and `sync_creatives`). +* **Emma #13 — Compile-time XOR for dual-method via TS discriminated + unions.** N/A — single method per tool. +* **Emma's design concern #14 — "Always declare HITL, resolve + immediately" anti-pattern that taxes every sync buyer with `tasks_get` + polling.** Python's `TaskHandoff[T]` is exactly the pattern Emma + asked for (`throw RequiresReviewError` from sync, framework converts + to `submitted` envelope). Worth calling out in the foundation PR + description so the framework-design choice gets the credit. + +### Items applied to the Python design + +* **D14 (Emma #6) — specialism enum coverage check.** Round-3 caught + *unknown* specialisms with `UserWarning`. Round-4 catches the inverse: + declaring a *known* specialism (in the wire enum) that has no + `REQUIRED_METHODS_PER_SPECIALISM` entry must NOT silently pass — must + fail server boot pointing at the spec drift. Test: + `test_decisioning_validate_platform_strict.py`. +* **D7 + serve() (Emma #8) — production gate on `InMemoryTaskRegistry`.** + `serve()` refuses to start when wired with `InMemoryTaskRegistry` and + `ADCP_ENV=production` (or equivalent) unless + `ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1` opt-in. Sales-broadcast-tv + adopters are *structurally forced* into the HITL path which depends on + the registry — silent in-memory fallback is a real prod foot-gun. + Test: `test_decisioning_in_memory_registry_prod_gate.py`. +* **Dispatch (Emma #10) — `AdcpError` projection consistency.** + Every code path that can raise `AdcpError` (specialism methods, + account resolver, validators, capability synthesis, + `list_accounts`-shape reads) goes through the same wire-projection + in dispatch. No path falls back to generic `SERVICE_UNAVAILABLE`. + Pinned in D14 `_invoke_platform_method` contract; verified via + `test_decisioning_dispatch.py` extension (every code path covered). +* **D6 (Emma #11) — sync-handoff register-before-cleanup race.** + TS-side bug: `taskFn` resolving synchronously runs `composed.then` + cleanup before `_registerBackground` registers, leaking the entry. + Python equivalent in our `loop.run_in_executor` + `copy_context()` path: + if the handoff fn resolves before `task_registry.register()` writes + the entry, the cleanup hook may delete a non-existent record. Add + regression test in `test_decisioning_task_registry_cross_tenant.py` + asserting register-before-resolve ordering even for synchronously + completing handoff fns. +* **`validate_platform` (Emma #16) — catch validator throws.** + Wrap each per-specialism validator in try/except; on raise, surface + as `AdcpError("INVALID_REQUEST", ...)` rather than crashing server + boot or leaving the platform marked stuck-unverified. Test: + same file as #6 above. +* **Dispatch (Emma #17) — per-server status-change bus, not + module-level singleton.** Module-level `publishStatusChange` is hostile + to multi-tenant test isolation (concurrent `serve()` instances clobber + each other's bus). Use a per-server bus on the wrapper returned by + `create_adcp_server_from_platform`; `publish_status_change` is bound + via the per-server `RequestContext` (or via explicit `server.bus` + reference passed to background workers). Test: + `test_decisioning_status_change_isolation.py`. +* **`AdcpError` (Emma #18) — `ACCOUNT_NOT_FOUND` semantics.** + Document that `ACCOUNT_NOT_FOUND` is reserved for the resolver path + (`AccountStore.resolve` → `AdcpError(code='ACCOUNT_NOT_FOUND')`). + Specialism methods raising `ACCOUNT_NOT_FOUND` get re-mapped to + `INVALID_REQUEST` with a `field='account_id'` hint, so adopter misuse + doesn't pollute the error code's meaning to buyers. Update + `AdcpError` docstring + add a dispatch test. +* **`AdcpError` (Emma #19) — codegen `ErrorCode` literal.** + Currently `AdcpError(code: str)` is free-form. Generate an `ErrorCode` + Literal type from `schemas/cache/3.0.0/enums/error-code.json` so + `AdcpError(code='BUDGET_TOO_LO')` (typo) trips mypy at adopter + edit-time. Vendor codes outside the enum stay accepted via + `ErrorCode | str` union. Tracked as deferred (codegen task on the + drift-script PR after foundation). +* **CI lint (Emma #5) — examples can't reach into `src/`.** + `examples/hello_seller.py` MUST import from `adcp.decisioning`, not + `src/adcp/decisioning`. Add a lint to CI: any `from adcp.` import in + `examples/` rejecting `from src.adcp.` paths. Avoids the TS-side + three-source-of-truth bug. + +### D15 added — typed `RequestContext` sub-readers (Yahoo's ask) + +The TS team's `decisioning-platform-python-port-v2.md` RFC + Yahoo's +explicit request: widen `RequestContext[TMeta]` to include framework- +owned typed sub-readers `state` (sync workflow-state reads) and +`resolve` (async framework-mediated fetches). Without this, every +platform method that needs prior workflow context has to re-query its +own DB, duplicating state the framework already owns and re-validating +references the framework already validated. **Surface ships in v6.0 +with no-op stub backings; impls fill in for v6.1**, so adopters can +write the right shape from day one without rewriting later. See D15 +above for the full Protocol definitions and rationale. + +### File plan additions + +* `adcp/decisioning/state.py` (~80 lines) — `StateReader` Protocol + + stub +* `adcp/decisioning/resolve.py` (~80 lines) — `ResourceResolver` + Protocol + stub +* `adcp/decisioning/context.py` (+30 lines) — wire `state` + `resolve` + fields with stub defaults (D15) +* `tests/test_decisioning_context_state_resolve.py` (~120 lines) — + D15 Protocol structural match + test-double substitution regression +* `tests/test_decisioning_validate_platform_strict.py` (~120 lines) — + Emma #6 enum coverage + Emma #16 validator-throws fail-soft +* `tests/test_decisioning_in_memory_registry_prod_gate.py` (~80 lines) — + Emma #8 prod-gate regression +* `tests/test_decisioning_status_change_isolation.py` (~80 lines) — + Emma #17 per-server bus regression +* CI: examples-import lint rule (Emma #5) — added to ruff config + (`tool.ruff.lint.flake8-tidy-imports` ban-relative-imports for + `examples/**`) + +Foundation PR total grew from ~2475 to ~2965 lines (D15 + Round-4 +tests + Emma items). + +### Items deferred to follow-up PRs (not foundation-blocking) + +* **`ErrorCode` Literal codegen** (Emma #19) — separate codegen-script + PR after foundation. Tracking issue. +* **Workflow-step / proposal / governance backing store** for `state` + reader (D15 v6.1 backing impls). Foundation ships the no-op stub. +* **`tasks/get` wire surface** for adopter HITL polling — the framework + has the registry from foundation, but the wire endpoint that buyers + hit lands with `task_registry` follow-up PR. + +### TS-only items, no Python equivalent + +* Emma #1 (JWKS material comparison) — Python uses `cryptography` + full-key import; the bug is structurally unrepresentable. +* Emma #12 (`

>` cast widening) + — Python `TypeVar` with `default=` preserves narrowing through + `Protocol` parameterization. +* Emma #15 (`resolveByHost` O(N) parsing) — Python doesn't have that + surface yet. +* Emma #20 (`typesVersions` missing) — npm-only. From 86a147c7969ae15cc6eeacb1f413660e6cad828b Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Thu, 30 Apr 2026 06:10:50 -0400 Subject: [PATCH 05/18] docs(decisioning): pin framework-only RequestContext construction Round-4 follow-up: D15 documents that adopter code receives a RequestContext from the dispatch hydration helper on every request, never constructs one directly. Mirrors the TS port's to-context.ts:buildRequestContext contract. - D15 + RequestContext docstring add the @internal-construction note: direct construction is for tests only; adopters needing to modify context use dataclasses.replace. - Hydration helper _build_request_context in dispatch.py is the one production path; _NotYetWiredStateReader / _NotYetWiredResolver defaults exist solely so test fixtures and examples can construct a RequestContext without the framework. - Silent divergence between framework path and ad-hoc adopter construction is exactly the failure mode the typing-driven safety principle is supposed to prevent (no auth_principal plumbing, no v6.1 backing store hand-off). 19 decisioning unit tests still pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../decisioning-platform-dispatch-design.md | 33 +++++++++++++++++++ src/adcp/decisioning/context.py | 10 ++++++ 2 files changed, 43 insertions(+) diff --git a/docs/proposals/decisioning-platform-dispatch-design.md b/docs/proposals/decisioning-platform-dispatch-design.md index d7e49e46a..9d4d72e19 100644 --- a/docs/proposals/decisioning-platform-dispatch-design.md +++ b/docs/proposals/decisioning-platform-dispatch-design.md @@ -949,6 +949,39 @@ through `ctx.account.metadata` (or worse, through their own re-implementation of the workflow store). Locking the typed surface in v6.0 lets adopters write the right shape from day one. +**Framework-only construction (parity with TS `to-context.ts`).** +The `RequestContext` is supplied by the framework, never by the +adopter. The TS port pins this in `to-context.ts`'s file docstring +("Adopters should never construct a `RequestContext` themselves; the +framework supplies one to every specialism method call."). Mirror in +Python: + +* `RequestContext.__init__` is left as the dataclass-generated default + (necessary for `dataclasses.replace(ctx, ...)` in tests), but the + class docstring carries an `@internal-construction` note: "Adopter + code receives a `RequestContext` from the framework on every dispatch. + Direct construction is supported for tests only — production code that + builds one from outside the dispatch seam is a bug." +* The dispatch seam's hydration helper — + `_build_request_context(tool_ctx, account)` in `dispatch.py` — is the + ONE production path. Adopter wrappers / middleware that need to + modify the context use `dataclasses.replace(ctx, ...)`, not raw + construction. Documented on the helper's docstring with a worked + example for the `state` / `resolve` test-double substitution case. +* The `_NotYetWiredStateReader` and `_NotYetWiredResolver` defaults + exist *only* so test fixtures and `examples/hello_seller.py` can + construct a `RequestContext()` without the framework. Production + dispatch always supplies real (or real-stub-but-framework-instantiated) + readers via the hydration helper. This matches the TS shape where + the stub resolvers/readers live inside `buildRequestContext`, not + on adopter-construction paths. + +This pin matters because adopters who construct their own `RequestContext` +get neither the framework's `auth_principal` plumbing (D9) nor the +hydration helper's future v6.1 backing store. Silent divergence between +the framework path and ad-hoc adopter path is exactly the failure mode +the typing-driven safety principle is supposed to prevent. + ## File plan **Two PRs**, splitting the framework-shared code from the diff --git a/src/adcp/decisioning/context.py b/src/adcp/decisioning/context.py index 70a6f935c..c66c937ea 100644 --- a/src/adcp/decisioning/context.py +++ b/src/adcp/decisioning/context.py @@ -74,6 +74,16 @@ class RequestContext(ToolContext, Generic[TMeta]): A2A executor) consume it as a ``ToolContext`` while adopter Protocol methods read the typed :attr:`account` directly. + **Framework-only construction.** Adopter code receives a + ``RequestContext`` from the framework on every dispatch via the + hydration helper in ``adcp.decisioning.dispatch``. Direct + construction is supported for tests only — production code that + builds a ``RequestContext`` from outside the dispatch seam is a + bug. Adopters who need to modify the context (custom middleware, + test doubles for ``state`` / ``resolve``) should use + :func:`dataclasses.replace`, not raw construction. Mirrors the + TS-side ``to-context.ts:buildRequestContext`` contract. + :param account: The resolved account, with typed ``metadata: TMeta``. The framework's idempotency middleware reads ``ctx.caller_identity`` for cache scoping; the dispatch adapter From 53f24de1cc6b187b4590ab99a29a765063c4e9b5 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Thu, 30 Apr 2026 06:16:17 -0400 Subject: [PATCH 06/18] docs(decisioning): tighten D15 stub posture, governance gate, types Round-4 review on D15 surfaced five concerns; all addressed in-place on D15 plus a tightenings subsection in the round-4 changelog. - **Stub asymmetry resolved.** Both StateReader and ResourceResolver stubs emit a one-time UserWarning per method on first call. state.* still returns type-correct empty values (empty workflow-steps IS legitimate for fresh tenants); resolve.* still raises (an empty PropertyList is divergence the framework cannot silently paper over). Asymmetry now justified per-reader. - **governance_context() fail-fast.** Added capabilities.governance_aware: bool = False. validate_platform raises AdcpError at server boot if any governance-* specialism is claimed without a real StateReader wired AND no opt-in. Framework refuses to ship silent governance-gate skipping. Defaults False; non-governance flows untouched. - **Type-stability table added.** Lock all D15-referenced types in v6.0, not just the Protocols. Account, AuthInfo, Proposal, PropertyList, CollectionList, Format, FormatReferenceStructuredObject already in adcp.types.generated_poc; WorkflowStep, WorkflowObjectType, GovernanceContextJWS framework-internal in adcp.decisioning.state, shipped foundation-stable. - **creative_format(revalidate: bool = False).** Pinned in the Protocol contract so adopters with freshness needs aren't stuck on the impl's cache TTL. Cache TTL becomes impl detail; revalidate=True is the opt-out at the Protocol level. - **ADCP_ENV reuse.** Replaces free-form ADCP_ENV=production reference with the existing SDK helper at src/adcp/validation/client_hooks.py:68 (case-insensitive ADCP_ENV in {"prod", "production"}). One prod-detection mechanism. Test additions in test_decisioning_context_state_resolve.py (~150 lines): one-time UserWarning regression, governance opt-in fail-fast, revalidate parameter contract. Foundation PR total grew from ~2475 to ~2510 lines. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../decisioning-platform-dispatch-design.md | 287 ++++++++++++++++-- 1 file changed, 258 insertions(+), 29 deletions(-) diff --git a/docs/proposals/decisioning-platform-dispatch-design.md b/docs/proposals/decisioning-platform-dispatch-design.md index 9d4d72e19..24902fa9f 100644 --- a/docs/proposals/decisioning-platform-dispatch-design.md +++ b/docs/proposals/decisioning-platform-dispatch-design.md @@ -875,11 +875,25 @@ class ResourceResolver(Protocol): async def creative_format( self, format_id: FormatReferenceStructuredObject, + *, + revalidate: bool = False, ) -> Format: """Routes through ``capabilities.creative_agents`` declaration - with a 1h cache; self-hosted formats hit the local - CreativePlatform.list_formats(). Returns the resolved Format - with full asset slot definitions.""" + with a framework-managed cache; self-hosted formats hit the + local CreativePlatform.list_formats(). Returns the resolved + Format with full asset slot definitions. + + :param revalidate: If True, bypasses the framework cache and + re-fetches from the upstream creative-agent. Adopters with + freshness needs (e.g., creative submission validating + against the most-recent format spec) pass ``revalidate=True``; + most reads should use the default (False) to amortize the + agent round-trip. + + Cache TTL is implementation detail (defaults to 1h on the + reference impl); adopters who need stricter freshness use + ``revalidate=True`` rather than depending on the TTL value. + """ ... ``` @@ -904,42 +918,201 @@ adopters substitute test doubles in unit tests via dataclass replacement (`replace(ctx, state=fake_state_reader)`). Concrete classes would force monkey-patching. -**v6.0 ship scope:** ship the `Protocol`-typed surface in the -foundation PR with default impls that return empty / raise -`NotImplementedError("landing in v6.1")`. Do NOT block foundation on -the workflow-step backing store (that's a v6.1 concern — same gating -as the TS side, where the round-2 review explicitly says "landing in -rc.1"). The Protocol shape locks the contract; impls fill in. +**v6.0 ship scope:** ship the `Protocol`-typed surface AND every type +it references in the foundation PR. Backing impls land in v6.1; the +typed *contract* (Protocol shape + every referenced type) is +foundation-stable. Do NOT block foundation on the workflow-step +backing store, BUT do NOT punt the type definitions to v6.1 either — +adopters write the right shape from day one only if every type is +locked. -**Field ordering in `RequestContext`:** `state` and `resolve` come -AFTER `account` / `auth_info` / `now` (existing fields) so existing -test fixtures and downstream code that constructs `RequestContext` -positionally don't break. New fields use `field(default_factory=...)` -defaults pointing at no-op stub implementations: +**Type-stability table (concern from round-4 review):** + +| Type | Source | v6.0 status | +|---|---|---| +| `Account[TMeta]` | `adcp.decisioning.types` | locked | +| `AuthInfo` | `adcp.decisioning.context` | locked | +| `WorkflowStep` | NEW in `adcp.decisioning.state` (framework-internal, not on the wire) | locked in foundation as a frozen `@dataclass` | +| `WorkflowObjectType` | NEW in `adcp.decisioning.state` (framework-internal `Literal`) | locked in foundation | +| `Proposal` | `adcp.types.generated_poc.core.proposal` (already exists from spec codegen) | locked (generated) | +| `GovernanceContextJWS` | NEW in `adcp.decisioning.state` (`NewType('GovernanceContextJWS', str)`) | locked in foundation | +| `PropertyList` | `adcp.types.generated_poc.core.property_list_ref` (re-export `PropertyListReference` + the resolved-list type) | locked (generated) | +| `CollectionList` | `adcp.types.generated_poc.collection.collection_list` (already exists) | locked (generated) | +| `Format` | `adcp.types.generated_poc.core.format` (already exists) | locked (generated) | +| `FormatReferenceStructuredObject` | `adcp.types.generated_poc.core.format_id` (already exists) | locked (generated) | + +The framework-internal types (`WorkflowStep`, `WorkflowObjectType`, +`GovernanceContextJWS`) ship as foundation-stable dataclasses / +literals so adopter code that pattern-matches on them doesn't refactor +when v6.1 lands. The wire-spec types are already in the generated +`adcp.types` package — just re-exported under `adcp.decisioning.state` +for one-stop import. + +**Stub posture (UserWarning on first call) — concern from round-4 review:** + +Two failure modes drove the design before round-4: +1. *Silent-empty* (TS-side `findByObject: () => []`) reads an empty + sequence in v6.0; adopter writes + `if not state.workflow_steps(): proceed_without_history`; v6.1 + wires the backing store and the platform's branch flips silently. +2. *Eager-raise* (TS-side `resolve.propertyList: throw ...`) crashes + the request the moment any platform method touches the resolver, + forcing adopters to defensively guard every read. + +The round-4 fix splits the difference: **both `state` and `resolve` +emit a one-time `UserWarning` on first call to a not-yet-wired stub +method**, then return the type-correct empty value (state) or raise +(resolve). The asymmetry between empty-return (state) and raise +(resolve) is justified: + +* `state.*` reads are read-only inspections of framework-owned + in-flight state. An empty workflow-steps list IS the correct answer + when no steps have been emitted yet (a fresh tenant has no history). + Raising here would force adopters to wrap every audit-read in + try/except, including paths that are valid in production. The + UserWarning catches the "I forgot to wire the backing store" + deployment bug; the empty return preserves the legitimate + "no-history-yet" semantics. +* `resolve.*` fetches are validated lookups. An empty PropertyList in + v6.0 vs. a real one in v6.1 is a divergence the framework cannot + silently paper over. Raising forces adopters to either (a) opt out + by not calling `resolve.*` on the v6.0 stub, or (b) wire a real + resolver themselves. + +Stub impls: ```python +import warnings + +_STATE_STUB_WARNED: set[str] = set() # one-time per method-name + class _NotYetWiredStateReader: + """v6.0 stub. Returns type-correct empty values; emits a + one-time UserWarning per method on first call so adopters notice + they're reading uninitialized state.""" + + def _warn_once(self, method_name: str) -> None: + if method_name in _STATE_STUB_WARNED: + return + _STATE_STUB_WARNED.add(method_name) + warnings.warn( + f"ctx.state.{method_name}() called against the v6.0 stub " + "StateReader; backing store lands in v6.1. Reading empty " + "results — adopter code branching on this state will see " + "different values once the backing store is wired. See " + "docs/proposals/decisioning-platform-dispatch-design.md#d15", + UserWarning, + stacklevel=3, + ) + def find_by_object(self, type, id): + self._warn_once("find_by_object") return () + def find_proposal_by_id(self, proposal_id): + self._warn_once("find_proposal_by_id") return None + def governance_context(self): + # See "governance opt-in" subsection below — this branch is + # only reachable when no specialism declares + # capabilities.governance_aware=True. Server boot fails fast + # otherwise. + self._warn_once("governance_context") return None + def workflow_steps(self): + self._warn_once("workflow_steps") return () + class _NotYetWiredResolver: + """v6.0 stub. Raises with a pointer to the wire-up follow-up so + adopters who reach for resolve.* know exactly which v6.1 task + unblocks them.""" + async def property_list(self, list_id): raise NotImplementedError( - "ResourceResolver.property_list landing in v6.1 — " - "see docs/proposals/decisioning-platform-dispatch-design.md#d15" + f"ResourceResolver.property_list({list_id!r}) called against " + "the v6.0 stub. Backing fetcher lands in v6.1 — see " + "docs/proposals/decisioning-platform-dispatch-design.md#d15. " + "Foundation-PR adopters should not invoke ctx.resolve.* yet." ) - # ... etc + + async def collection_list(self, list_id): + raise NotImplementedError(...) # same shape + + async def creative_format(self, format_id, *, revalidate=False): + raise NotImplementedError(...) # same shape ``` -The stubs let foundation-PR examples and tests construct -`RequestContext()` without wiring a backend; production deployments -get the v6.1 backing store when it lands. +The UserWarning emits via the same `warnings` filter chain as the +unknown-specialism warning (D14) — adopters running pytest with +`filterwarnings = error` get a hard-fail on accidental stub reads; +production deployments get one log line per method per process. + +**`governance_context()` security stub (concern from round-4 review):** + +Returning `None` from `governance_context()` in v6.0 is a load-bearing +security stub: governance-aware adopter code reads +`ctx.state.governance_context()` to gate plan-binding / spend-authority +checks, and a v6.0 `None` skips the gate. v6.1 wires the gate and the +adopter's gate-skipping branch evaluates against real plans. + +**Fix: opt-in capability declaration with server-boot fail-fast.** +Add `governance_aware: bool = False` to `DecisioningCapabilities`. At +server boot, `validate_platform` walks specialisms; if any specialism +that requires governance threading is claimed (`governance-spend-authority`, +`governance-delivery-monitor`) AND `capabilities.governance_aware` +is not explicitly True AND no real `StateReader` is wired, +`validate_platform` raises: + +```python +raise AdcpError( + "INVALID_REQUEST", + message=( + "Platform claims governance-* specialism(s) but the v6.0 " + "StateReader stub does not provide governance_context(). " + "Either: (a) set capabilities.governance_aware=False and drop " + "the governance-* specialism claim until v6.1, or (b) wire a " + "custom StateReader on serve(state_reader=...) that returns " + "real GovernanceContextJWS values, or (c) wait for the v6.1 " + "backing-store impl. Silent governance-gate skipping is a " + "security boundary; the framework refuses to ship that." + ), + recovery="terminal", + details={"specialisms": [...claimed governance specialisms...]}, +) +``` + +**Why the explicit opt-in:** the alternative (raise on every +`governance_context()` call) is correct but louder than necessary for +the 90% non-governance flow. The opt-in puts the decision at server +boot (one place, fail-fast) rather than at every dispatched method. +Non-governance adopters get the empty-return + UserWarning path +unchanged; governance-claiming adopters fail to ship until they wire +real governance threading. + +`capabilities.governance_aware` doc: +```python +@dataclass +class DecisioningCapabilities: + # ... existing fields ... + + governance_aware: bool = False + """Set True ONLY when the platform implements governance-* specialisms + AND has wired a custom StateReader that returns real + GovernanceContextJWS values. Setting this True with the v6.0 stub + StateReader is a fail-fast at server boot: silent governance-gate + skipping is a security regression the framework refuses to allow. + Defaults False — non-governance adopters never touch this flag.""" +``` + +**Field ordering in `RequestContext`:** `state` and `resolve` come +AFTER `account` / `auth_info` / `now` (existing fields) so existing +test fixtures and downstream code that constructs `RequestContext` +positionally don't break. New fields use `field(default_factory=...)` +defaults pointing at the stub impls above. **Rationale for shipping the surface now even with stub backings:** adopters write platform method bodies that read `ctx.state.*` and @@ -947,7 +1120,9 @@ adopters write platform method bodies that read `ctx.state.*` and every adopter's method bodies need to be rewritten to thread state through `ctx.account.metadata` (or worse, through their own re-implementation of the workflow store). Locking the typed surface -in v6.0 lets adopters write the right shape from day one. ++ all referenced types in v6.0 lets adopters write the right shape +from day one; the UserWarning + governance opt-in keep the silent- +divergence failure modes off the table. **Framework-only construction (parity with TS `to-context.ts`).** The `RequestContext` is supplied by the framework, never by the @@ -1024,13 +1199,13 @@ public surface — minor bump). | `tests/test_decisioning_handler_codegen.py` | ~80 | Regen-drift: regen `handler.py` into tempdir, `git diff --exit-code`. Mirrors `tests/test_mcp_schema_drift.py` pattern. **Drift error message asserts the prescriptive form** (round-3 finding) — names `uv run python scripts/generate_decisioning_handler.py` verbatim. Codegen-time fail-fast on missing Pydantic Request type. | | `tests/test_hello_seller_integration.py` | ~150 | End-to-end sync: boot example via ASGI, MCP `tools/call` hits sync `get_products` + sync `create_media_buy`, response round-trips. AdcpError path: hostile budget rejected with structured-error envelope. | | `tests/test_hello_seller_async_handoff_integration.py` | ~180 | End-to-end hybrid: boot the handoff example, MCP `tools/call` to `create_media_buy` returns `TaskHandoff`, Submitted envelope serializes correctly, `tasks/get` returns Submitted → Working → Completed lifecycle, registry has the terminal artifact. | -| `tests/test_decisioning_context_state_resolve.py` | ~120 | **D15** — `StateReader` Protocol structural match (custom impl satisfies); `ResourceResolver` Protocol structural match; default `_NotYetWiredStateReader` returns empty sequences (NOT raise — adopters reading optimistic state shouldn't crash); default `_NotYetWiredResolver.property_list()` raises `NotImplementedError` with the design-doc anchor in the message; substituting test doubles via `dataclasses.replace(ctx, state=fake)` works (round-trip regression). | +| `tests/test_decisioning_context_state_resolve.py` | ~150 | **D15** — `StateReader` / `ResourceResolver` Protocol structural match; default `_NotYetWiredStateReader` returns empty sequences AND emits one-time `UserWarning` per method on first call (warning suppressed on subsequent calls — module-level set); `_NotYetWiredResolver.*` raises `NotImplementedError` with the design-doc anchor; substituting test doubles via `dataclasses.replace(ctx, state=fake)` works; **governance opt-in fail-fast (D15 round-4):** platform claiming `governance-spend-authority` with default stub `StateReader` raises `AdcpError("INVALID_REQUEST")` at server boot; same platform with `capabilities.governance_aware=False` and no governance specialism passes; same platform with custom `StateReader` returning real `GovernanceContextJWS` passes; **`creative_format(revalidate=True)` parameter regression** — calling stub with `revalidate=True` raises with the same message as `revalidate=False` (parameter is part of Protocol contract, not gated on stub). | | `tests/test_decisioning_validate_platform_strict.py` | ~120 | **Round-4 (Emma #6 + #16):** specialism enum-coverage check (declaring a known specialism that has no `REQUIRED_METHODS_PER_SPECIALISM` entry must NOT silently pass — must fail server boot pointing at the spec drift); validator throws are caught and surface as `AdcpError("INVALID_REQUEST", ...)` rather than crashing the server boot. | | `tests/test_decisioning_in_memory_registry_prod_gate.py` | ~80 | **Round-4 (Emma #8):** `serve()` + `InMemoryTaskRegistry` + `production` env raises `AdcpError` unless `ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1` set. Sales-broadcast-tv adopter forced into HITL path is the regression case. | | `tests/test_decisioning_status_change_isolation.py` | ~80 | **Round-4 (Emma #17):** two `serve()` instances in the same process route their own `publish_status_change` events to per-instance subscribers, NOT a module-level singleton. Concurrent test files don't clobber each other's bus. | -**Foundation PR total:** ~2475 lines (~250 generated, ~1100 tests). -After prep PR + this lands: ~3850 lines on top of 1500-line foundation +**Foundation PR total:** ~2510 lines (~250 generated, ~1130 tests). +After prep PR + this lands: ~3885 lines on top of 1500-line foundation skeleton already committed. ## Things deferred (track separately) @@ -1255,11 +1430,15 @@ unrepresentable in our hybrid `SalesResult[T]` design**: `test_decisioning_validate_platform_strict.py`. * **D7 + serve() (Emma #8) — production gate on `InMemoryTaskRegistry`.** `serve()` refuses to start when wired with `InMemoryTaskRegistry` and - `ADCP_ENV=production` (or equivalent) unless - `ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1` opt-in. Sales-broadcast-tv - adopters are *structurally forced* into the HITL path which depends on - the registry — silent in-memory fallback is a real prod foot-gun. - Test: `test_decisioning_in_memory_registry_prod_gate.py`. + the existing SDK convention `ADCP_ENV in {"prod", "production"}` + (case-insensitive — same logic as `adcp.validation.client_hooks._default_response_mode` + reads at `src/adcp/validation/client_hooks.py:68`) unless + `ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1` opt-in is set. + Sales-broadcast-tv adopters are *structurally forced* into the HITL + path which depends on the registry — silent in-memory fallback is a + real prod foot-gun. Reuses the existing prod-detection helper to + avoid drift between two env-var conventions; do not introduce a + new variable. Test: `test_decisioning_in_memory_registry_prod_gate.py`. * **Dispatch (Emma #10) — `AdcpError` projection consistency.** Every code path that can raise `AdcpError` (specialism methods, account resolver, validators, capability synthesis, @@ -1322,6 +1501,56 @@ with no-op stub backings; impls fill in for v6.1**, so adopters can write the right shape from day one without rewriting later. See D15 above for the full Protocol definitions and rationale. +**D15 round-4 review tightenings (post-publish):** + +* **Stub asymmetry fixed.** Original D15 had `state.*` returning empty + silently and `resolve.*` raising — different posture in two readers + doc'd in the same paragraph. Round-4 review caught the asymmetry as + a real adopter foot-gun (silent-empty masks the stub state until + v6.1 wires the backing store and the platform's branch flips + silently). Fix: both stubs emit a one-time `UserWarning` per method + on first call. `state.*` still returns type-correct empty values + (an empty workflow-steps list IS legitimate for fresh tenants); + `resolve.*` still raises (an empty `PropertyList` is divergence + the framework cannot silently paper over). The asymmetry is now + justified per-reader rather than left undocumented. +* **`governance_context()` fail-fast at server boot.** Returning + `None` from `governance_context()` in v6.0 was a load-bearing + security stub — adopters claiming governance-* specialisms get + `None` and skip the gate; v6.1 wires the gate and the + gate-skipping branch evaluates against real plans. Fix: add + `capabilities.governance_aware: bool = False`. At server boot, + `validate_platform` raises `AdcpError("INVALID_REQUEST")` if any + `governance-*` specialism is claimed AND no real `StateReader` is + wired AND `governance_aware` isn't explicitly opted into. The + framework refuses to ship silent governance-gate skipping; + adopters must wire real governance threading or drop the claim. +* **Type-stability table added.** Round-4 surfaced "lock all + D15-referenced types in v6.0, not just the Protocols." D15 now + includes a per-type table: `Account`, `AuthInfo`, `Proposal`, + `PropertyList`, `CollectionList`, `Format`, + `FormatReferenceStructuredObject` are all already in + `adcp.types.generated_poc/`; `WorkflowStep`, `WorkflowObjectType`, + `GovernanceContextJWS` are framework-internal types defined fresh + in `adcp.decisioning.state` and shipped foundation-stable. Adopter + code that pattern-matches on these types doesn't refactor when v6.1 + lands. +* **`creative_format(revalidate: bool = False)` parameter pinned in + the Protocol contract.** Round-4 caught the 1h cache TTL doc'd as + Protocol contract — adopters with freshness needs would be stuck. + Pinning `revalidate=` at the Protocol level moves the cache TTL + to impl detail and gives adopters an opt-out without depending on + any specific TTL value. Test: stub raises identically with + `revalidate=True` so the parameter contract is enforced even before + the v6.1 backing impl ships. +* **Env var convention reused.** Original Round-4 referenced + `ADCP_ENV=production` as a free-form string; round-4 review caught + the drift risk vs. existing SDK convention. Fix: reuse + `_default_response_mode` logic from + `src/adcp/validation/client_hooks.py:68` — + `ADCP_ENV in {"prod", "production"}` (case-insensitive). One + prod-detection mechanism, no drift. + ### File plan additions * `adcp/decisioning/state.py` (~80 lines) — `StateReader` Protocol + From b4b161601bc06b982c34a0f69376c7f5dc92d60a Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Thu, 30 Apr 2026 09:25:29 -0400 Subject: [PATCH 07/18] feat(decisioning): D15 typed RequestContext sub-readers (state, resolve) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the typed framework-owned sub-readers Yahoo asked for and the TS team's adcp-client PR #1005 already ships. Surface lands in v6.0; backing impls fill in for v6.1 — adopters write platform method bodies that read ctx.state.* and ctx.resolve.* against the real contract from day one rather than refactoring when v6.1 lands. Mirrors the TS-side `to-context.ts:buildRequestContext` shape 1:1: account, state (sync workflow-state reads), resolve (async framework-mediated fetches with cache + validation), auth_principal, handoff_to_task. Cross-language adopters get the same fields. What lands: - adcp/decisioning/state.py: StateReader Protocol + WorkflowStep frozen dataclass + WorkflowObjectType Literal + GovernanceContextJWS NewType + Proposal re-exported. _NotYetWiredStateReader v6.0 stub returns empty values + emits one-time UserWarning per method per process. - adcp/decisioning/resolve.py: ResourceResolver Protocol with property_list, collection_list, creative_format(revalidate=False). revalidate kwarg pinned in the Protocol contract — cache TTL is impl detail. _NotYetWiredResolver v6.0 stub raises NotImplementedError with design-doc anchor (#d15) on every method. Asymmetry vs. state stub justified per-reader: empty workflow list IS legitimate for fresh tenants; empty PropertyList is divergence the framework can't silently paper over. - adcp/decisioning/context.py: state, resolve, auth_principal fields on RequestContext with stub defaults via field(default_factory=...). - adcp/decisioning/platform.py: DecisioningCapabilities.governance_aware bool flag + GOVERNANCE_SPECIALISMS frozenset. Foundation-PR validate_platform reads these to fail-fast at server boot when a governance-* specialism is claimed without the opt-in. - adcp/decisioning/__init__.py: re-exports all D15 types. - adcp/types/__init__.py: surfaces FormatReferenceStructuredObject (already in _generated.py but missing from public surface). Snapshot regenerated. - tests/test_decisioning_context_state_resolve.py: 22 tests covering Protocol matching, structural custom impls, all four state stub methods (empty + warn-once + independent per-method), resolve stub raises with anchor + revalidate parameter contract enforced for both False/True, RequestContext defaults, dataclasses.replace test-double substitution, governance_aware default + opt-in, GOVERNANCE_SPECIALISMS pinned. Foundation tests: 39 passing (+22 from Stage 2). Full suite: 2417 passed, 17 skipped, 1 xfailed. ruff + mypy clean on touched files. Stage 2 of the foundation PR. Stage 3 (codegen + dispatch + serve) can start once prep PR #318 merges. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/adcp/decisioning/__init__.py | 28 ++ src/adcp/decisioning/context.py | 21 ++ src/adcp/decisioning/platform.py | 26 ++ src/adcp/decisioning/resolve.py | 165 +++++++++ src/adcp/decisioning/state.py | 242 +++++++++++++ src/adcp/types/__init__.py | 2 + tests/fixtures/public_api_snapshot.json | 1 + .../test_decisioning_context_state_resolve.py | 333 ++++++++++++++++++ 8 files changed, 818 insertions(+) create mode 100644 src/adcp/decisioning/resolve.py create mode 100644 src/adcp/decisioning/state.py create mode 100644 tests/test_decisioning_context_state_resolve.py diff --git a/src/adcp/decisioning/__init__.py b/src/adcp/decisioning/__init__.py index 19f05c42a..2c4f117fd 100644 --- a/src/adcp/decisioning/__init__.py +++ b/src/adcp/decisioning/__init__.py @@ -60,9 +60,25 @@ def create_media_buy( RequestContext, ) from adcp.decisioning.platform import ( + GOVERNANCE_SPECIALISMS, DecisioningCapabilities, DecisioningPlatform, ) +from adcp.decisioning.resolve import ( + CollectionList, + Format, + FormatReferenceStructuredObject, + PropertyList, + PropertyListReference, + ResourceResolver, +) +from adcp.decisioning.state import ( + GovernanceContextJWS, + Proposal, + StateReader, + WorkflowObjectType, + WorkflowStep, +) from adcp.decisioning.types import ( Account, AdcpError, @@ -76,13 +92,25 @@ def create_media_buy( "AccountStore", "AdcpError", "AuthInfo", + "CollectionList", "DecisioningCapabilities", "DecisioningPlatform", "ExplicitAccounts", + "Format", + "FormatReferenceStructuredObject", "FromAuthAccounts", + "GOVERNANCE_SPECIALISMS", + "GovernanceContextJWS", "MaybeAsync", + "Proposal", + "PropertyList", + "PropertyListReference", "RequestContext", + "ResourceResolver", "SalesResult", "SingletonAccounts", + "StateReader", "TaskHandoff", + "WorkflowObjectType", + "WorkflowStep", ] diff --git a/src/adcp/decisioning/context.py b/src/adcp/decisioning/context.py index c66c937ea..ada44dd8b 100644 --- a/src/adcp/decisioning/context.py +++ b/src/adcp/decisioning/context.py @@ -21,6 +21,8 @@ from typing_extensions import TypeVar +from adcp.decisioning.resolve import ResourceResolver, _make_default_resolver +from adcp.decisioning.state import StateReader, _make_default_state_reader from adcp.decisioning.types import Account, TaskHandoff from adcp.server.base import ToolContext @@ -99,13 +101,32 @@ class RequestContext(ToolContext, Generic[TMeta]): HITL background-task path. The framework dispatcher detects the returned :class:`TaskHandoff` via type-identity and projects it to the wire ``Submitted`` envelope. + + :param state: Sync reads of framework-owned in-flight workflow + state. Default is :class:`adcp.decisioning.state._NotYetWiredStateReader` + — returns empty values + emits one-time UserWarning per + method on first call. v6.1 wires the backing store. + :param resolve: Async framework-mediated fetches with cache + + validation. Default is + :class:`adcp.decisioning.resolve._NotYetWiredResolver` — raises + ``NotImplementedError`` on every call. v6.1 wires the backing + fetchers. + :param auth_principal: Typed convenience field carrying the + verified principal label (sourced from + :class:`AuthInfo.principal` when present). Distinct from + ``account.id`` (which the framework's idempotency middleware + uses for cache scope) — middleware reading "who authenticated + this request" gets a load-bearing field name. """ # Default factories so ``RequestContext()`` works in tests; in # production the dispatch adapter populates every field. account: Account[TMeta] = field(default_factory=lambda: Account(id="")) auth_info: AuthInfo | None = None + auth_principal: str | None = None now: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) + state: StateReader = field(default_factory=_make_default_state_reader) + resolve: ResourceResolver = field(default_factory=_make_default_resolver) def handoff_to_task( self, diff --git a/src/adcp/decisioning/platform.py b/src/adcp/decisioning/platform.py index 900c6aad8..695f0acb0 100644 --- a/src/adcp/decisioning/platform.py +++ b/src/adcp/decisioning/platform.py @@ -44,6 +44,16 @@ class DecisioningCapabilities: :param config: Free-form adopter-defined config exposed on capabilities. Use sparingly — strongly-typed fields above are preferred. + :param governance_aware: Set ``True`` ONLY when the platform + implements ``governance-*`` specialisms AND has wired a custom + :class:`adcp.decisioning.state.StateReader` that returns real + :data:`adcp.decisioning.state.GovernanceContextJWS` values. + Setting this ``True`` with the v6.0 stub + ``StateReader`` is a fail-fast at server boot: silent + governance-gate skipping is a security regression the framework + refuses to allow. Defaults ``False`` — non-governance adopters + never touch this flag. See + ``docs/proposals/decisioning-platform-dispatch-design.md#d15``. """ specialisms: list[str] = field(default_factory=list) @@ -51,6 +61,22 @@ class DecisioningCapabilities: pricing_models: list[str] = field(default_factory=list) creative_agents: list[Any] = field(default_factory=list) config: dict[str, Any] = field(default_factory=dict) + governance_aware: bool = False + + +#: Specialisms that depend on framework-supplied +#: :data:`adcp.decisioning.state.GovernanceContextJWS` reads. Claiming +#: any of these without setting ``governance_aware=True`` (and wiring +#: a real :class:`StateReader`) trips the server-boot fail-fast in +#: :func:`adcp.decisioning.dispatch.validate_platform` — silent +#: governance-gate skipping is a security regression the framework +#: refuses to ship. +GOVERNANCE_SPECIALISMS: frozenset[str] = frozenset( + { + "governance-spend-authority", + "governance-delivery-monitor", + } +) class DecisioningPlatform: diff --git a/src/adcp/decisioning/resolve.py b/src/adcp/decisioning/resolve.py new file mode 100644 index 000000000..22db7f095 --- /dev/null +++ b/src/adcp/decisioning/resolve.py @@ -0,0 +1,165 @@ +"""Async framework-mediated resource resolver for :class:`RequestContext`. + +Defines: + +* :class:`ResourceResolver` — Protocol for async fetches of + framework-validated resources (property lists, collection lists, + creative formats). The framework owns the cache + validation; + platform methods get pre-validated typed results. +* :class:`_NotYetWiredResolver` — v6.0 stub. Raises + :class:`NotImplementedError` on every call with a pointer to the + v6.1 follow-up. Asymmetry vs. the ``state`` stub (which returns + empty + warns) is deliberate: an empty :class:`PropertyList` in v6.0 + vs. a real one in v6.1 is divergence the framework cannot silently + paper over. See ``docs/proposals/decisioning-platform-dispatch-design.md#d15``. + +The :class:`Format` and :class:`PropertyListReference` types are +re-exported from :mod:`adcp.types.generated_poc` so adopters import +once from :mod:`adcp.decisioning`. :class:`PropertyList` and +:class:`CollectionList` use the spec-defined wire shapes; the +resolver returns the same Pydantic models adopters would construct +themselves. +""" + +from __future__ import annotations + +from typing import Protocol, runtime_checkable + +# Wire types — already exported from adcp.types. Re-export for +# one-stop import from adcp.decisioning. Per CLAUDE.md import +# architecture rules, only adcp.types/{stable,aliases,_ergonomic} may +# import from generated_poc/; everywhere else uses the public +# adcp.types surface. +from adcp.types import ( + CollectionList, + Format, + FormatReferenceStructuredObject, + PropertyListReference, +) + +# ``PropertyList`` is the resolved-list shape (vs. +# ``PropertyListReference`` which is the wire-encoded reference). The +# spec models the resolved list as the existing +# ``PropertyListReference`` shape carrying the populated members; we +# alias for clarity in adopter call sites. +PropertyList = PropertyListReference + + +@runtime_checkable +class ResourceResolver(Protocol): + """Async fetches of framework-mediated resources. + + Platforms call ``ctx.resolve.property_list(list_id)`` instead of + fetching from their own DB; the framework returns a validated + typed result. The resolver routes through + ``capabilities.creative_agents`` for creative-format reads, hits + the framework's local ``CreativePlatform.list_formats`` for + self-hosted formats, and reads the seller's declared property / + collection lists with id-validation built in. + + Framework-supplied; never constructed by adopter code. The + ``RequestContext.resolve`` field is populated by the dispatch + hydration helper. Adopters substituting test doubles use + :func:`dataclasses.replace` on the context, not direct + construction. + + Mirrors the TS-side ``ResourceResolver`` interface in + ``src/lib/server/decisioning/context.ts``. v6.0 ships the contract + + the no-op stub (raises ``NotImplementedError`` on every call); + v6.1 lands the backing fetchers. + """ + + async def property_list(self, list_id: str) -> PropertyList: + """Fetch a property list by id. Framework validates the id + exists in the seller's declared lists before returning; + consumers can trust the result.""" + ... + + async def collection_list(self, list_id: str) -> CollectionList: + """Fetch a collection list by id. Same id-validation + guarantee as :meth:`property_list`.""" + ... + + async def creative_format( + self, + format_id: FormatReferenceStructuredObject, + *, + revalidate: bool = False, + ) -> Format: + """Fetch a creative format definition. + + Routes through ``capabilities.creative_agents`` declaration + with a framework-managed cache; self-hosted formats hit the + local ``CreativePlatform.list_formats``. Returns the resolved + :class:`Format` with full asset slot definitions. + + :param revalidate: When ``True``, bypasses the framework cache + and re-fetches from the upstream creative-agent. Adopters + with freshness needs (e.g., creative submission validating + against the latest format spec) pass ``revalidate=True``; + most reads use the default (``False``) to amortize the + agent round-trip. + + Cache TTL is implementation detail (defaults to 1h on the + reference impl); adopters who need stricter freshness use + ``revalidate=True`` rather than depending on the TTL value. + """ + ... + + +class _NotYetWiredResolver: + """v6.0 stub. Raises :class:`NotImplementedError` on every method + with a pointer to the v6.1 follow-up. + + Adopters reaching for ``ctx.resolve.*`` against the stub get an + immediate, locatable failure rather than a silent empty + ``PropertyList`` that diverges from real v6.1 behavior. Adopters + write custom ``ResourceResolver`` impls when they need real + fetching before the framework's backing impl ships. + + Framework-internal — not exported. + """ + + async def property_list(self, list_id: str) -> PropertyList: + raise NotImplementedError( + f"ResourceResolver.property_list({list_id!r}) called against " + "the v6.0 stub. Backing fetcher lands in v6.1 — see " + "docs/proposals/decisioning-platform-dispatch-design.md#d15. " + "Foundation-PR adopters should not invoke ctx.resolve.* yet, " + "or wire a custom ResourceResolver via " + "serve(resolver=...) for the v6.1-style behavior." + ) + + async def collection_list(self, list_id: str) -> CollectionList: + raise NotImplementedError( + f"ResourceResolver.collection_list({list_id!r}) called against " + "the v6.0 stub. Backing fetcher lands in v6.1 — see " + "docs/proposals/decisioning-platform-dispatch-design.md#d15." + ) + + async def creative_format( + self, + format_id: FormatReferenceStructuredObject, + *, + revalidate: bool = False, + ) -> Format: + raise NotImplementedError( + f"ResourceResolver.creative_format({format_id!r}, revalidate=" + f"{revalidate}) called against the v6.0 stub. Backing " + "fetcher lands in v6.1 — see " + "docs/proposals/decisioning-platform-dispatch-design.md#d15." + ) + + +def _make_default_resolver() -> ResourceResolver: + return _NotYetWiredResolver() + + +__all__ = [ + "CollectionList", + "Format", + "FormatReferenceStructuredObject", + "PropertyList", + "PropertyListReference", + "ResourceResolver", +] diff --git a/src/adcp/decisioning/state.py b/src/adcp/decisioning/state.py new file mode 100644 index 000000000..3a2592243 --- /dev/null +++ b/src/adcp/decisioning/state.py @@ -0,0 +1,242 @@ +"""Sync workflow-state reader for :class:`RequestContext`. + +Defines: + +* :class:`StateReader` — Protocol for sync reads of framework-owned + in-flight workflow state. Platform methods read this without + re-querying their own DB; the framework owns the cache. +* :class:`WorkflowStep`, :class:`WorkflowObjectType`, + :data:`GovernanceContextJWS` — framework-internal types referenced + by :class:`StateReader` methods. Defined here (not in + ``adcp.types.generated_poc/``) because they're framework-only — + not on the wire. +* :class:`_NotYetWiredStateReader` — v6.0 stub. Returns type-correct + empty values; emits a one-time :class:`UserWarning` per method on + first call so adopters notice they're reading uninitialized state. + Backing store lands in v6.1. + +The asymmetry between this stub (returns empty) and +:class:`adcp.decisioning.resolve._NotYetWiredResolver` (raises) is +deliberate. ``state.*`` reads are read-only inspections of +framework-owned state — an empty workflow-steps list IS the correct +answer for a fresh tenant. ``resolve.*`` fetches are validated +lookups — an empty :class:`PropertyList` in v6.0 vs. a real one in +v6.1 is divergence the framework cannot silently paper over. See +``docs/proposals/decisioning-platform-dispatch-design.md#d15`` for +the full rationale. +""" + +from __future__ import annotations + +import warnings +from collections.abc import Sequence +from dataclasses import dataclass +from typing import Literal, NewType, Protocol, runtime_checkable + +# Wire types referenced through the StateReader. ``Proposal`` is +# exported from adcp.types; importing from there keeps the layering +# rule in CLAUDE.md happy (only adcp.types/{stable,aliases,_ergonomic} +# may import from generated_poc/). +from adcp.types import Proposal + +#: Object types a workflow step can touch. Framework-internal — not on +#: the wire (the wire-side ``status-change-resource-type.json`` enum +#: covers a different surface). +WorkflowObjectType = Literal[ + "media_buy", + "creative", + "product", + "plan", + "audience", + "rights_grant", + "task", +] + +#: JWS-signed governance context. The framework verifies signature, +#: plan-binding, seller-binding, and phase-binding before exposing the +#: token to platform code; adopters can trust the value. Don't unwrap +#: or modify — re-pass to downstream framework calls instead. +GovernanceContextJWS = NewType("GovernanceContextJWS", str) + + +@dataclass(frozen=True) +class WorkflowStep: + """A chronological event the framework recorded against an object. + + Frozen because the framework writes the step record once at the + transition; platform code reads but does not mutate. The shape + mirrors the TS-side ``WorkflowStep`` interface so cross-language + adopters get the same fields. + + :param id: Stable step identifier (framework-allocated UUID). + :param object_type: The object this step touched. + :param object_id: Stable id of the touched object within + :attr:`object_type`. + :param tool: Wire verb that ran the step + (e.g. ``'create_media_buy'``, ``'sync_creatives'``). + :param at: ISO 8601 timestamp of the step. + :param actor: Who initiated the step. ``agent_url`` for an agent + principal, ``principal`` for a service-account principal, + possibly both. + :param status: Step outcome. ``'submitted'`` for a kicked-off task, + ``'completed'``/``'failed'`` for terminal states, + ``'progress'`` for a mid-flight update. + """ + + id: str + object_type: WorkflowObjectType + object_id: str + tool: str + at: str + actor: dict[str, str] + status: Literal["submitted", "completed", "failed", "progress"] + + +@runtime_checkable +class StateReader(Protocol): + """Sync reads of framework-owned in-flight workflow state. + + Platform methods read prior workflow context (recent media-buy + transitions, related proposals, in-flight governance bindings) + without re-querying their own DB. The framework owns the cache; the + Protocol surface is purely read. + + Framework-supplied; never constructed by adopter code. The + ``RequestContext.state`` field is populated by the dispatch + hydration helper. Adopters substituting test doubles use + :func:`dataclasses.replace` on the context, not direct + construction. + + Mirrors the TS-side ``WorkflowStateReader`` interface in + ``src/lib/server/decisioning/context.ts``. v6.0 ships the contract + + the no-op stub; v6.1 lands the backing store. + """ + + def find_by_object( + self, + object_type: WorkflowObjectType, + object_id: str, + ) -> Sequence[WorkflowStep]: + """Return workflow steps that touched the given object, + chronological. Used for "what's happened to this buy?" reads + without a platform-side fetch.""" + ... + + def find_proposal_by_id(self, proposal_id: str) -> Proposal | None: + """Resolve a ``proposal_id`` threaded across + ``get_products → refine → create_media_buy`` without platform + code. Returns ``None`` if the framework doesn't recognize the + id.""" + ... + + def governance_context(self) -> GovernanceContextJWS | None: + """Currently in-flight verified governance context (the JWS + token). ``None`` for non-governance flows. Framework verifies + before exposure; platform code can trust the value. + + Adopters claiming ``governance-*`` specialisms in + ``capabilities.specialisms`` MUST set + ``capabilities.governance_aware=True`` and wire a real + ``StateReader`` that returns real JWS tokens. The default stub + returns ``None``, which would silently skip the gate — server + boot fails fast if a governance specialism is claimed without + the opt-in. See + ``docs/proposals/decisioning-platform-dispatch-design.md#d15``. + """ + ... + + def workflow_steps(self) -> Sequence[WorkflowStep]: + """All chronological steps for this request's account. + Audit-read shape.""" + ... + + +# --------------------------------------------------------------------------- +# v6.0 stub — empty returns + one-time UserWarning per method +# --------------------------------------------------------------------------- + +#: Module-level set tracking which stub methods have already warned. +#: Module-scoped so concurrent ``serve()`` instances share the +#: warned-once state — emitting the warning per process per method, +#: not per request. +_STATE_STUB_WARNED: set[str] = set() + + +class _NotYetWiredStateReader: + """v6.0 stub. Returns type-correct empty values for every method; + emits a one-time :class:`UserWarning` per method on first call. + + Adopters who reach for ``ctx.state.*`` against the stub get the + legitimate "no history yet" semantics for fresh tenants AND a + visible warning the first time so accidentally-uninitialized state + doesn't ship silently. Adopters claiming ``governance-*`` + specialisms get the fail-fast path at server boot before this stub + is ever invoked (see :class:`StateReader.governance_context` + docstring). + + Framework-internal — not exported. Adopters write custom + ``StateReader`` impls when they need the v6.1-style behavior + before the backing store lands. + """ + + def _warn_once(self, method_name: str) -> None: + if method_name in _STATE_STUB_WARNED: + return + _STATE_STUB_WARNED.add(method_name) + warnings.warn( + f"ctx.state.{method_name}() called against the v6.0 stub " + "StateReader; backing store lands in v6.1. Reading empty " + "results — adopter code branching on this state will see " + "different values once the backing store is wired. See " + "docs/proposals/decisioning-platform-dispatch-design.md#d15", + UserWarning, + stacklevel=3, + ) + + def find_by_object( + self, + object_type: WorkflowObjectType, + object_id: str, + ) -> Sequence[WorkflowStep]: + self._warn_once("find_by_object") + return () + + def find_proposal_by_id(self, proposal_id: str) -> Proposal | None: + self._warn_once("find_proposal_by_id") + return None + + def governance_context(self) -> GovernanceContextJWS | None: + self._warn_once("governance_context") + return None + + def workflow_steps(self) -> Sequence[WorkflowStep]: + self._warn_once("workflow_steps") + return () + + +def _reset_state_stub_warned() -> None: + """Test helper — clears the module-level warned-once set. + + Production code never calls this; tests use it to assert the + one-time semantics deterministically (each test starts with a + fresh warned set). + """ + _STATE_STUB_WARNED.clear() + + +__all__ = [ + "GovernanceContextJWS", + "Proposal", + "StateReader", + "WorkflowObjectType", + "WorkflowStep", +] + + +# Re-exports needed by ``RequestContext`` field defaults but not part +# of the public adopter-facing surface — keep below ``__all__``. +def _make_default_state_reader() -> StateReader: + return _NotYetWiredStateReader() + + +_make_default_state_reader.__module__ = __name__ diff --git a/src/adcp/types/__init__.py b/src/adcp/types/__init__.py index ec7830d23..038063699 100644 --- a/src/adcp/types/__init__.py +++ b/src/adcp/types/__init__.py @@ -170,6 +170,7 @@ FormatCard, FormatCardDetailed, FormatIdParameter, + FormatReferenceStructuredObject, FrequencyCap, FrequencyCapScope, GeoCountry, @@ -920,6 +921,7 @@ def __init__(self, *args: object, **kwargs: object) -> None: "FormatCardDetailed", "FormatId", "FormatIdParameter", + "FormatReferenceStructuredObject", "Identifier", "Input", "KellerType", diff --git a/tests/fixtures/public_api_snapshot.json b/tests/fixtures/public_api_snapshot.json index 2957ed9c5..a821657f2 100644 --- a/tests/fixtures/public_api_snapshot.json +++ b/tests/fixtures/public_api_snapshot.json @@ -553,6 +553,7 @@ "FormatCardDetailed", "FormatId", "FormatIdParameter", + "FormatReferenceStructuredObject", "FrequencyCap", "FrequencyCapScope", "GeneratedTaskStatus", diff --git a/tests/test_decisioning_context_state_resolve.py b/tests/test_decisioning_context_state_resolve.py new file mode 100644 index 000000000..4e510de2a --- /dev/null +++ b/tests/test_decisioning_context_state_resolve.py @@ -0,0 +1,333 @@ +"""D15 — RequestContext typed sub-readers. + +Covers the surface added in round-4 of the dispatch design review: + +* :class:`adcp.decisioning.StateReader` Protocol structural matching +* :class:`adcp.decisioning.ResourceResolver` Protocol structural matching +* :class:`adcp.decisioning.state._NotYetWiredStateReader` v6.0 stub — + empty returns + one-time UserWarning per method +* :class:`adcp.decisioning.resolve._NotYetWiredResolver` v6.0 stub — + raises NotImplementedError with design-doc anchor +* ``creative_format(revalidate=True)`` parameter contract — the stub + raises identically regardless of the flag (parameter is part of the + Protocol, not gated on backing impl) +* ``dataclasses.replace(ctx, state=fake)`` test-double substitution + round-trip +* ``capabilities.governance_aware`` opt-in and the default + :data:`adcp.decisioning.GOVERNANCE_SPECIALISMS` constant +""" + +from __future__ import annotations + +import dataclasses +import warnings +from collections.abc import Sequence + +import pytest + +from adcp.decisioning import ( + GOVERNANCE_SPECIALISMS, + DecisioningCapabilities, + Proposal, + RequestContext, + ResourceResolver, + StateReader, + WorkflowObjectType, + WorkflowStep, +) +from adcp.decisioning.resolve import _NotYetWiredResolver +from adcp.decisioning.state import ( + _NotYetWiredStateReader, + _reset_state_stub_warned, +) + + +@pytest.fixture(autouse=True) +def reset_state_stub_warned(): + """Clear the module-level warned-once set before each test so + one-time UserWarning assertions don't see prior tests' state.""" + _reset_state_stub_warned() + + +# ---- Protocol structural matching ---- + + +def test_state_reader_protocol_runtime_checkable() -> None: + """``StateReader`` is a runtime-checkable Protocol — adopters + writing custom impls satisfy the contract structurally without + inheritance.""" + assert isinstance(_NotYetWiredStateReader(), StateReader) + + +def test_resource_resolver_protocol_runtime_checkable() -> None: + """Same structural check for ``ResourceResolver``.""" + assert isinstance(_NotYetWiredResolver(), ResourceResolver) + + +def test_custom_state_reader_satisfies_protocol() -> None: + """An adopter-written class with the right method shapes satisfies + the Protocol without subclassing.""" + + class _CustomStateReader: + def find_by_object( + self, object_type: WorkflowObjectType, object_id: str + ) -> Sequence[WorkflowStep]: + return () + + def find_proposal_by_id(self, proposal_id: str) -> Proposal | None: + return None + + def governance_context(self): # type: ignore[no-untyped-def] + return None + + def workflow_steps(self) -> Sequence[WorkflowStep]: + return () + + assert isinstance(_CustomStateReader(), StateReader) + + +# ---- _NotYetWiredStateReader: empty returns + one-time UserWarning ---- + + +def test_state_stub_find_by_object_returns_empty_and_warns_once() -> None: + """First call emits ``UserWarning``; subsequent calls return empty + silently. Regression: warned-once state is module-level so concurrent + request handlers share suppression after the first call per process.""" + reader = _NotYetWiredStateReader() + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always", UserWarning) + first = reader.find_by_object("media_buy", "mb_1") + second = reader.find_by_object("media_buy", "mb_2") + assert first == () + assert second == () + matched = [w for w in caught if "find_by_object" in str(w.message)] + assert len(matched) == 1 + assert "v6.0 stub" in str(matched[0].message) + assert "v6.1" in str(matched[0].message) + assert "#d15" in str(matched[0].message) + + +def test_state_stub_find_proposal_by_id_returns_none_and_warns_once() -> None: + reader = _NotYetWiredStateReader() + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always", UserWarning) + result = reader.find_proposal_by_id("proposal_xyz") + reader.find_proposal_by_id("proposal_abc") # 2nd call, no warning + assert result is None + matched = [w for w in caught if "find_proposal_by_id" in str(w.message)] + assert len(matched) == 1 + + +def test_state_stub_governance_context_returns_none_and_warns_once() -> None: + """Reaching ``governance_context()`` against the stub means the + governance opt-in fail-fast wasn't tripped — adopter is in a + non-governance flow. Warning fires once; result is ``None`` (no + governance threaded).""" + reader = _NotYetWiredStateReader() + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always", UserWarning) + result = reader.governance_context() + reader.governance_context() + assert result is None + matched = [w for w in caught if "governance_context" in str(w.message)] + assert len(matched) == 1 + + +def test_state_stub_workflow_steps_returns_empty_and_warns_once() -> None: + reader = _NotYetWiredStateReader() + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always", UserWarning) + result = reader.workflow_steps() + reader.workflow_steps() + assert result == () + matched = [w for w in caught if "workflow_steps" in str(w.message)] + assert len(matched) == 1 + + +def test_state_stub_separate_methods_warn_independently() -> None: + """Each method's warned-once is keyed by method name — calling + ``find_by_object`` once doesn't suppress the first + ``workflow_steps`` warning.""" + reader = _NotYetWiredStateReader() + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always", UserWarning) + reader.find_by_object("media_buy", "mb_1") + reader.workflow_steps() + reader.find_by_object("creative", "cr_1") # suppressed + reader.workflow_steps() # suppressed + methods_warned = [ + m + for m in ( + "find_by_object", + "workflow_steps", + "find_proposal_by_id", + "governance_context", + ) + if any(m in str(w.message) for w in caught) + ] + assert sorted(methods_warned) == ["find_by_object", "workflow_steps"] + + +# ---- _NotYetWiredResolver: raises with design-doc anchor ---- + + +@pytest.mark.asyncio +async def test_resolve_stub_property_list_raises_with_anchor() -> None: + """Resolver stub raises ``NotImplementedError`` with the design-doc + anchor in the message — adopters reaching for ``ctx.resolve.*`` get + a locatable failure pointing at the v6.1 follow-up.""" + resolver = _NotYetWiredResolver() + with pytest.raises(NotImplementedError) as exc_info: + await resolver.property_list("list_xyz") + msg = str(exc_info.value) + assert "list_xyz" in msg + assert "v6.0 stub" in msg + assert "v6.1" in msg + assert "#d15" in msg + + +@pytest.mark.asyncio +async def test_resolve_stub_collection_list_raises() -> None: + resolver = _NotYetWiredResolver() + with pytest.raises(NotImplementedError): + await resolver.collection_list("coll_xyz") + + +@pytest.mark.asyncio +async def test_resolve_stub_creative_format_raises_with_revalidate_false() -> None: + """Default ``revalidate=False`` raises with the same shape as the + other stubs.""" + from adcp.types import FormatReferenceStructuredObject + + resolver = _NotYetWiredResolver() + fmt = FormatReferenceStructuredObject( + agent_url="https://creative.adcontextprotocol.org", + id="display_static", + ) + with pytest.raises(NotImplementedError) as exc_info: + await resolver.creative_format(fmt) + assert "creative_format" in str(exc_info.value) + assert "revalidate=False" in str(exc_info.value) + + +@pytest.mark.asyncio +async def test_resolve_stub_creative_format_raises_with_revalidate_true() -> None: + """``revalidate=True`` ALSO raises — the parameter is part of the + Protocol contract, NOT gated on the backing impl. Adopters who need + ``revalidate=True`` semantics in v6.0 wire a custom resolver; they + don't get a different stub path for the flag.""" + from adcp.types import FormatReferenceStructuredObject + + resolver = _NotYetWiredResolver() + fmt = FormatReferenceStructuredObject( + agent_url="https://creative.adcontextprotocol.org", + id="display_static", + ) + with pytest.raises(NotImplementedError) as exc_info: + await resolver.creative_format(fmt, revalidate=True) + assert "creative_format" in str(exc_info.value) + assert "revalidate=True" in str(exc_info.value) + + +# ---- RequestContext: defaults wire the stubs ---- + + +def test_request_context_defaults_to_stubs() -> None: + """Constructing ``RequestContext()`` without explicit ``state`` / + ``resolve`` wires the v6.0 stub impls. Test fixtures and + ``examples/hello_seller.py`` rely on this for zero-config setup.""" + ctx = RequestContext() + assert isinstance(ctx.state, _NotYetWiredStateReader) + assert isinstance(ctx.resolve, _NotYetWiredResolver) + assert ctx.auth_principal is None + + +# ---- dataclasses.replace test-double substitution ---- + + +def test_dataclasses_replace_substitutes_state_reader() -> None: + """Tests substitute test doubles via ``dataclasses.replace``, NOT + raw construction (which would bypass the framework hydration helper + in production).""" + + class _FakeStateReader: + def __init__(self) -> None: + self.calls: list[str] = [] + + def find_by_object(self, object_type, object_id): # type: ignore[no-untyped-def] + self.calls.append(f"find_by_object({object_type},{object_id})") + return () + + def find_proposal_by_id(self, proposal_id): # type: ignore[no-untyped-def] + return None + + def governance_context(self): # type: ignore[no-untyped-def] + return None + + def workflow_steps(self): # type: ignore[no-untyped-def] + return () + + fake = _FakeStateReader() + base_ctx = RequestContext() + test_ctx = dataclasses.replace(base_ctx, state=fake) + + test_ctx.state.find_by_object("media_buy", "mb_1") + assert fake.calls == ["find_by_object(media_buy,mb_1)"] + assert isinstance(base_ctx.state, _NotYetWiredStateReader), ( + "replace should NOT mutate the original ctx — base_ctx.state stays " "the default stub" + ) + + +def test_dataclasses_replace_substitutes_resolver() -> None: + """Same substitution pattern for ``resolve``.""" + + class _FakeResolver: + async def property_list(self, list_id): # type: ignore[no-untyped-def] + return f"resolved:{list_id}" + + async def collection_list(self, list_id): # type: ignore[no-untyped-def] + return f"coll:{list_id}" + + async def creative_format(self, format_id, *, revalidate=False): # type: ignore[no-untyped-def] + return f"fmt:{format_id}:{revalidate}" + + fake = _FakeResolver() + test_ctx = dataclasses.replace(RequestContext(), resolve=fake) + assert test_ctx.resolve is fake + + +# ---- governance opt-in / GOVERNANCE_SPECIALISMS ---- + + +def test_capabilities_governance_aware_defaults_false() -> None: + """Non-governance adopters never touch this flag — it stays + ``False`` by default. Adopters claiming ``governance-*`` specialisms + must explicitly opt in (and wire a real ``StateReader``); otherwise + server boot fails fast in ``validate_platform``.""" + caps = DecisioningCapabilities() + assert caps.governance_aware is False + + +def test_governance_specialisms_pinned() -> None: + """The constant tracks the AdCP 3.0 governance-* specialism set. + Drift here is a foundation-PR-level decision; this test is the + locked contract.""" + assert GOVERNANCE_SPECIALISMS == frozenset( + { + "governance-spend-authority", + "governance-delivery-monitor", + } + ) + + +def test_capabilities_can_opt_into_governance_aware() -> None: + """Adopters wiring real governance set this True alongside their + custom ``StateReader``. The flag itself doesn't validate; the + fail-fast logic lives in dispatch ``validate_platform`` (foundation + PR). v6.0 ships the contract.""" + caps = DecisioningCapabilities( + specialisms=["governance-spend-authority"], + governance_aware=True, + ) + assert caps.governance_aware is True + assert "governance-spend-authority" in caps.specialisms From 9a146698d0ea15efd3129cf73b9daa08726ca14c Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Thu, 30 Apr 2026 10:27:53 -0400 Subject: [PATCH 08/18] chore(decisioning): apply D15 review feedback (P1 polish) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Python-expert review on commit b4b16160 (D15) flagged six items, all P1 polish: - **governance_context warning text fixed**: previous text claimed "different values once wired" — misleading for non-governance flows where None IS the v6.1 answer. Special-cased the warning to explain that the fail-fast lands at server boot for governance adopters, and None is correct for non-governance flows. - **Removed __module__ = __name__ no-op** in state.py — module-scope function definitions already have __module__ set. - **Protocol structural-match caveats documented**: - StateReader docstring: isinstance() matches by attribute name only; return types (including NewType GovernanceContextJWS) and signatures are mypy-only enforcement. - ResourceResolver docstring: isinstance() doesn't check coroutinehood — sync method named property_list passes structural check, fails at await time. Use mypy. - **PropertyList alias pinned**: contract comment + regression test (test_property_list_alias_pinned_to_reference) tripwires future spec rev that introduces a distinct resolved-list type — drift becomes visible at CI time, not deploy time. - **governance_aware fails-fast docstring softened**: this commit ships the contract; Stage 3 dispatch lands the actual fail-fast. Docstring now reads "Stage 3 dispatch will fail-fast" rather than promising current behavior. - **Cross-instance warn-once test added**: confirms the module-level _STATE_STUB_WARNED set carries across stub instances (per process per method, not per request). Three new tests: - test_state_stub_warned_once_is_cross_instance - test_state_stub_governance_context_warning_text - test_property_list_alias_pinned_to_reference Test count: 40 (+3) in test_decisioning_context_state_resolve.py. Full suite: 2420 passed, 17 skipped, 1 xfailed. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/adcp/decisioning/platform.py | 15 ++++-- src/adcp/decisioning/resolve.py | 19 +++++-- src/adcp/decisioning/state.py | 35 ++++++++++--- .../test_decisioning_context_state_resolve.py | 51 +++++++++++++++++++ 4 files changed, 106 insertions(+), 14 deletions(-) diff --git a/src/adcp/decisioning/platform.py b/src/adcp/decisioning/platform.py index 695f0acb0..71af7cf2a 100644 --- a/src/adcp/decisioning/platform.py +++ b/src/adcp/decisioning/platform.py @@ -48,11 +48,16 @@ class DecisioningCapabilities: implements ``governance-*`` specialisms AND has wired a custom :class:`adcp.decisioning.state.StateReader` that returns real :data:`adcp.decisioning.state.GovernanceContextJWS` values. - Setting this ``True`` with the v6.0 stub - ``StateReader`` is a fail-fast at server boot: silent - governance-gate skipping is a security regression the framework - refuses to allow. Defaults ``False`` — non-governance adopters - never touch this flag. See + Defaults ``False`` — non-governance adopters never touch this + flag. + + Stage 3 dispatch (foundation PR's ``validate_platform``) will + fail-fast at server boot when a platform claims a + ``governance-*`` specialism without setting this flag and + wiring a real ``StateReader`` — silent governance-gate + skipping is a security regression the framework refuses to + ship. The flag itself is the contract that lands now; the + enforcement lands in Stage 3. See ``docs/proposals/decisioning-platform-dispatch-design.md#d15``. """ diff --git a/src/adcp/decisioning/resolve.py b/src/adcp/decisioning/resolve.py index 22db7f095..10bcf216d 100644 --- a/src/adcp/decisioning/resolve.py +++ b/src/adcp/decisioning/resolve.py @@ -39,9 +39,14 @@ # ``PropertyList`` is the resolved-list shape (vs. # ``PropertyListReference`` which is the wire-encoded reference). The -# spec models the resolved list as the existing -# ``PropertyListReference`` shape carrying the populated members; we -# alias for clarity in adopter call sites. +# spec currently models both as the same Pydantic class — the +# reference carries populated members on the response — so we alias +# for clarity in adopter call sites and on D15's StateReader contract. +# If a future spec rev introduces a distinct resolved-list type, +# adopter code typed against ``PropertyList`` would silently re-target; +# the contract test ``test_property_list_alias_pinned_to_reference`` in +# tests/test_decisioning_context_state_resolve.py tripwires that drift +# so the rename is visible at CI time, not deploy time. PropertyList = PropertyListReference @@ -67,6 +72,14 @@ class ResourceResolver(Protocol): ``src/lib/server/decisioning/context.ts``. v6.0 ships the contract + the no-op stub (raises ``NotImplementedError`` on every call); v6.1 lands the backing fetchers. + + .. note:: + :class:`runtime_checkable` Protocols only check attribute + *presence*. Whether a method is ``async def`` is irrelevant to + the runtime ``isinstance`` check — a sync method named + ``property_list`` would pass the structural check but fail at + ``await`` time. Use mypy to enforce ``async def`` signatures + across adopter impls. """ async def property_list(self, list_id: str) -> PropertyList: diff --git a/src/adcp/decisioning/state.py b/src/adcp/decisioning/state.py index 3a2592243..8035c8343 100644 --- a/src/adcp/decisioning/state.py +++ b/src/adcp/decisioning/state.py @@ -110,6 +110,15 @@ class StateReader(Protocol): Mirrors the TS-side ``WorkflowStateReader`` interface in ``src/lib/server/decisioning/context.ts``. v6.0 ships the contract + the no-op stub; v6.1 lands the backing store. + + .. note:: + :class:`runtime_checkable` Protocols match by attribute *name* + only — return types (including :data:`GovernanceContextJWS`, + which is a :func:`typing.NewType` invisible at runtime) and + method signatures are NOT enforced by ``isinstance``. A custom + impl that returns ``int`` from ``governance_context()`` will + pass the structural check; mypy is the only enforcement for + return-type contracts. Coverage gap is acceptable for v6.0. """ def find_by_object( @@ -183,11 +192,28 @@ def _warn_once(self, method_name: str) -> None: if method_name in _STATE_STUB_WARNED: return _STATE_STUB_WARNED.add(method_name) + # ``governance_context`` is a load-bearing security stub — + # adopters claiming governance-* specialisms get the fail-fast + # path at server boot before this branch is reached, so any + # code path that lands here is a non-governance flow where + # ``None`` is also the v6.1 answer (no governance threaded for + # this request). Other state methods will return real values + # in v6.1, so adopter branches on empty results would diverge. + if method_name == "governance_context": + tail = ( + "Returning None — non-governance flows get the same answer " + "in v6.1; governance-claiming platforms hit the server-boot " + "fail-fast before this stub is invoked." + ) + else: + tail = ( + "Reading empty results — adopter code branching on this " + "state will see different values once the backing store is " + "wired." + ) warnings.warn( f"ctx.state.{method_name}() called against the v6.0 stub " - "StateReader; backing store lands in v6.1. Reading empty " - "results — adopter code branching on this state will see " - "different values once the backing store is wired. See " + f"StateReader; backing store lands in v6.1. {tail} See " "docs/proposals/decisioning-platform-dispatch-design.md#d15", UserWarning, stacklevel=3, @@ -237,6 +263,3 @@ def _reset_state_stub_warned() -> None: # of the public adopter-facing surface — keep below ``__all__``. def _make_default_state_reader() -> StateReader: return _NotYetWiredStateReader() - - -_make_default_state_reader.__module__ = __name__ diff --git a/tests/test_decisioning_context_state_resolve.py b/tests/test_decisioning_context_state_resolve.py index 4e510de2a..76215346d 100644 --- a/tests/test_decisioning_context_state_resolve.py +++ b/tests/test_decisioning_context_state_resolve.py @@ -168,6 +168,57 @@ def test_state_stub_separate_methods_warn_independently() -> None: assert sorted(methods_warned) == ["find_by_object", "workflow_steps"] +def test_state_stub_warned_once_is_cross_instance() -> None: + """``_STATE_STUB_WARNED`` is module-level so concurrent ``serve()`` + instances share the warned-once state — emitting per process per + method, not per request. Two stub instances back-to-back must not + re-warn for the same method.""" + first = _NotYetWiredStateReader() + second = _NotYetWiredStateReader() + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always", UserWarning) + first.find_by_object("media_buy", "mb_1") + # Different instance, same method — must NOT re-warn. + second.find_by_object("media_buy", "mb_2") + matched = [w for w in caught if "find_by_object" in str(w.message)] + assert len(matched) == 1, ( + f"Expected exactly one warning across instances; got {len(matched)}: " + f"{[str(w.message) for w in matched]}" + ) + + +def test_state_stub_governance_context_warning_text() -> None: + """The ``governance_context`` warning text is special-cased to + explain that ``None`` IS the v6.1 answer for non-governance flows + — not the generic "different values once wired" message that + applies to other methods. Adopters in non-governance flows + shouldn't be told the value will change when it won't.""" + reader = _NotYetWiredStateReader() + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always", UserWarning) + reader.governance_context() + msg = next(str(w.message) for w in caught if "governance_context" in str(w.message)) + assert "non-governance flows get the same answer" in msg + assert "fail-fast" in msg + + +def test_property_list_alias_pinned_to_reference() -> None: + """``adcp.decisioning.PropertyList`` aliases + ``PropertyListReference`` deliberately (the spec models both as + one Pydantic class). If a future spec rev introduces a distinct + resolved-list type, adopter code typed against ``PropertyList`` + would silently re-target — this contract test trips first so the + rename is visible at CI time rather than deploy time.""" + from adcp.decisioning import PropertyList, PropertyListReference + + assert PropertyList is PropertyListReference, ( + "PropertyList must alias PropertyListReference. If the spec has " + "introduced a distinct resolved-list type, update " + "adcp.decisioning.resolve to point PropertyList at the new class " + "and migrate adopter code accordingly." + ) + + # ---- _NotYetWiredResolver: raises with design-doc anchor ---- From e961adcd5c5c8bf661c1dc6c9b83a38972bc4dbf Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Thu, 30 Apr 2026 10:34:51 -0400 Subject: [PATCH 09/18] feat(decisioning): TaskRegistry Protocol + InMemoryTaskRegistry stub MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stage 3 first piece. Foundational — no deps on dispatch.py or serve.py yet; those layers consume this Protocol next. What lands: - adcp/decisioning/task_registry.py: TaskRegistry runtime_checkable Protocol with per-method contract docstrings (D7). Cross-tenant safety pinned: get(task_id, expected_account_id=) MUST return None on mismatch. InMemoryTaskRegistry v6.0 reference impl (asyncio.Lock-guarded dict). Idempotent on equal terminal payloads; raises on mismatched re-completion. TaskHandoffContext (id + update + heartbeat). TaskRecord frozen-shape dataclass. Production-mode gate documented (Stage 3 serve.py wiring will refuse InMemoryTaskRegistry in ADCP_ENV in {prod, production} without ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1). - adcp/decisioning/__init__.py: re-exports. - tests/test_decisioning_task_registry.py: 22 tests covering Protocol structural matching (concrete + duck-typed), full lifecycle (issue/update_progress/complete/fail/get), idempotency on equal terminal payloads, raise on mismatch, concurrent issue() unique ids, update_progress on unknown task is silent no-op, TaskHandoffContext.update swallows registry errors, TaskHandoffContext.heartbeat is v6.0 no-op. - tests/test_decisioning_task_registry_cross_tenant.py: 8 tests covering the security boundary at every state (submitted / working / completed / failed) — cross-tenant probe returns None; same-tenant read still works; empty-string account_id is mismatch; substring/prefix not enough — exact equality required; unknown task_id + cross-tenant probe both return None. 70 decisioning tests pass total (+30 from this commit). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/adcp/decisioning/__init__.py | 10 + src/adcp/decisioning/task_registry.py | 437 ++++++++++++++++++ tests/test_decisioning_task_registry.py | 310 +++++++++++++ ..._decisioning_task_registry_cross_tenant.py | 141 ++++++ 4 files changed, 898 insertions(+) create mode 100644 src/adcp/decisioning/task_registry.py create mode 100644 tests/test_decisioning_task_registry.py create mode 100644 tests/test_decisioning_task_registry_cross_tenant.py diff --git a/src/adcp/decisioning/__init__.py b/src/adcp/decisioning/__init__.py index 2c4f117fd..f425be28e 100644 --- a/src/adcp/decisioning/__init__.py +++ b/src/adcp/decisioning/__init__.py @@ -79,6 +79,12 @@ def create_media_buy( WorkflowObjectType, WorkflowStep, ) +from adcp.decisioning.task_registry import ( + InMemoryTaskRegistry, + TaskHandoffContext, + TaskRegistry, + TaskState, +) from adcp.decisioning.types import ( Account, AdcpError, @@ -101,6 +107,7 @@ def create_media_buy( "FromAuthAccounts", "GOVERNANCE_SPECIALISMS", "GovernanceContextJWS", + "InMemoryTaskRegistry", "MaybeAsync", "Proposal", "PropertyList", @@ -111,6 +118,9 @@ def create_media_buy( "SingletonAccounts", "StateReader", "TaskHandoff", + "TaskHandoffContext", + "TaskRegistry", + "TaskState", "WorkflowObjectType", "WorkflowStep", ] diff --git a/src/adcp/decisioning/task_registry.py b/src/adcp/decisioning/task_registry.py new file mode 100644 index 000000000..0040937df --- /dev/null +++ b/src/adcp/decisioning/task_registry.py @@ -0,0 +1,437 @@ +"""Task registry for the DecisioningPlatform handoff path. + +Defines: + +* :class:`TaskRegistry` Protocol — the seam adopters substitute when + they need a durable backing store (PostgreSQL, Redis, etc.). The + Protocol shape is pinned with per-method contract docstrings; D7 of + the dispatch design names every invariant. +* :class:`InMemoryTaskRegistry` — the v6.0 reference implementation. + Process-local, lossy on restart. Suitable for local dev, CI, and + test fixtures; production deployments running ``sales-broadcast-tv`` + or any HITL flow refuse to start without an explicit opt-in (see + :func:`adcp.decisioning.serve.serve` Stage 3 wiring). +* :class:`TaskHandoffContext` — what the framework passes into the + adopter's handoff callable when ``ctx.handoff_to_task(fn)`` fires. + Carries the framework-issued task id plus ``update(progress)`` and + ``heartbeat()`` affordances. + +The registry's storage shape is intentionally minimal: +``{task_id → TaskRecord}`` keyed by the framework-allocated UUID. +Cross-tenant access control is enforced via the optional +``expected_account_id`` argument on :meth:`TaskRegistry.get` — sellers +threading ``ctx.account.id`` through to ``tasks/get`` get a None +return on mismatch (no principal-enumeration via task_id probing). + +Production-mode gate (Emma #8 / round-4): +:func:`adcp.decisioning.serve.serve` reads ``ADCP_ENV`` (case-insensitive +``{"prod", "production"}`` — same as +:func:`adcp.validation.client_hooks._default_response_mode`) and +refuses to wire :class:`InMemoryTaskRegistry` in production unless +``ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1`` is set. Sales-broadcast-tv +adopters are structurally forced into the HITL path which depends on +the registry — silent in-memory fallback is a real prod foot-gun. +""" + +from __future__ import annotations + +import asyncio +import time +import uuid +from collections.abc import Awaitable, Callable +from dataclasses import dataclass, field +from typing import Any, Literal, Protocol, runtime_checkable + +#: Terminal task states per AdCP 3.0 spec (``enums/task-status.json``). +#: ``submitted`` = task created but not yet started; ``working`` = adopter +#: callback running; ``completed`` / ``failed`` = terminal. +TaskState = Literal["submitted", "working", "completed", "failed"] + + +@dataclass +class TaskRecord: + """The framework's per-task storage row. + + Internal to the registry impl — adopters don't construct these. + The Protocol surface returns dicts on :meth:`TaskRegistry.get` + rather than the dataclass directly so the storage shape stays + swappable (a Postgres impl might return a different row class). + + :param task_id: Framework-allocated UUID. Stable across the + task's lifetime. + :param account_id: Account that owns the task. Used for the + cross-tenant access-control check in :meth:`TaskRegistry.get`. + :param state: Terminal state lifecycle. Transitions are + framework-driven; adopters drive completion via + :meth:`TaskHandoffContext.update` and the dispatcher calls + :meth:`TaskRegistry.complete` / :meth:`TaskRegistry.fail` at + the end of the handoff fn. + :param task_type: Wire-spec task type (``'create_media_buy'``, + ``'sync_creatives'``, etc.). Mirrors the Submitted envelope's + ``task_type`` field so ``tasks/get`` round-trips correctly. + :param progress: Latest progress payload written by + :meth:`TaskHandoffContext.update`. Buyers see this on + ``tasks/get`` while the task is in the ``working`` state. + :param result: Terminal artifact set by :meth:`TaskRegistry.complete`. + MUST be the JSON-serialized spec response shape (e.g. a + ``CreateMediaBuySuccessResponse`` projected through + ``model_dump()``). v6.1 adds size enforcement; for now the + registry trusts adopters. + :param error: Terminal failure payload set by + :meth:`TaskRegistry.fail`. MUST be the + :meth:`AdcpError.to_wire` shape so ``tasks/get`` returns the + spec ``adcp_error`` envelope verbatim. + :param created_at: Monotonic creation timestamp (Unix epoch + seconds). Adopters get the exact value the framework stored; + useful for SLA dashboards. + :param updated_at: Last-touched timestamp. Updated on every state + transition AND every :meth:`TaskHandoffContext.update` call. + """ + + task_id: str + account_id: str + state: TaskState + task_type: str + progress: dict[str, Any] | None = None + result: dict[str, Any] | None = None + error: dict[str, Any] | None = None + created_at: float = field(default_factory=time.time) + updated_at: float = field(default_factory=time.time) + + def to_dict(self) -> dict[str, Any]: + """Serialize for buyer consumption via ``tasks/get``. + + Adopters or middleware reading the dict shape get the exact + wire-relevant fields. ``created_at`` / ``updated_at`` are + included so admin tooling can build SLA reports. + """ + return { + "task_id": self.task_id, + "account_id": self.account_id, + "state": self.state, + "task_type": self.task_type, + "progress": self.progress, + "result": self.result, + "error": self.error, + "created_at": self.created_at, + "updated_at": self.updated_at, + } + + +@runtime_checkable +class TaskRegistry(Protocol): + """Per-account task store — the seam adopters substitute for a + durable backing implementation. + + Lifecycle (framework-driven; adopters call only :meth:`TaskHandoffContext` + methods, not these directly): + + 1. Dispatch detects ``ctx.handoff_to_task(fn)`` returned from a + platform method. Allocates a task_id and calls :meth:`issue` to + persist the ``submitted`` row. + 2. Dispatch projects the wire ``Submitted`` envelope to the buyer. + 3. Dispatch runs ``fn(task_handoff_ctx)`` in the background. The + adopter calls ``task_handoff_ctx.update(progress)`` zero or + more times; the framework routes each to :meth:`update_progress` + (also transitions ``submitted`` → ``working`` on first update). + 4. When ``fn`` returns, dispatch calls :meth:`complete` with the + terminal artifact (a JSON-serialized spec response). + 5. When ``fn`` raises :class:`adcp.decisioning.AdcpError` (or any + exception, wrapped to ``INTERNAL_ERROR``), dispatch calls + :meth:`fail` with the wire-shaped error payload. + + All write paths set ``updated_at = now``. The registry is + expected to be safe for concurrent reads; concurrent writes to + the same task are serialized by the dispatcher (one ``fn`` per + handoff, no concurrent `update_progress`/`complete` against the + same task_id). + + Cross-tenant safety: every read MUST be account-scoped. The + :meth:`get` method takes an optional ``expected_account_id`` — + when supplied (the wire ``tasks/get`` path always supplies it), + a mismatch returns ``None``, NOT the raw record. Adopters + implementing custom registries MUST honor this: returning a + cross-tenant record on probe enables principal-enumeration via + task_id guessing. See + ``tests/test_decisioning_task_registry_cross_tenant.py`` for + the regression suite. + """ + + async def issue( + self, + *, + account_id: str, + task_type: str, + ) -> str: + """Allocate a fresh task_id, persist a ``submitted`` row, and + return the id. + + :param account_id: Account that owns the task. Drives the + cross-tenant access check on subsequent reads. + :param task_type: Wire-spec task type (``'create_media_buy'``, + etc.). Surfaced on the Submitted envelope and on + ``tasks/get`` reads. + :returns: The framework-allocated task_id (string UUID). + """ + ... + + async def update_progress( + self, + task_id: str, + progress: dict[str, Any], + ) -> None: + """Write a progress payload and transition ``submitted`` → + ``working`` on first call. No-op transition on subsequent + calls (already in ``working``). + + Errors here are swallowed by the dispatch wrapper — a transient + registry write failure must NOT abort the adopter's background + handoff. Buyer-facing impact is a missed progress event, not a + failed task. Adopter impls of this method that need durability + guarantees should buffer + retry internally. + """ + ... + + async def complete( + self, + task_id: str, + result: dict[str, Any], + ) -> None: + """Mark the task ``completed`` with ``result`` as the terminal + artifact. + + ``result`` MUST be the JSON-serialized spec response shape + (e.g. ``CreateMediaBuySuccessResponse`` via ``model_dump()``). + Idempotent on repeated calls with equal ``result``; + non-idempotent re-completion with different result raises + ``ValueError``. + """ + ... + + async def fail( + self, + task_id: str, + error: dict[str, Any], + ) -> None: + """Mark the task ``failed`` with ``error`` as the terminal + wire-shaped error payload. + + ``error`` MUST be the :meth:`AdcpError.to_wire` shape so + ``tasks/get`` round-trips the spec ``adcp_error`` envelope + verbatim. Idempotent on repeated calls with equal ``error``. + """ + ... + + async def get( + self, + task_id: str, + *, + expected_account_id: str | None = None, + ) -> dict[str, Any] | None: + """Look up a task record. Cross-tenant probes return ``None``. + + :param task_id: Framework-allocated id from a prior :meth:`issue`. + :param expected_account_id: When supplied, the registry MUST + return ``None`` if the stored record's ``account_id`` does + not match. The wire ``tasks/get`` path always supplies the + authenticated principal's account_id so adopters can't + probe across tenants. + :returns: The record dict (per :meth:`TaskRecord.to_dict`) or + ``None`` if the id is unknown OR a cross-tenant mismatch. + """ + ... + + +# --------------------------------------------------------------------------- +# In-memory reference implementation — v6.0 ships this; v6.1 lands a +# durable Postgres-backed counterpart that implements the same Protocol. +# --------------------------------------------------------------------------- + + +class InMemoryTaskRegistry: + """Process-local task registry — v6.0 reference implementation. + + Storage is a plain ``dict[str, TaskRecord]`` guarded by an + :class:`asyncio.Lock`. Adequate for local dev, CI, and test + fixtures; production deployments wire a durable counterpart + (PostgreSQL, Redis, etc.) implementing the same :class:`TaskRegistry` + Protocol. + + Production-mode gate: :func:`adcp.decisioning.serve.serve` refuses + to wire this when ``ADCP_ENV`` indicates production unless + ``ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1`` is set. The gate lives + in ``serve.py`` (Stage 3 dispatch) — this class itself is benign; + constructing it is fine. Production sellers running + ``sales-broadcast-tv`` or any HITL flow get the explicit refusal + so silent in-memory fallback can't bite oncall. + """ + + def __init__(self) -> None: + self._records: dict[str, TaskRecord] = {} + self._lock = asyncio.Lock() + + async def issue( + self, + *, + account_id: str, + task_type: str, + ) -> str: + task_id = f"task_{uuid.uuid4().hex[:16]}" + async with self._lock: + self._records[task_id] = TaskRecord( + task_id=task_id, + account_id=account_id, + state="submitted", + task_type=task_type, + ) + return task_id + + async def update_progress( + self, + task_id: str, + progress: dict[str, Any], + ) -> None: + async with self._lock: + record = self._records.get(task_id) + if record is None: + # Silent no-op — the dispatch wrapper expects this method + # to never raise on transient lookup failure (see Protocol + # docstring). + return + record.progress = dict(progress) + if record.state == "submitted": + record.state = "working" + record.updated_at = time.time() + + async def complete( + self, + task_id: str, + result: dict[str, Any], + ) -> None: + async with self._lock: + record = self._records.get(task_id) + if record is None: + raise ValueError(f"Task {task_id!r} not found") + if record.state == "completed": + if record.result == result: + return # idempotent + raise ValueError(f"Task {task_id!r} already completed with a different result") + record.state = "completed" + record.result = dict(result) + record.updated_at = time.time() + + async def fail( + self, + task_id: str, + error: dict[str, Any], + ) -> None: + async with self._lock: + record = self._records.get(task_id) + if record is None: + raise ValueError(f"Task {task_id!r} not found") + if record.state == "failed": + if record.error == error: + return # idempotent + raise ValueError(f"Task {task_id!r} already failed with a different error") + record.state = "failed" + record.error = dict(error) + record.updated_at = time.time() + + async def get( + self, + task_id: str, + *, + expected_account_id: str | None = None, + ) -> dict[str, Any] | None: + async with self._lock: + record = self._records.get(task_id) + if record is None: + return None + if expected_account_id is not None and record.account_id != expected_account_id: + # Cross-tenant probe — return None, NOT raw record. + # Critical security boundary: returning the record + # here enables principal-enumeration via task_id + # probing. The dispatch path that calls this method + # always passes the authenticated principal's + # account_id; adopter impls implementing this Protocol + # MUST preserve this behavior. + return None + return record.to_dict() + + +# --------------------------------------------------------------------------- +# TaskHandoffContext — what the framework passes into adopter handoff fns +# --------------------------------------------------------------------------- + + +@dataclass +class TaskHandoffContext: + """Per-task context passed to the handoff fn registered via + :meth:`adcp.decisioning.RequestContext.handoff_to_task`. + + Adopter pattern:: + + def create_media_buy(self, req, ctx): + if self._needs_review(req): + return ctx.handoff_to_task(self._async_review) + + return CreateMediaBuySuccess(media_buy_id="mb_1", ...) + + async def _async_review(self, task_ctx: TaskHandoffContext): + await task_ctx.update({"message": "Trafficker reviewing"}) + decision = await self._wait_for_trafficker(task_ctx.id) + return CreateMediaBuySuccess(media_buy_id=decision.id, ...) + + The framework allocates ``task_ctx.id`` BEFORE invoking the + handoff fn so the adopter can persist the id to its own backend + (storyboard runner row, Slack thread reference, etc.) before + kicking off slow work. This fixes a documented v1 ergonomics bug + where adopters could only learn the task_id AFTER returning. + + Constructed by :func:`adcp.decisioning.dispatch._build_handoff_context`; + never instantiated by adopter code. + """ + + id: str + _registry: TaskRegistry + _heartbeat_impl: Callable[[], Awaitable[None]] = field(default_factory=lambda: _noop_heartbeat) + + async def update(self, progress: dict[str, Any]) -> None: + """Write a progress payload. Transitions ``submitted`` → + ``working`` on first call. + + Errors are swallowed: a transient registry write failure must + not abort the handoff. Buyer-facing impact is a missed + progress event, not a failed task. + """ + try: + await self._registry.update_progress(self.id, progress) + except Exception: + # Swallow — preserve the handoff fn's progress in the + # face of registry transients. Adopters who need + # delivery guarantees plug a durable registry. + return + + async def heartbeat(self) -> None: + """Liveness signal for operator infrastructure. v6.1 stub. + + v6.0 ships as a no-op so adopter code calling + ``await task_ctx.heartbeat()`` future-proofs against the + eventual implementation. Operator-side TTL-reset wiring lands + with the durable registry impl. + """ + await self._heartbeat_impl() + + +async def _noop_heartbeat() -> None: + """Default no-op heartbeat — adequate for v6.0.""" + await asyncio.sleep(0) + + +__all__ = [ + "InMemoryTaskRegistry", + "TaskHandoffContext", + "TaskRecord", + "TaskRegistry", + "TaskState", +] diff --git a/tests/test_decisioning_task_registry.py b/tests/test_decisioning_task_registry.py new file mode 100644 index 000000000..7e5917207 --- /dev/null +++ b/tests/test_decisioning_task_registry.py @@ -0,0 +1,310 @@ +"""Unit tests for adcp.decisioning.task_registry. + +Covers: + +* :class:`TaskRegistry` Protocol structural matching +* :class:`InMemoryTaskRegistry` lifecycle: + - issue() returns unique task_id; row stored in 'submitted' + - update_progress transitions submitted → working on first call + - update_progress is no-op state-transition on subsequent calls + - update_progress on unknown task_id silently no-ops (per Protocol + contract — registry transients must not abort handoff) + - complete() transitions to 'completed' with result; idempotent on + equal result; raises on different result + - fail() transitions to 'failed' with error; idempotent on equal + error; raises on different error + - get() returns the dict; cross-tenant probe returns None + - concurrent issue() yields unique task_ids +* :class:`TaskHandoffContext` ergonomics: + - update() routes to registry.update_progress; swallows transient errors + - heartbeat() is a v6.0 no-op + +The hostile-probe regression is in +``test_decisioning_task_registry_cross_tenant.py`` (separate file per +the dispatch design's file plan — covers the security boundary +explicitly). +""" + +from __future__ import annotations + +import asyncio +from typing import Any +from unittest.mock import AsyncMock + +import pytest + +from adcp.decisioning.task_registry import ( + InMemoryTaskRegistry, + TaskHandoffContext, + TaskRegistry, + _noop_heartbeat, +) + +# ---- Protocol structural matching ---- + + +def test_in_memory_task_registry_satisfies_protocol() -> None: + """``InMemoryTaskRegistry`` matches the ``TaskRegistry`` Protocol + structurally — adopters writing custom registries don't need to + inherit, just implement the methods.""" + assert isinstance(InMemoryTaskRegistry(), TaskRegistry) + + +def test_custom_registry_satisfies_protocol_via_duck_typing() -> None: + """Adopter-written class with the right methods matches without + inheritance.""" + + class _Stub: + async def issue(self, *, account_id: str, task_type: str) -> str: + return "task_x" + + async def update_progress(self, task_id: str, progress: dict[str, Any]) -> None: + pass + + async def complete(self, task_id: str, result: dict[str, Any]) -> None: + pass + + async def fail(self, task_id: str, error: dict[str, Any]) -> None: + pass + + async def get( + self, + task_id: str, + *, + expected_account_id: str | None = None, + ) -> dict[str, Any] | None: + return None + + assert isinstance(_Stub(), TaskRegistry) + + +# ---- InMemoryTaskRegistry — issue + initial state ---- + + +@pytest.mark.asyncio +async def test_issue_returns_unique_task_id() -> None: + """Each ``issue()`` allocates a fresh UUID-based id and persists + a 'submitted' row.""" + reg = InMemoryTaskRegistry() + a = await reg.issue(account_id="acct_a", task_type="create_media_buy") + b = await reg.issue(account_id="acct_a", task_type="create_media_buy") + assert a != b + assert a.startswith("task_") + assert b.startswith("task_") + + +@pytest.mark.asyncio +async def test_issue_initial_state_is_submitted() -> None: + reg = InMemoryTaskRegistry() + tid = await reg.issue(account_id="acct_a", task_type="create_media_buy") + rec = await reg.get(tid, expected_account_id="acct_a") + assert rec is not None + assert rec["state"] == "submitted" + assert rec["task_type"] == "create_media_buy" + assert rec["account_id"] == "acct_a" + assert rec["progress"] is None + assert rec["result"] is None + assert rec["error"] is None + + +@pytest.mark.asyncio +async def test_concurrent_issue_yields_unique_ids() -> None: + """Concurrent calls under the asyncio.Lock all get distinct ids; + no collision regression.""" + reg = InMemoryTaskRegistry() + ids = await asyncio.gather( + *[reg.issue(account_id="acct_a", task_type="create_media_buy") for _ in range(20)] + ) + assert len(set(ids)) == 20 + + +# ---- update_progress lifecycle ---- + + +@pytest.mark.asyncio +async def test_update_progress_transitions_to_working_on_first_call() -> None: + reg = InMemoryTaskRegistry() + tid = await reg.issue(account_id="acct_a", task_type="create_media_buy") + await reg.update_progress(tid, {"step": 1, "message": "validating"}) + rec = await reg.get(tid, expected_account_id="acct_a") + assert rec is not None + assert rec["state"] == "working" + assert rec["progress"] == {"step": 1, "message": "validating"} + + +@pytest.mark.asyncio +async def test_update_progress_subsequent_calls_dont_change_state() -> None: + reg = InMemoryTaskRegistry() + tid = await reg.issue(account_id="acct_a", task_type="create_media_buy") + await reg.update_progress(tid, {"step": 1}) + await reg.update_progress(tid, {"step": 2}) + rec = await reg.get(tid, expected_account_id="acct_a") + assert rec is not None + assert rec["state"] == "working" + assert rec["progress"] == {"step": 2} + + +@pytest.mark.asyncio +async def test_update_progress_unknown_task_is_silent_noop() -> None: + """Per Protocol contract: registry transients must not abort the + handoff. Unknown task_id → silent return.""" + reg = InMemoryTaskRegistry() + # Should NOT raise. + await reg.update_progress("nonexistent", {"step": 1}) + + +# ---- complete ---- + + +@pytest.mark.asyncio +async def test_complete_transitions_to_completed() -> None: + reg = InMemoryTaskRegistry() + tid = await reg.issue(account_id="acct_a", task_type="create_media_buy") + await reg.complete(tid, {"media_buy_id": "mb_1"}) + rec = await reg.get(tid, expected_account_id="acct_a") + assert rec is not None + assert rec["state"] == "completed" + assert rec["result"] == {"media_buy_id": "mb_1"} + + +@pytest.mark.asyncio +async def test_complete_is_idempotent_on_equal_result() -> None: + """Repeated complete() with the same result is a no-op — safe for + retries on transient post-completion failures.""" + reg = InMemoryTaskRegistry() + tid = await reg.issue(account_id="acct_a", task_type="create_media_buy") + await reg.complete(tid, {"media_buy_id": "mb_1"}) + await reg.complete(tid, {"media_buy_id": "mb_1"}) # idempotent + rec = await reg.get(tid, expected_account_id="acct_a") + assert rec is not None + assert rec["state"] == "completed" + + +@pytest.mark.asyncio +async def test_complete_with_different_result_raises() -> None: + """Re-completion with a different result is a programmer error, + not silent overwrite.""" + reg = InMemoryTaskRegistry() + tid = await reg.issue(account_id="acct_a", task_type="create_media_buy") + await reg.complete(tid, {"media_buy_id": "mb_1"}) + with pytest.raises(ValueError, match="already completed"): + await reg.complete(tid, {"media_buy_id": "mb_2"}) + + +@pytest.mark.asyncio +async def test_complete_unknown_task_raises() -> None: + reg = InMemoryTaskRegistry() + with pytest.raises(ValueError, match="not found"): + await reg.complete("nonexistent", {"x": 1}) + + +# ---- fail ---- + + +@pytest.mark.asyncio +async def test_fail_transitions_to_failed() -> None: + reg = InMemoryTaskRegistry() + tid = await reg.issue(account_id="acct_a", task_type="create_media_buy") + err = { + "code": "BUDGET_TOO_LOW", + "message": "Below floor", + "recovery": "correctable", + } + await reg.fail(tid, err) + rec = await reg.get(tid, expected_account_id="acct_a") + assert rec is not None + assert rec["state"] == "failed" + assert rec["error"] == err + + +@pytest.mark.asyncio +async def test_fail_is_idempotent_on_equal_error() -> None: + reg = InMemoryTaskRegistry() + tid = await reg.issue(account_id="acct_a", task_type="create_media_buy") + err = {"code": "BUDGET_TOO_LOW", "message": "Below floor"} + await reg.fail(tid, err) + await reg.fail(tid, err) # idempotent + rec = await reg.get(tid, expected_account_id="acct_a") + assert rec is not None + assert rec["state"] == "failed" + + +@pytest.mark.asyncio +async def test_fail_with_different_error_raises() -> None: + reg = InMemoryTaskRegistry() + tid = await reg.issue(account_id="acct_a", task_type="create_media_buy") + await reg.fail(tid, {"code": "BUDGET_TOO_LOW"}) + with pytest.raises(ValueError, match="already failed"): + await reg.fail(tid, {"code": "POLICY_VIOLATION"}) + + +@pytest.mark.asyncio +async def test_fail_unknown_task_raises() -> None: + reg = InMemoryTaskRegistry() + with pytest.raises(ValueError, match="not found"): + await reg.fail("nonexistent", {"code": "BUDGET_TOO_LOW"}) + + +# ---- get ---- + + +@pytest.mark.asyncio +async def test_get_returns_none_for_unknown_id() -> None: + reg = InMemoryTaskRegistry() + assert await reg.get("nonexistent") is None + assert await reg.get("nonexistent", expected_account_id="acct_a") is None + + +@pytest.mark.asyncio +async def test_get_without_expected_account_returns_record() -> None: + """Unscoped get (e.g., admin tooling) returns the record without + cross-tenant filtering.""" + reg = InMemoryTaskRegistry() + tid = await reg.issue(account_id="acct_a", task_type="create_media_buy") + rec = await reg.get(tid) # no expected_account_id + assert rec is not None + assert rec["account_id"] == "acct_a" + + +# ---- TaskHandoffContext ---- + + +@pytest.mark.asyncio +async def test_handoff_context_update_routes_to_registry() -> None: + reg = InMemoryTaskRegistry() + tid = await reg.issue(account_id="acct_a", task_type="create_media_buy") + handoff_ctx = TaskHandoffContext(id=tid, _registry=reg) + await handoff_ctx.update({"step": 1}) + rec = await reg.get(tid, expected_account_id="acct_a") + assert rec is not None + assert rec["progress"] == {"step": 1} + + +@pytest.mark.asyncio +async def test_handoff_context_update_swallows_registry_errors() -> None: + """A transient registry write failure must not abort the handoff + fn. ``update`` swallows; the buyer-facing impact is a missed + progress event, not a failed task.""" + failing_registry = AsyncMock(spec=TaskRegistry) + failing_registry.update_progress.side_effect = RuntimeError("DB down") + handoff_ctx = TaskHandoffContext(id="task_x", _registry=failing_registry) + # Must NOT raise. + await handoff_ctx.update({"step": 1}) + failing_registry.update_progress.assert_called_once_with("task_x", {"step": 1}) + + +@pytest.mark.asyncio +async def test_handoff_context_heartbeat_is_noop() -> None: + """v6.0 ships heartbeat as a no-op — adopters can call it for + future-proofing without effect today.""" + reg = InMemoryTaskRegistry() + handoff_ctx = TaskHandoffContext(id="task_x", _registry=reg) + # Just verify it returns without error. + await handoff_ctx.heartbeat() + + +@pytest.mark.asyncio +async def test_noop_heartbeat_is_awaitable() -> None: + """Module-level _noop_heartbeat is an awaitable; importable for + custom test harnesses.""" + await _noop_heartbeat() diff --git a/tests/test_decisioning_task_registry_cross_tenant.py b/tests/test_decisioning_task_registry_cross_tenant.py new file mode 100644 index 000000000..6089beaf5 --- /dev/null +++ b/tests/test_decisioning_task_registry_cross_tenant.py @@ -0,0 +1,141 @@ +"""Cross-tenant security boundary regression suite for +:class:`adcp.decisioning.task_registry.InMemoryTaskRegistry`. + +The wire ``tasks/get`` path passes the authenticated principal's +account_id as ``expected_account_id``. The registry MUST return None +on mismatch — returning the raw record enables principal-enumeration +via task_id probing (an attacker with one valid task_id can confirm +its existence regardless of which account they're authenticated as). + +This is a separate file (vs. ``test_decisioning_task_registry.py``) +because the security boundary deserves explicit, prominently-named +tests. If a future implementer regresses the cross-tenant check, the +test name on the failure should be unambiguous about what broke. + +Round-3 dispatch design D7: "cross-tenant ``get`` returns None." +Emma TS-side review #11 (Round 4): same regression caught on the JS +port; mirror the test surface here. +""" + +from __future__ import annotations + +import pytest + +from adcp.decisioning.task_registry import InMemoryTaskRegistry + + +@pytest.mark.asyncio +async def test_cross_tenant_get_on_submitted_task_returns_none() -> None: + """Account A creates a task; account B probes it. B gets None, + NOT A's task data.""" + reg = InMemoryTaskRegistry() + tid_a = await reg.issue(account_id="acct_a", task_type="create_media_buy") + cross = await reg.get(tid_a, expected_account_id="acct_b") + assert cross is None, ( + "Cross-tenant probe must return None; returning A's record to B " + "leaks task existence and enables principal-enumeration" + ) + + +@pytest.mark.asyncio +async def test_cross_tenant_get_on_working_task_returns_none() -> None: + """Same regression after the task has been touched by + update_progress (state=working).""" + reg = InMemoryTaskRegistry() + tid_a = await reg.issue(account_id="acct_a", task_type="create_media_buy") + await reg.update_progress(tid_a, {"step": 1}) + cross = await reg.get(tid_a, expected_account_id="acct_b") + assert cross is None + + +@pytest.mark.asyncio +async def test_cross_tenant_get_on_completed_task_returns_none() -> None: + """After the task is completed, the cross-tenant check still + holds — the result payload is just as sensitive (probably more + so) than the existence signal.""" + reg = InMemoryTaskRegistry() + tid_a = await reg.issue(account_id="acct_a", task_type="create_media_buy") + await reg.complete(tid_a, {"media_buy_id": "mb_1"}) + cross = await reg.get(tid_a, expected_account_id="acct_b") + assert cross is None, ( + "Completed-task cross-tenant probe must return None; the result " + "payload is the very thing the attacker wants to read" + ) + + +@pytest.mark.asyncio +async def test_cross_tenant_get_on_failed_task_returns_none() -> None: + """Failure-state probe is also blocked — the error payload may + reveal seller-side validation rules or business logic that + shouldn't leak across tenants.""" + reg = InMemoryTaskRegistry() + tid_a = await reg.issue(account_id="acct_a", task_type="create_media_buy") + await reg.fail( + tid_a, + { + "code": "POLICY_VIOLATION", + "message": "buyer fails fraud heuristic 3.2", + "recovery": "terminal", + }, + ) + cross = await reg.get(tid_a, expected_account_id="acct_b") + assert cross is None + + +@pytest.mark.asyncio +async def test_owner_can_read_their_own_task_after_state_transitions() -> None: + """Sanity: the cross-tenant block doesn't break the legitimate + same-tenant read path. Account A reads its own task at every + state.""" + reg = InMemoryTaskRegistry() + tid_a = await reg.issue(account_id="acct_a", task_type="create_media_buy") + + # submitted + rec = await reg.get(tid_a, expected_account_id="acct_a") + assert rec is not None + assert rec["state"] == "submitted" + + # working + await reg.update_progress(tid_a, {"step": 1}) + rec = await reg.get(tid_a, expected_account_id="acct_a") + assert rec is not None + assert rec["state"] == "working" + + # completed + await reg.complete(tid_a, {"media_buy_id": "mb_1"}) + rec = await reg.get(tid_a, expected_account_id="acct_a") + assert rec is not None + assert rec["state"] == "completed" + + +@pytest.mark.asyncio +async def test_cross_tenant_probe_with_unknown_id_also_returns_none() -> None: + """Hostile-probe variant: attacker guesses a task_id that doesn't + exist at all. Must return None just like the existence-check + case — distinguishing 'no such task' from 'task exists but wrong + tenant' would itself be a side-channel.""" + reg = InMemoryTaskRegistry() + cross = await reg.get("task_definitely_not_real", expected_account_id="acct_b") + assert cross is None + + +@pytest.mark.asyncio +async def test_cross_tenant_probe_does_not_match_on_substring() -> None: + """Edge case: account A is "acct" and account B is "acct_b". A + naive prefix or substring check would let A see B's tasks. The + registry must use exact equality.""" + reg = InMemoryTaskRegistry() + tid_b = await reg.issue(account_id="acct_b", task_type="create_media_buy") + # "acct" is a prefix of "acct_b" — exact-match check rejects. + cross = await reg.get(tid_b, expected_account_id="acct") + assert cross is None + + +@pytest.mark.asyncio +async def test_cross_tenant_probe_with_empty_string_account_returns_none() -> None: + """Empty-string account_id is not a valid principal; must be + treated as a mismatch rather than as "no scoping".""" + reg = InMemoryTaskRegistry() + tid_a = await reg.issue(account_id="acct_a", task_type="create_media_buy") + cross = await reg.get(tid_a, expected_account_id="") + assert cross is None From df35e5bedb9dbf49c097865f0e5bea7edc9050dd Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Thu, 30 Apr 2026 10:46:18 -0400 Subject: [PATCH 10/18] =?UTF-8?q?feat(decisioning):=20dispatch=20layer=20?= =?UTF-8?q?=E2=80=94=20validate=5Fplatform=20+=20invoke=20+=20handoff?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stage 3 second piece. Builds on task_registry.py (commit e961adcd) to ship the dispatch seam that ties RequestContext hydration, account resolution, executor lifecycle, AdcpError projection, and TaskHandoff lifecycle together. - adcp/decisioning/dispatch.py: * REQUIRED_METHODS_PER_SPECIALISM (sales-* 9 specialisms; pinned by contract test). * validate_platform(platform) — server-boot fail-fast with governance opt-in security gate (D15 round-4) and forward-compatible unknown-specialism UserWarning (D14 round-3). Validator throws caught + projected to INVALID_REQUEST so boot never crashes (Emma #16). * compose_caller_identity(account, store) — composite key per round-3 D9 (structural cross-store isolation). * _build_request_context — hydration helper mirroring TS to-context.ts:buildRequestContext. Stub state/resolve when not supplied; v6.1 backing impls plug in via kwargs. * _invoke_platform_method — async-vs-sync detection (asyncio, not inspect — partial-unwrap drift), sync runs on executor with explicit contextvars snapshot (D6). TaskHandoff returns flow through _project_handoff. Non-AdcpError exceptions wrap to INTERNAL_ERROR with __cause__ preserved. * _project_handoff — registry.issue → Submitted envelope → background fn (asyncio.create_task or run_in_executor) → registry.complete/fail. - tests/test_decisioning_dispatch.py: 27 tests covering every surface (validate_platform happy + 7 failure paths; compose_caller_identity composite + isolation; _build_request_context hydration variants; _invoke_platform_method async/sync/contextvars/ errors/arg-projector; _project_handoff envelope/lifecycle). Foundation tests: 97 (+27). Full suite: 2477 passed, 17 skipped, 1 xfailed. ruff + mypy clean. Stage 3 next: codegen handler.py + serve.py wrapper. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/adcp/decisioning/dispatch.py | 623 +++++++++++++++++++++++++ tests/test_decisioning_dispatch.py | 717 +++++++++++++++++++++++++++++ 2 files changed, 1340 insertions(+) create mode 100644 src/adcp/decisioning/dispatch.py create mode 100644 tests/test_decisioning_dispatch.py diff --git a/src/adcp/decisioning/dispatch.py b/src/adcp/decisioning/dispatch.py new file mode 100644 index 000000000..8d05d9629 --- /dev/null +++ b/src/adcp/decisioning/dispatch.py @@ -0,0 +1,623 @@ +"""Dispatch layer for the v6.0 DecisioningPlatform framework. + +The dispatch layer ties everything together at the seam between the +existing ``adcp.server`` transport machinery and the new +``DecisioningPlatform`` Protocol-driven adopter shape: + +* :func:`validate_platform` — server-boot fail-fast: confirms every + claimed specialism has its required methods, governance opt-in is + honored, and ``accounts`` is a real ``AccountStore``. +* :func:`compose_caller_identity` — composite cache scope key + ``f"{store_qualname}:{account.id}"`` (round-3 D9 — structural + cross-store isolation). +* :func:`_build_request_context` — the hydration helper that turns a + ``ToolContext`` + resolved ``Account`` into a typed + ``RequestContext`` per D2 / D9 / D15. +* :func:`_invoke_platform_method` — the method-call seam. Detects + async-vs-sync, runs sync on a thread-pool executor with + ``contextvars`` snapshot, projects ``TaskHandoff`` returns, wraps + non-``AdcpError`` exceptions to ``INTERNAL_ERROR`` (wire never + leaks a stack trace). +* :func:`_project_handoff` — TaskHandoff lifecycle: allocates + ``task_id``, projects the wire ``Submitted`` envelope, kicks off + the adopter's handoff fn in the background, persists terminal + artifact via the task registry. + +Codegen-emitted ``handler.py`` (Stage 3 next file) calls +``_invoke_platform_method`` from each typed shim; ``serve.py`` +(Stage 3 last) wires the executor + registry + middleware. + +This module is framework-internal — adopters import nothing from +here. The Protocol contracts adopters write against live in +:mod:`adcp.decisioning.specialisms.*`. +""" + +from __future__ import annotations + +import asyncio +import contextvars +import functools +import logging +import warnings +from concurrent.futures import ThreadPoolExecutor +from typing import TYPE_CHECKING, Any + +from adcp.decisioning.platform import ( + GOVERNANCE_SPECIALISMS, + DecisioningCapabilities, + DecisioningPlatform, +) +from adcp.decisioning.state import _NotYetWiredStateReader +from adcp.decisioning.task_registry import ( + TaskHandoffContext, + TaskRegistry, +) +from adcp.decisioning.types import AdcpError, TaskHandoff, is_task_handoff + +if TYPE_CHECKING: + from pydantic import BaseModel + + from adcp.decisioning.accounts import AccountStore + from adcp.decisioning.context import AuthInfo, RequestContext + from adcp.decisioning.types import Account + from adcp.server.base import ToolContext + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# REQUIRED_METHODS_PER_SPECIALISM — what each specialism must implement +# --------------------------------------------------------------------------- + +#: Required platform methods per specialism. ``validate_platform`` walks +#: ``capabilities.specialisms`` against this map at server boot and +#: fail-fasts when a claimed specialism is missing methods. +#: +#: Keyed by specialism slug (matches the AdCP wire enum in +#: ``schemas/cache/enums/specialism.json``). v6.0 ships ``sales-*``; +#: v6.1 adds the rest as new specialism Protocols land. +#: +#: Drift policy: when a specialism Protocol gains a required method, +#: bump this map AND add a v6.x migration note. ``validate_platform`` +#: tolerates *unknown* specialisms (forward-compat with v6.x+ specs) +#: but only via UserWarning — see D14 round-3. +REQUIRED_METHODS_PER_SPECIALISM: dict[str, frozenset[str]] = { + # All nine sales-* specialisms share the unified hybrid SalesPlatform + # surface. Per the SalesPlatform docstring, every sales-* claim + # requires the five core methods. The four optional methods + # (get_media_buys, provide_performance_feedback, + # list_creative_formats, list_creatives) are present-or-absent — + # not enforced here. The v6.0 rc.1 spec mandates them; v6.0 alpha + # tolerates absence so adopters can ship in stages. + "sales-non-guaranteed": frozenset( + { + "get_products", + "create_media_buy", + "update_media_buy", + "sync_creatives", + "get_media_buy_delivery", + } + ), + "sales-guaranteed": frozenset( + { + "get_products", + "create_media_buy", + "update_media_buy", + "sync_creatives", + "get_media_buy_delivery", + } + ), + "sales-broadcast-tv": frozenset( + { + "get_products", + "create_media_buy", + "update_media_buy", + "sync_creatives", + "get_media_buy_delivery", + } + ), + "sales-streaming-tv": frozenset( + { + "get_products", + "create_media_buy", + "update_media_buy", + "sync_creatives", + "get_media_buy_delivery", + } + ), + "sales-social": frozenset( + { + "get_products", + "create_media_buy", + "update_media_buy", + "sync_creatives", + "get_media_buy_delivery", + } + ), + "sales-exchange": frozenset( + { + "get_products", + "create_media_buy", + "update_media_buy", + "sync_creatives", + "get_media_buy_delivery", + } + ), + "sales-proposal-mode": frozenset( + { + "get_products", + "create_media_buy", + "update_media_buy", + "sync_creatives", + "get_media_buy_delivery", + } + ), + # Catalog-driven and retail-media require the sales core PLUS + # sync_catalogs (to push the inventory taxonomy). v6.1 adds + # log_event + sync_event_sources for retail-media; for v6.0 alpha + # we leave those off the required list so adopters can ship sales + # core first. + "sales-catalog-driven": frozenset( + { + "get_products", + "create_media_buy", + "update_media_buy", + "sync_creatives", + "get_media_buy_delivery", + "sync_catalogs", + } + ), + "sales-retail-media": frozenset( + { + "get_products", + "create_media_buy", + "update_media_buy", + "sync_creatives", + "get_media_buy_delivery", + "sync_catalogs", + } + ), +} + + +# --------------------------------------------------------------------------- +# validate_platform — server-boot fail-fast +# --------------------------------------------------------------------------- + + +def validate_platform(platform: DecisioningPlatform) -> None: + """Server-boot validator — fail-fast before the first request. + + Checks (in order): + + 1. ``platform.capabilities`` is a populated + :class:`DecisioningCapabilities` (not the base default). + 2. ``platform.accounts`` is a real :class:`AccountStore` + (anything truthy with a ``resolve`` method) — None catches + subclasses that forgot to attach a store. + 3. Each claimed specialism's required methods are implemented + on the platform subclass. Unknown specialisms emit + ``UserWarning`` (forward-compat with v6.x+ specs); known + specialisms missing methods raise ``AdcpError("INVALID_REQUEST")``. + 4. **Governance opt-in fail-fast (D15 round-4):** if any claimed + specialism is in :data:`GOVERNANCE_SPECIALISMS` AND + ``capabilities.governance_aware`` is False AND the platform + hasn't wired a custom :class:`StateReader` (i.e., the dispatch + hydration helper would supply ``_NotYetWiredStateReader``), + raise. Silent governance-gate skipping is a security + regression the framework refuses to ship. + + Catches per-validator exceptions and re-projects to + ``AdcpError("INVALID_REQUEST")`` so server boot never crashes + with a raw stack trace — the operator sees one structured + diagnostic per problem (Round-4 Emma #16). + + :raises AdcpError: on any blocking validation failure. The error + ``details`` carry per-issue diagnostics for operator triage. + """ + if not isinstance(platform.capabilities, DecisioningCapabilities): + raise AdcpError( + "INVALID_REQUEST", + message=( + "DecisioningPlatform.capabilities must be a " + "DecisioningCapabilities instance — found " + f"{type(platform.capabilities).__name__!r}. Subclasses MUST " + "set ``capabilities = DecisioningCapabilities(...)`` on the " + "class body." + ), + recovery="terminal", + ) + + accounts = getattr(platform, "accounts", None) + if accounts is None: + raise AdcpError( + "INVALID_REQUEST", + message=( + "DecisioningPlatform.accounts is None — subclasses MUST set " + "an AccountStore (SingletonAccounts, ExplicitAccounts, " + "FromAuthAccounts, or a custom AccountStore impl) on the " + "class body." + ), + recovery="terminal", + ) + + # Specialism-method coverage. + missing: list[tuple[str, str]] = [] + unknown: list[str] = [] + governance_specialisms_claimed: list[str] = [] + for specialism in platform.capabilities.specialisms: + if specialism in GOVERNANCE_SPECIALISMS: + governance_specialisms_claimed.append(specialism) + try: + required = REQUIRED_METHODS_PER_SPECIALISM.get(specialism) + except Exception as exc: + # Defensive: a custom REQUIRED_METHODS_PER_SPECIALISM impl + # (test-monkeypatch, etc.) that raises must not crash boot. + # Round-4 Emma #16 — wrap validator throws. + logger.warning( + "REQUIRED_METHODS_PER_SPECIALISM lookup raised for %r: %r", + specialism, + exc, + ) + required = None + if required is None: + unknown.append(specialism) + continue + for method_name in required: + if not _has_overridden_method(platform, method_name): + missing.append((specialism, method_name)) + + if unknown: + warnings.warn( + ( + f"DecisioningPlatform claims unknown specialism(s) " + f"{sorted(unknown)!r}. Either typos (compare against the AdCP " + f"specialism enum: {sorted(REQUIRED_METHODS_PER_SPECIALISM.keys())}), " + "or your framework version predates the spec. Required-method " + "validation is skipped for these specialisms; tools/list will " + "advertise the spec set this framework version knows." + ), + UserWarning, + stacklevel=2, + ) + + if missing: + raise AdcpError( + "INVALID_REQUEST", + message=( + "DecisioningPlatform claims specialisms but is missing " + f"required methods: {missing}. Implement each on your " + "subclass or remove the specialism from " + "capabilities.specialisms." + ), + recovery="terminal", + details={"missing": [{"specialism": s, "method": m} for s, m in missing]}, + ) + + # Governance opt-in fail-fast (D15 round-4). + if governance_specialisms_claimed and not platform.capabilities.governance_aware: + raise AdcpError( + "INVALID_REQUEST", + message=( + f"Platform claims governance-* specialism(s) " + f"{governance_specialisms_claimed!r} but " + "capabilities.governance_aware is False. Set " + "governance_aware=True AND wire a custom StateReader that " + "returns real GovernanceContextJWS values, OR drop the " + "governance-* specialism claim. Silent governance-gate " + "skipping is a security boundary; the framework refuses " + "to ship that. See " + "docs/proposals/decisioning-platform-dispatch-design.md#d15" + ), + recovery="terminal", + details={ + "governance_specialisms": sorted(governance_specialisms_claimed), + "governance_aware": False, + }, + ) + + +def _has_overridden_method(platform: DecisioningPlatform, method_name: str) -> bool: + """True when the platform subclass provides ``method_name``. + + The base :class:`DecisioningPlatform` class itself doesn't define + specialism methods (D11 — base is intentionally minimal). So + ``hasattr(platform, method_name)`` is sufficient: if the attribute + exists, the subclass put it there. + """ + return hasattr(platform, method_name) and callable(getattr(platform, method_name)) + + +# --------------------------------------------------------------------------- +# compose_caller_identity — D9 round-3 composite cache scope key +# --------------------------------------------------------------------------- + + +def compose_caller_identity( + account: Account[Any], + store: AccountStore[Any], +) -> str: + """Compose the cache scope key from store qualname + account id. + + Round-3 D9: the framework's idempotency middleware reads + ``ctx.caller_identity`` for cache scoping. Using ``account.id`` + alone leaks across stores when two adopters use different + ``AccountStore`` impls but happen to mint colliding ids. The + composite ``f"{store qualname}:{account.id}"`` gives structural + cross-store isolation at zero coordination cost. + + Within-store collisions (one impl, identical ``account.id`` for + two distinct accounts) remain an adopter bug at + ``AccountStore.resolve``; the framework can't structurally prevent + that without a runtime registry costing more than it buys. + """ + return f"{type(store).__qualname__}:{account.id}" + + +# --------------------------------------------------------------------------- +# _build_request_context — the hydration helper +# --------------------------------------------------------------------------- + + +def _build_request_context( + tool_ctx: ToolContext, + account: Account[Any], + auth_info: AuthInfo | None, + *, + state_reader: Any | None = None, + resource_resolver: Any | None = None, +) -> RequestContext[Any]: + """Hydrate a :class:`RequestContext` per the D2 + D9 + D15 contract. + + Mirrors the TS-side ``to-context.ts:buildRequestContext``. The + framework supplies the context per request; adopters never + construct one (the class docstring on + :class:`adcp.decisioning.RequestContext` carries the + ``@internal-construction`` note). + + :param tool_ctx: The framework's :class:`ToolContext` from the + underlying transport. Carries ``request_id``, ``tenant_id``, + and ``metadata``; we extend its caller_identity to the + composite key. + :param account: Resolved account from the platform's + :class:`AccountStore.resolve`. + :param auth_info: Optional verified principal info — when present, + ``auth_principal`` is populated from ``auth_info.principal``. + :param state_reader: Custom ``StateReader`` impl. Defaults to the + v6.0 stub. Accept as a parameter so ``serve()`` can wire a + v6.1 backing store without touching dispatch. + :param resource_resolver: Custom ``ResourceResolver`` impl. Same + plumbing rationale as ``state_reader``. + """ + # Local import to avoid a circular at module-load time. dispatch.py + # is imported by serve.py; context.py and accounts.py both reach + # back into adcp.decisioning, so the cycle is real if we hoist. + from adcp.decisioning.context import RequestContext + from adcp.decisioning.resolve import _NotYetWiredResolver + + auth_principal = auth_info.principal if auth_info is not None else None + + # Build the RequestContext with the explicit state/resolve kwargs + # if provided; otherwise let the dataclass default factories + # supply the v6.0 stubs. + ctx_kwargs: dict[str, Any] = { + "request_id": tool_ctx.request_id, + "caller_identity": tool_ctx.caller_identity, + "tenant_id": tool_ctx.tenant_id, + "metadata": dict(tool_ctx.metadata), + "account": account, + "auth_info": auth_info, + "auth_principal": auth_principal, + } + if state_reader is not None: + ctx_kwargs["state"] = state_reader + else: + ctx_kwargs["state"] = _NotYetWiredStateReader() + if resource_resolver is not None: + ctx_kwargs["resolve"] = resource_resolver + else: + ctx_kwargs["resolve"] = _NotYetWiredResolver() + + return RequestContext(**ctx_kwargs) + + +# --------------------------------------------------------------------------- +# _invoke_platform_method + _project_handoff — the call seam +# --------------------------------------------------------------------------- + + +async def _invoke_platform_method( + platform: DecisioningPlatform, + method_name: str, + params: BaseModel, + ctx: RequestContext[Any], + *, + executor: ThreadPoolExecutor, + registry: TaskRegistry, + arg_projector: dict[str, Any] | None = None, +) -> Any: + """Invoke a platform method, projecting hybrid returns. + + Detects async-vs-sync via ``asyncio.iscoroutinefunction`` (NOT + ``inspect.iscoroutinefunction`` — the latter doesn't unwrap + ``functools.partial`` until 3.12). Sync methods run on the + explicit thread-pool executor with an explicit + ``contextvars.copy_context()`` snapshot so middleware-set + ContextVars survive the cross-thread hop (D5 + D6). + + ``TaskHandoff`` returns flow through :func:`_project_handoff` to + allocate a task_id, kick off the handoff fn, and project the + Submitted envelope. + + Wraps any non-:class:`AdcpError` exception to + ``AdcpError("INTERNAL_ERROR", recovery="terminal")`` so the wire + response never leaks a stack trace. Adopters get the original + exception logged via the framework's observability hooks (the + raise re-raises the wrapped error; the original is the + ``__cause__``). + + :param arg_projector: Optional kwargs dict for tools whose Python + method signature differs from the wire shape (D1 + arg-projection, e.g. ``update_media_buy(media_buy_id, patch, + ctx)``). Codegen-emitted shims pass this for those tools; + most tools call with ``None``. + """ + method = getattr(platform, method_name) + + try: + if asyncio.iscoroutinefunction(method): + if arg_projector is not None: + result = await method(**arg_projector, ctx=ctx) + else: + result = await method(params, ctx) + else: + ctx_snapshot = contextvars.copy_context() + loop = asyncio.get_running_loop() + if arg_projector is not None: + projected_kwargs = {**arg_projector, "ctx": ctx} + result = await loop.run_in_executor( + executor, + functools.partial(ctx_snapshot.run, method, **projected_kwargs), + ) + else: + result = await loop.run_in_executor( + executor, + functools.partial(ctx_snapshot.run, method, params, ctx), + ) + except AdcpError: + # Adopter raised structured error — propagate verbatim. The + # outer middleware projects to the wire envelope. + raise + except Exception as exc: + # Wrap unexpected exceptions so the wire never sees a stack + # trace. Adopter logs the original via observability hooks; + # __cause__ is preserved for server-side debugging. + logger.exception( + "Unhandled exception in platform.%s — wrapping to INTERNAL_ERROR", + method_name, + ) + raise AdcpError( + "INTERNAL_ERROR", + message="An internal error occurred", + recovery="terminal", + ) from exc + + if is_task_handoff(result): + return await _project_handoff( + result, + ctx, + method_name=method_name, + registry=registry, + executor=executor, + ) + return result + + +async def _project_handoff( + handoff: TaskHandoff[Any], + ctx: RequestContext[Any], + *, + method_name: str, + registry: TaskRegistry, + executor: ThreadPoolExecutor, +) -> dict[str, Any]: + """Promote a TaskHandoff to a background task. + + Lifecycle: + + 1. Allocate ``task_id`` via ``registry.issue(account_id=..., + task_type=method_name)``. The registry persists the row in + ``submitted`` state. + 2. Kick off the handoff fn in the background via + :func:`asyncio.create_task` (async fn) or + :func:`loop.run_in_executor` (sync fn) with an explicit + ``contextvars.copy_context()`` snapshot. ``create_task`` + inherits the snapshot for free; ``run_in_executor`` doesn't, + hence the explicit copy. + 3. The background task awaits the handoff fn's return; on success + calls ``registry.complete(task_id, result.model_dump() if + Pydantic else result)``; on :class:`AdcpError` calls + ``registry.fail(task_id, error.to_wire())``; on any other + exception, wraps to ``INTERNAL_ERROR`` and calls + ``registry.fail``. + 4. Returns the wire ``Submitted`` envelope dict to the synchronous + caller (the platform method's typed shim), which projects it + to the buyer. + + :param method_name: Wire-spec verb name (``'create_media_buy'``, + etc.) — used as ``task_type`` on the registry row so + ``tasks/get`` round-trips correctly. + + The handoff fn is extracted via the type-identity dispatch in + :func:`adcp.decisioning.types.is_task_handoff`. Subclassed + TaskHandoff instances (deliberate non-feature) silently take the + sync-return path before reaching this function. + """ + fn = handoff._fn + + task_id = await registry.issue( + account_id=ctx.account.id, + task_type=method_name, + ) + + # Hand off to background. The wire envelope returns immediately; + # the fn runs to completion in the background and persists the + # terminal artifact via the registry. + handoff_ctx = TaskHandoffContext(id=task_id, _registry=registry) + + async def _run() -> None: + try: + if asyncio.iscoroutinefunction(fn): + result = await fn(handoff_ctx) + else: + ctx_snapshot = contextvars.copy_context() + loop = asyncio.get_running_loop() + result = await loop.run_in_executor( + executor, + functools.partial(ctx_snapshot.run, fn, handoff_ctx), + ) + except AdcpError as exc: + await registry.fail(task_id, exc.to_wire()) + return + except Exception: + logger.exception( + "Unhandled exception in handoff fn for task %s — wrapping", + task_id, + ) + wrapped = AdcpError( + "INTERNAL_ERROR", + message="An internal error occurred during background task", + recovery="terminal", + ) + await registry.fail(task_id, wrapped.to_wire()) + return + + # Persist terminal artifact. Pydantic responses get + # ``model_dump()``; dict responses pass through. + if hasattr(result, "model_dump"): + await registry.complete(task_id, result.model_dump()) + elif isinstance(result, dict): + await registry.complete(task_id, result) + else: + # Adopter returned an unexpected type (not Pydantic, not + # dict). Best effort: stringify into a 'value' wrapper so + # tasks/get returns something. Real impls always return + # the typed Pydantic response. + await registry.complete(task_id, {"value": str(result)}) + + # ``asyncio.create_task`` snapshots contextvars automatically + # — no explicit copy needed at this site. + asyncio.create_task(_run()) + + # Wire ``Submitted`` envelope per spec. + return { + "task_id": task_id, + "status": "submitted", + "task_type": method_name, + } + + +__all__ = [ + "REQUIRED_METHODS_PER_SPECIALISM", + "compose_caller_identity", + "validate_platform", +] diff --git a/tests/test_decisioning_dispatch.py b/tests/test_decisioning_dispatch.py new file mode 100644 index 000000000..b2b659b0b --- /dev/null +++ b/tests/test_decisioning_dispatch.py @@ -0,0 +1,717 @@ +"""Unit tests for adcp.decisioning.dispatch. + +Covers the seam that ties RequestContext hydration, account +resolution, executor lifecycle, AdcpError projection, and +TaskHandoff lifecycle together. Per the dispatch design doc's file +plan + round-3/4 review additions. +""" + +from __future__ import annotations + +import asyncio +import warnings +from concurrent.futures import ThreadPoolExecutor +from contextvars import ContextVar +from typing import Any + +import pytest +from pydantic import BaseModel + +from adcp.decisioning import ( + AdcpError, + AuthInfo, + DecisioningCapabilities, + DecisioningPlatform, + InMemoryTaskRegistry, + SingletonAccounts, +) +from adcp.decisioning.dispatch import ( + REQUIRED_METHODS_PER_SPECIALISM, + _build_request_context, + _invoke_platform_method, + _project_handoff, + compose_caller_identity, + validate_platform, +) +from adcp.decisioning.types import Account, TaskHandoff +from adcp.server.base import ToolContext + + +@pytest.fixture +def executor(): + """ThreadPoolExecutor fixture — small pool, cleaned up per test.""" + pool = ThreadPoolExecutor(max_workers=4, thread_name_prefix="test-dispatch-") + yield pool + pool.shutdown(wait=True) + + +# ---- validate_platform ---- + + +class _ValidPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities(specialisms=["sales-non-guaranteed"]) + accounts = SingletonAccounts(account_id="hello") + + def get_products(self, req, ctx): + return {"products": []} + + def create_media_buy(self, req, ctx): + return {"media_buy_id": "mb_1"} + + def update_media_buy(self, media_buy_id, patch, ctx): + return {"media_buy_id": media_buy_id, "status": "active"} + + def sync_creatives(self, req, ctx): + return {"creatives": []} + + def get_media_buy_delivery(self, req, ctx): + return {"deliveries": []} + + +def test_validate_platform_passes_for_valid_subclass() -> None: + """Happy path — fully-implemented platform passes validation.""" + validate_platform(_ValidPlatform()) + + +def test_validate_platform_raises_when_capabilities_is_default() -> None: + """Subclass that forgets to set ``capabilities`` inherits the + base class's ``DecisioningCapabilities()`` (empty) — that's + actually fine (no specialisms claimed = no methods required). + But subclass that REPLACES with a non-DecisioningCapabilities + type fails fast.""" + + class _BogusCapsPlatform(DecisioningPlatform): + capabilities = "not a DecisioningCapabilities" # type: ignore[assignment] + accounts = SingletonAccounts(account_id="hello") + + with pytest.raises(AdcpError, match="must be a DecisioningCapabilities"): + validate_platform(_BogusCapsPlatform()) + + +def test_validate_platform_raises_when_accounts_none() -> None: + """Subclass that forgets to attach an AccountStore fails fast.""" + + class _MissingAccountsPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + + with pytest.raises(AdcpError, match="accounts is None"): + validate_platform(_MissingAccountsPlatform()) + + +def test_validate_platform_raises_on_missing_specialism_method() -> None: + """Platform claims sales-non-guaranteed but only implements 3 of + the 5 required methods — raises with per-method diagnostics.""" + + class _PartialSalesPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities(specialisms=["sales-non-guaranteed"]) + accounts = SingletonAccounts(account_id="hello") + + def get_products(self, req, ctx): + return {} + + def create_media_buy(self, req, ctx): + return {} + + def update_media_buy(self, media_buy_id, patch, ctx): + return {} + + # Missing: sync_creatives, get_media_buy_delivery + + with pytest.raises(AdcpError) as exc_info: + validate_platform(_PartialSalesPlatform()) + assert exc_info.value.code == "INVALID_REQUEST" + missing_methods = {m["method"] for m in exc_info.value.details["missing"]} + assert "sync_creatives" in missing_methods + assert "get_media_buy_delivery" in missing_methods + + +def test_validate_platform_warns_on_unknown_specialism() -> None: + """Unknown specialism — typo or future spec — emits UserWarning, + NOT an AdcpError raise. Forward-compat with v6.x+ specs (round-3 + D14).""" + + class _UnknownSpecialismPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities(specialisms=["this-does-not-exist-yet"]) + accounts = SingletonAccounts(account_id="hello") + + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always", UserWarning) + validate_platform(_UnknownSpecialismPlatform()) + matched = [w for w in caught if "this-does-not-exist-yet" in str(w.message)] + assert len(matched) == 1 + assert "typos" in str(matched[0].message) + + +def test_validate_platform_governance_aware_required_for_governance_specialism() -> None: + """A platform claiming a governance-* specialism without setting + capabilities.governance_aware=True fails fast — silent gate + skipping is a security regression. (D15 round-4)""" + + class _GovernanceWithoutOptInPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities( + specialisms=["governance-spend-authority"], + governance_aware=False, + ) + accounts = SingletonAccounts(account_id="hello") + + with pytest.raises(AdcpError) as exc_info: + validate_platform(_GovernanceWithoutOptInPlatform()) + assert exc_info.value.code == "INVALID_REQUEST" + msg = str(exc_info.value) + assert "governance" in msg.lower() + assert "governance_aware" in msg + + +def test_validate_platform_governance_aware_optin_passes() -> None: + """Platform with governance_aware=True passes validation. (The + real Stage-3 wiring will additionally require a custom + StateReader; that check is per-request, not boot-time, since the + StateReader is supplied by serve()/dispatch.)""" + + class _GovernanceOptInPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities( + specialisms=["governance-spend-authority"], + governance_aware=True, + ) + accounts = SingletonAccounts(account_id="hello") + + # Note: governance-spend-authority isn't in + # REQUIRED_METHODS_PER_SPECIALISM yet (v6.0 ships only sales-*), + # so it'll emit an "unknown specialism" UserWarning. That's fine + # — the governance_aware flag is what we're testing here. + with warnings.catch_warnings(record=True): + warnings.simplefilter("always", UserWarning) + validate_platform(_GovernanceOptInPlatform()) + + +def test_validate_platform_empty_specialisms_passes() -> None: + """Platform with no specialism claims passes — useful for + custom-base sellers that don't fit a spec specialism.""" + + class _NoClaimsPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities(specialisms=[]) + accounts = SingletonAccounts(account_id="hello") + + validate_platform(_NoClaimsPlatform()) + + +def test_required_methods_per_specialism_pinned_for_sales() -> None: + """Contract test — locks the sales core method set so future + spec churn surfaces as a visible test failure.""" + expected_core = { + "get_products", + "create_media_buy", + "update_media_buy", + "sync_creatives", + "get_media_buy_delivery", + } + for slug in ( + "sales-non-guaranteed", + "sales-guaranteed", + "sales-broadcast-tv", + "sales-streaming-tv", + "sales-social", + "sales-exchange", + "sales-proposal-mode", + ): + assert REQUIRED_METHODS_PER_SPECIALISM[slug] == expected_core, f"sales core drift on {slug}" + + +# ---- compose_caller_identity (D9 round-3) ---- + + +def test_compose_caller_identity_uses_store_qualname_and_account_id() -> None: + store = SingletonAccounts(account_id="acme") + account: Account[Any] = Account(id="acme:buyer-a") + key = compose_caller_identity(account, store) + assert key == "SingletonAccounts:acme:buyer-a" + + +def test_compose_caller_identity_isolates_across_stores() -> None: + """Two different store classes with the same account.id produce + different cache keys — structural cross-store isolation (round-3 + D9).""" + + class _CustomStore: + resolution = "explicit" + + def resolve(self, ref, auth_info=None): + return Account(id="x") + + a = SingletonAccounts(account_id="hello") + b = _CustomStore() + same_account: Account[Any] = Account(id="x") + assert compose_caller_identity(same_account, a) != compose_caller_identity(same_account, b) + + +# ---- _build_request_context ---- + + +def test_build_request_context_threads_account_and_auth() -> None: + tool_ctx = ToolContext( + request_id="req_1", + caller_identity="caller_x", + tenant_id="tenant_y", + metadata={"foo": "bar"}, + ) + account: Account[Any] = Account(id="acct_a", name="Acme") + auth = AuthInfo(kind="signed_request", principal="buyer-a", key_id="kid-1") + + ctx = _build_request_context(tool_ctx, account, auth) + + assert ctx.account is account + assert ctx.auth_info is auth + assert ctx.auth_principal == "buyer-a" + assert ctx.request_id == "req_1" + assert ctx.caller_identity == "caller_x" + assert ctx.tenant_id == "tenant_y" + assert ctx.metadata == {"foo": "bar"} + + +def test_build_request_context_with_no_auth() -> None: + """Unauthenticated dev path (singleton fixtures): auth_principal + is None, auth_info is None.""" + tool_ctx = ToolContext() + account: Account[Any] = Account(id="dev") + ctx = _build_request_context(tool_ctx, account, None) + assert ctx.auth_info is None + assert ctx.auth_principal is None + + +def test_build_request_context_supplies_stubs_when_no_state_resolver() -> None: + """Default state/resolve are the v6.0 stubs — adopter call + sites work without explicit wiring.""" + from adcp.decisioning.resolve import _NotYetWiredResolver + from adcp.decisioning.state import _NotYetWiredStateReader + + ctx = _build_request_context(ToolContext(), Account(id="x"), None) + assert isinstance(ctx.state, _NotYetWiredStateReader) + assert isinstance(ctx.resolve, _NotYetWiredResolver) + + +def test_build_request_context_threads_custom_state_and_resolver() -> None: + """Stage-3 serve() can wire a v6.1-style backing store; dispatch + plumbs it through unchanged.""" + + class _FakeStateReader: + def find_by_object(self, t, i): + return ("custom",) + + def find_proposal_by_id(self, p): + return None + + def governance_context(self): + return None + + def workflow_steps(self): + return () + + class _FakeResolver: + async def property_list(self, list_id): + return f"resolved:{list_id}" + + async def collection_list(self, list_id): + return None + + async def creative_format(self, format_id, *, revalidate=False): + return None + + fake_state = _FakeStateReader() + fake_resolve = _FakeResolver() + ctx = _build_request_context( + ToolContext(), + Account(id="x"), + None, + state_reader=fake_state, + resource_resolver=fake_resolve, + ) + assert ctx.state is fake_state + assert ctx.resolve is fake_resolve + + +# ---- _invoke_platform_method ---- + + +class _ProductsRequest(BaseModel): + """Stand-in Pydantic request for tests.""" + + foo: str = "bar" + + +class _ProductsResponse(BaseModel): + products: list[dict[str, Any]] = [] + + +@pytest.mark.asyncio +async def test_invoke_async_method_returns_typed_response( + executor: ThreadPoolExecutor, +) -> None: + class _AsyncPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="x") + + async def get_products(self, req, ctx): + return _ProductsResponse(products=[{"id": "p1"}]) + + ctx = _build_request_context(ToolContext(), Account(id="x"), None) + result = await _invoke_platform_method( + _AsyncPlatform(), + "get_products", + _ProductsRequest(), + ctx, + executor=executor, + registry=InMemoryTaskRegistry(), + ) + assert isinstance(result, _ProductsResponse) + assert result.products == [{"id": "p1"}] + + +@pytest.mark.asyncio +async def test_invoke_sync_method_runs_on_executor( + executor: ThreadPoolExecutor, +) -> None: + """Sync platform method runs in a worker thread — verified via + thread-name introspection.""" + seen_thread_names: list[str] = [] + + class _SyncPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="x") + + def get_products(self, req, ctx): + import threading + + seen_thread_names.append(threading.current_thread().name) + return _ProductsResponse(products=[{"id": "sync"}]) + + ctx = _build_request_context(ToolContext(), Account(id="x"), None) + result = await _invoke_platform_method( + _SyncPlatform(), + "get_products", + _ProductsRequest(), + ctx, + executor=executor, + registry=InMemoryTaskRegistry(), + ) + assert isinstance(result, _ProductsResponse) + assert seen_thread_names[0].startswith( + "test-dispatch-" + ), f"sync method should run on the test executor; ran on {seen_thread_names}" + + +@pytest.mark.asyncio +async def test_invoke_sync_method_propagates_contextvars( + executor: ThreadPoolExecutor, +) -> None: + """Sync handler running on the executor sees ContextVars set in + the request scope (D6 — explicit copy_context). Without the + explicit snapshot, the executor thread sees the default value + instead of the request-scoped one.""" + request_id_var: ContextVar[str] = ContextVar("test_request_id", default="default") + seen: list[str] = [] + + class _SyncPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="x") + + def get_products(self, req, ctx): + seen.append(request_id_var.get()) + return _ProductsResponse() + + ctx = _build_request_context(ToolContext(), Account(id="x"), None) + + request_id_var.set("req_xyz") + await _invoke_platform_method( + _SyncPlatform(), + "get_products", + _ProductsRequest(), + ctx, + executor=executor, + registry=InMemoryTaskRegistry(), + ) + assert seen == ["req_xyz"] + + +@pytest.mark.asyncio +async def test_invoke_re_raises_adcp_error( + executor: ThreadPoolExecutor, +) -> None: + class _RaisingPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="x") + + async def get_products(self, req, ctx): + raise AdcpError( + "BUDGET_TOO_LOW", + message="below floor", + recovery="correctable", + ) + + ctx = _build_request_context(ToolContext(), Account(id="x"), None) + with pytest.raises(AdcpError) as exc_info: + await _invoke_platform_method( + _RaisingPlatform(), + "get_products", + _ProductsRequest(), + ctx, + executor=executor, + registry=InMemoryTaskRegistry(), + ) + # Verbatim — NOT wrapped to INTERNAL_ERROR. + assert exc_info.value.code == "BUDGET_TOO_LOW" + assert exc_info.value.recovery == "correctable" + + +@pytest.mark.asyncio +async def test_invoke_wraps_unexpected_exceptions_to_internal_error( + executor: ThreadPoolExecutor, +) -> None: + class _CrashingPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="x") + + async def get_products(self, req, ctx): + raise ValueError("oops, internal-state bug") + + ctx = _build_request_context(ToolContext(), Account(id="x"), None) + with pytest.raises(AdcpError) as exc_info: + await _invoke_platform_method( + _CrashingPlatform(), + "get_products", + _ProductsRequest(), + ctx, + executor=executor, + registry=InMemoryTaskRegistry(), + ) + assert exc_info.value.code == "INTERNAL_ERROR" + assert exc_info.value.recovery == "terminal" + # Original exception preserved as __cause__ for server-side + # debugging — wire response stays opaque. + assert isinstance(exc_info.value.__cause__, ValueError) + assert "oops, internal-state bug" not in str(exc_info.value) + + +@pytest.mark.asyncio +async def test_invoke_with_arg_projector_uses_kwargs( + executor: ThreadPoolExecutor, +) -> None: + """Tools whose Python signature differs from wire shape (D1 + arg-projection — e.g. update_media_buy(media_buy_id, patch, + ctx)) get the kwargs dict passed through.""" + + class _PatchModel(BaseModel): + media_buy_id: str + new_status: str + + class _ProjectingPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="x") + + async def update_media_buy(self, media_buy_id, patch, ctx): + return {"media_buy_id": media_buy_id, "status": patch.new_status} + + ctx = _build_request_context(ToolContext(), Account(id="x"), None) + patch = _PatchModel(media_buy_id="mb_1", new_status="active") + result = await _invoke_platform_method( + _ProjectingPlatform(), + "update_media_buy", + patch, + ctx, + executor=executor, + registry=InMemoryTaskRegistry(), + arg_projector={"media_buy_id": "mb_1", "patch": patch}, + ) + assert result == {"media_buy_id": "mb_1", "status": "active"} + + +# ---- _project_handoff (TaskHandoff lifecycle) ---- + + +@pytest.mark.asyncio +async def test_handoff_returns_submitted_envelope( + executor: ThreadPoolExecutor, +) -> None: + """The synchronous return is the wire Submitted envelope — + {task_id, status, task_type}. Buyer pattern-matches on shape.""" + registry = InMemoryTaskRegistry() + ctx = _build_request_context(ToolContext(), Account(id="acct_a"), None) + completed = asyncio.Event() + + async def _handoff_fn(task_ctx): + completed.set() + return {"media_buy_id": "mb_1"} + + handoff = TaskHandoff(_handoff_fn) + envelope = await _project_handoff( + handoff, + ctx, + method_name="create_media_buy", + registry=registry, + executor=executor, + ) + assert envelope["status"] == "submitted" + assert envelope["task_type"] == "create_media_buy" + assert envelope["task_id"].startswith("task_") + + # Wait for the background task to complete so the assertion below + # is deterministic. (CI may schedule background tasks slowly.) + await asyncio.wait_for(completed.wait(), timeout=2.0) + # Yield once more so the registry.complete() call lands. + await asyncio.sleep(0.05) + + rec = await registry.get(envelope["task_id"], expected_account_id="acct_a") + assert rec is not None + assert rec["state"] == "completed" + assert rec["result"] == {"media_buy_id": "mb_1"} + + +@pytest.mark.asyncio +async def test_handoff_async_fn_completes_via_registry( + executor: ThreadPoolExecutor, +) -> None: + """Async handoff fn returns a Pydantic model; framework calls + model_dump() and persists the dict via registry.complete.""" + registry = InMemoryTaskRegistry() + ctx = _build_request_context(ToolContext(), Account(id="acct_a"), None) + + async def _handoff_fn(task_ctx): + return _ProductsResponse(products=[{"id": "x"}]) + + envelope = await _project_handoff( + TaskHandoff(_handoff_fn), + ctx, + method_name="create_media_buy", + registry=registry, + executor=executor, + ) + # Wait for background task to finish. + await asyncio.sleep(0.1) + rec = await registry.get(envelope["task_id"], expected_account_id="acct_a") + assert rec is not None + assert rec["state"] == "completed" + assert rec["result"] == {"products": [{"id": "x"}]} + + +@pytest.mark.asyncio +async def test_handoff_adcp_error_persists_via_registry_fail( + executor: ThreadPoolExecutor, +) -> None: + """When the handoff fn raises AdcpError, the framework calls + registry.fail with the to_wire() shape so tasks/get returns + the spec adcp_error envelope.""" + registry = InMemoryTaskRegistry() + ctx = _build_request_context(ToolContext(), Account(id="acct_a"), None) + + async def _handoff_fn(task_ctx): + raise AdcpError( + "POLICY_VIOLATION", + message="rejected", + recovery="correctable", + field="package", + ) + + envelope = await _project_handoff( + TaskHandoff(_handoff_fn), + ctx, + method_name="create_media_buy", + registry=registry, + executor=executor, + ) + await asyncio.sleep(0.1) + rec = await registry.get(envelope["task_id"], expected_account_id="acct_a") + assert rec is not None + assert rec["state"] == "failed" + assert rec["error"]["code"] == "POLICY_VIOLATION" + assert rec["error"]["recovery"] == "correctable" + assert rec["error"]["field"] == "package" + + +@pytest.mark.asyncio +async def test_handoff_unexpected_exception_wraps_to_internal_error( + executor: ThreadPoolExecutor, +) -> None: + """Non-AdcpError exception in the handoff fn wraps to + INTERNAL_ERROR — wire response never leaks the original.""" + registry = InMemoryTaskRegistry() + ctx = _build_request_context(ToolContext(), Account(id="acct_a"), None) + + async def _handoff_fn(task_ctx): + raise RuntimeError("internal bug") + + envelope = await _project_handoff( + TaskHandoff(_handoff_fn), + ctx, + method_name="create_media_buy", + registry=registry, + executor=executor, + ) + await asyncio.sleep(0.1) + rec = await registry.get(envelope["task_id"], expected_account_id="acct_a") + assert rec is not None + assert rec["state"] == "failed" + assert rec["error"]["code"] == "INTERNAL_ERROR" + # Original exception text NOT exposed. + assert "internal bug" not in rec["error"].get("message", "") + + +@pytest.mark.asyncio +async def test_handoff_sync_fn_runs_on_executor( + executor: ThreadPoolExecutor, +) -> None: + """Sync handoff fn runs on the executor with explicit + contextvars snapshot. (Async fn uses asyncio.create_task which + inherits contextvars for free; sync needs the explicit copy.)""" + registry = InMemoryTaskRegistry() + ctx = _build_request_context(ToolContext(), Account(id="acct_a"), None) + + def _sync_handoff_fn(task_ctx): + import threading + + return {"thread": threading.current_thread().name} + + envelope = await _project_handoff( + TaskHandoff(_sync_handoff_fn), + ctx, + method_name="create_media_buy", + registry=registry, + executor=executor, + ) + await asyncio.sleep(0.1) + rec = await registry.get(envelope["task_id"], expected_account_id="acct_a") + assert rec is not None + assert rec["state"] == "completed" + assert rec["result"]["thread"].startswith("test-dispatch-") + + +@pytest.mark.asyncio +async def test_handoff_invoked_via_invoke_platform_method( + executor: ThreadPoolExecutor, +) -> None: + """End-to-end: a platform method returning ctx.handoff_to_task(fn) + flows through _invoke_platform_method and produces the Submitted + envelope without the caller knowing it was a handoff.""" + registry = InMemoryTaskRegistry() + ctx = _build_request_context(ToolContext(), Account(id="acct_a"), None) + + async def _async_review(task_ctx): + return _ProductsResponse(products=[{"id": "reviewed"}]) + + class _HybridPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="x") + + async def create_media_buy(self, req, ctx): + return ctx.handoff_to_task(_async_review) + + result = await _invoke_platform_method( + _HybridPlatform(), + "create_media_buy", + _ProductsRequest(), + ctx, + executor=executor, + registry=registry, + ) + # Returned the wire envelope, NOT the handoff marker. + assert isinstance(result, dict) + assert result["status"] == "submitted" + assert result["task_type"] == "create_media_buy" From 8e095b807825fd71ee6d7586e3c9909003c25bd8 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Thu, 30 Apr 2026 10:58:35 -0400 Subject: [PATCH 11/18] =?UTF-8?q?feat(decisioning):=20PlatformHandler=20?= =?UTF-8?q?=E2=80=94=20wire-shape=20shims=20for=20SalesPlatform?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stage 3 third piece. Builds on dispatch.py + task_registry.py to ship the wire-shape shim layer the framework's typed-handler dispatch routes wire requests through. - adcp/decisioning/handler.py: PlatformHandler(ADCPHandler[ToolContext]) — codegen target (hand-written for v6.0 alpha; codegen drift test Stage 4 follow-up). Constructor takes DecisioningPlatform + executor + registry; optional state_reader + resource_resolver kwargs plumb through to _build_request_context. advertised_tools: ClassVar[set[str]] declares all 9 sales-* tools. Auto-registers via __init_subclass__ once prep PR #318 merges and foundation rebases. Per-tool typed shims: resolve account → build RequestContext → invoke platform method → return typed response (or AdcpError flows through verbatim). update_media_buy uses arg-projection (D1 — Python signature is media_buy_id+patch+ctx vs wire shape having both at top level). list_creative_formats + provide_performance_feedback have no 'account' field on wire — shim passes None, adopter store handles via 'singleton' or 'derived' resolution. AccountReference handling: tolerant of both Pydantic instance (typical wire path) and raw dict (test fixtures, custom dispatch). Liskov narrowing: param types narrow from base ADCPHandler's Pydantic | dict union to just Pydantic — endorsed by docs/handler-authoring.md typed-dispatch pattern. Per-method # type: ignore[override] documents the intentional narrowing. - tests/test_decisioning_handler.py: 12 tests covering routing — advertised_tools, get_products (account+auth+error+wrap), create_media_buy sync + handoff, update_media_buy arg-projection, sync_creatives, list_creative_formats, async account resolver, dict-shaped auth_info re-coercion. Foundation tests: 109 (+12). Full suite: 2489 passed, 17 skipped, 1 xfailed. ruff + mypy clean. Stage 3 next: serve.py wrapper. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/adcp/decisioning/handler.py | 422 ++++++++++++++++++++++++++++++ tests/test_decisioning_handler.py | 402 ++++++++++++++++++++++++++++ 2 files changed, 824 insertions(+) create mode 100644 src/adcp/decisioning/handler.py create mode 100644 tests/test_decisioning_handler.py diff --git a/src/adcp/decisioning/handler.py b/src/adcp/decisioning/handler.py new file mode 100644 index 000000000..45f766f31 --- /dev/null +++ b/src/adcp/decisioning/handler.py @@ -0,0 +1,422 @@ +"""``PlatformHandler`` — wire-shape shims that route to a DecisioningPlatform. + +This module is the codegen target — ``scripts/generate_decisioning_handler.py`` +will (in a follow-up PR) emit this file by walking the per-specialism +Protocols. For v6.0 alpha foundation, the file is hand-written; the +codegen drift test ships in Stage 4. + +Each shim: + +1. Accepts the typed Pydantic request + framework :class:`ToolContext`. +2. Resolves the account via ``platform.accounts.resolve``. +3. Builds the typed :class:`RequestContext` via + :func:`_build_request_context` (D2 + D9 + D15). +4. Calls :func:`_invoke_platform_method` to invoke the platform method, + which projects ``TaskHandoff`` and wraps non-``AdcpError`` exceptions + to the wire envelope. +5. Returns the typed response (or raises :class:`AdcpError` which the + framework's existing wire-projection codepath catches). + +The class-level ``advertised_tools: ClassVar[set[str]]`` declaration is +auto-registered with the framework's tool-discovery seam via +:meth:`adcp.server.base.ADCPHandler.__init_subclass__` (PR #318). Adopters +get a focused ``tools/list`` filter without manual registration. +""" + +from __future__ import annotations + +import asyncio +from typing import TYPE_CHECKING, Any, ClassVar, cast + +from adcp.decisioning.context import AuthInfo +from adcp.decisioning.dispatch import ( + _build_request_context, + _invoke_platform_method, +) +from adcp.server.base import ADCPHandler, ToolContext + +if TYPE_CHECKING: + from concurrent.futures import ThreadPoolExecutor + + from adcp.decisioning.platform import DecisioningPlatform + from adcp.decisioning.resolve import ResourceResolver + from adcp.decisioning.state import StateReader + from adcp.decisioning.task_registry import TaskRegistry + from adcp.decisioning.types import Account + from adcp.types import ( + AccountReference, + CreateMediaBuyRequest, + CreateMediaBuySuccessResponse, + GetMediaBuyDeliveryRequest, + GetMediaBuyDeliveryResponse, + GetMediaBuysRequest, + GetMediaBuysResponse, + GetProductsRequest, + GetProductsResponse, + ListCreativeFormatsRequest, + ListCreativeFormatsResponse, + ListCreativesRequest, + ListCreativesResponse, + ProvidePerformanceFeedbackRequest, + ProvidePerformanceFeedbackResponse, + SyncCreativesRequest, + SyncCreativesSuccessResponse, + UpdateMediaBuyRequest, + UpdateMediaBuySuccessResponse, + ) + + +# --------------------------------------------------------------------------- +# Class-level advertised tool surface +# --------------------------------------------------------------------------- + +#: All sales-* tools the v6.0 PlatformHandler shim covers. Auto-registered +#: with the framework's tool-discovery seam via ``__init_subclass__`` — +#: ``tools/list`` filters to this set unless the operator passes +#: ``advertise_all=True``. Adopters who only implement a subset of these +#: methods on their ``DecisioningPlatform`` subclass: the framework's +#: existing override-detection (``_is_method_overridden``) handles the +#: filter — methods inherited from the base ``DecisioningPlatform`` (which +#: doesn't define them) are NOT in the override set, so the framework +#: drops the tool from ``tools/list`` automatically. +_SALES_ADVERTISED_TOOLS: frozenset[str] = frozenset( + { + "get_products", + "create_media_buy", + "update_media_buy", + "sync_creatives", + "get_media_buy_delivery", + "get_media_buys", + "provide_performance_feedback", + "list_creative_formats", + "list_creatives", + } +) + + +class PlatformHandler(ADCPHandler[ToolContext]): + """ADCPHandler subclass that routes wire requests to a + :class:`DecisioningPlatform` via :func:`_invoke_platform_method`. + + Constructed by :func:`adcp.decisioning.serve.create_adcp_server_from_platform` + — adopters never instantiate directly. The handler holds: + + * ``platform`` — the adopter's :class:`DecisioningPlatform` subclass + instance. Method dispatches read/call this. + * ``executor`` — the framework-allocated thread-pool for sync platform + methods (D5). + * ``registry`` — the :class:`TaskRegistry` for handoff lifecycle. + * Optional ``state_reader`` / ``resource_resolver`` — Stage-3+ wiring + for v6.1 backing-store impls; defaults to the v6.0 stubs. + + Per-method shims follow the same template: + + 1. Extract ``account_ref`` from the typed request (when the tool + carries ``account`` on the wire). + 2. Resolve via ``platform.accounts.resolve(ref, auth_info=...)``. + 3. Build :class:`RequestContext` via :func:`_build_request_context`. + 4. Invoke the platform method via :func:`_invoke_platform_method`. + + Adopters who don't override a given platform method get the framework's + ``not_supported`` baseline (per ADCPHandler) on those tools — and the + override-detection filter drops the tool from ``tools/list`` unless + they pass ``advertise_all=True``. + """ + + advertised_tools: ClassVar[set[str]] = set(_SALES_ADVERTISED_TOOLS) + + _agent_type = "decisioning platform" + + def __init__( + self, + platform: DecisioningPlatform, + *, + executor: ThreadPoolExecutor, + registry: TaskRegistry, + state_reader: StateReader | None = None, + resource_resolver: ResourceResolver | None = None, + ) -> None: + super().__init__() + self._platform = platform + self._executor = executor + self._registry = registry + self._state_reader = state_reader + self._resource_resolver = resource_resolver + + # ----- account resolution helper ----- + + async def _resolve_account( + self, + ref: AccountReference | None, + ctx: ToolContext, + ) -> Account[Any]: + """Resolve a wire :class:`AccountReference` to a typed + :class:`Account` via the platform's :class:`AccountStore`. + + Pulls auth info from ``ctx.metadata['auth_info']`` when the + operator's ``context_factory`` populates it; otherwise None. + Adopter ``AccountStore`` impls handle missing-auth cases per + their own resolution mode (singleton tolerates None; + from_auth raises ``AUTH_INVALID``; explicit resolves by ref). + ``AccountStore.resolve`` takes a dict — convert the typed + Pydantic ``AccountReference`` via ``model_dump()`` so adopter + store impls see a normalized shape. + """ + auth_info = self._extract_auth_info(ctx) + # Handle both Pydantic AccountReference (typical wire path) and + # raw dict (test fixtures using model_construct, custom dispatch + # paths). Adopter stores implementing custom shapes are + # responsible for whatever they accept. + ref_dict: dict[str, Any] | None + if ref is None: + ref_dict = None + elif hasattr(ref, "model_dump"): + ref_dict = ref.model_dump() + elif isinstance(ref, dict): + ref_dict = ref + else: + ref_dict = cast("dict[str, Any]", ref) + result = self._platform.accounts.resolve(ref_dict, auth_info=auth_info) + if asyncio.iscoroutine(result): + return cast("Account[Any]", await result) + return cast("Account[Any]", result) + + @staticmethod + def _extract_auth_info(ctx: ToolContext) -> AuthInfo | None: + """Pull AuthInfo from ToolContext.metadata when present. + + The framework's existing auth integrations (BearerTokenAuthMiddleware, + custom context_factory) populate ``ctx.metadata`` with + principal/scope info. Adopter conventions vary; this helper checks + for an ``adcp.auth_info`` key — Stage 3 ``serve()`` wiring sets + this from the canonical principal. Returns None when no auth key + is present (dev / singleton fixtures). + """ + raw = ctx.metadata.get("adcp.auth_info") if ctx.metadata else None + if isinstance(raw, AuthInfo): + return raw + if isinstance(raw, dict): + return AuthInfo( + kind=raw.get("kind", "derived"), + key_id=raw.get("key_id"), + principal=raw.get("principal"), + scopes=list(raw.get("scopes", [])), + ) + return None + + def _build_ctx( + self, + tool_ctx: ToolContext, + account: Account[Any], + ) -> Any: + """Wrap :func:`_build_request_context` with the handler's + wired StateReader / ResourceResolver overrides.""" + auth_info = self._extract_auth_info(tool_ctx) + return _build_request_context( + tool_ctx, + account, + auth_info, + state_reader=self._state_reader, + resource_resolver=self._resource_resolver, + ) + + # ----- Sales tools ----- + + async def get_products( # type: ignore[override] + self, + params: GetProductsRequest, + context: ToolContext | None = None, + ) -> GetProductsResponse: + tool_ctx = context or ToolContext() + account = await self._resolve_account(params.account, tool_ctx) + ctx = self._build_ctx(tool_ctx, account) + return cast( + "GetProductsResponse", + await _invoke_platform_method( + self._platform, + "get_products", + params, + ctx, + executor=self._executor, + registry=self._registry, + ), + ) + + async def create_media_buy( # type: ignore[override] + self, + params: CreateMediaBuyRequest, + context: ToolContext | None = None, + ) -> CreateMediaBuySuccessResponse: + tool_ctx = context or ToolContext() + account = await self._resolve_account(params.account, tool_ctx) + ctx = self._build_ctx(tool_ctx, account) + return cast( + "CreateMediaBuySuccessResponse", + await _invoke_platform_method( + self._platform, + "create_media_buy", + params, + ctx, + executor=self._executor, + registry=self._registry, + ), + ) + + async def update_media_buy( # type: ignore[override] + self, + params: UpdateMediaBuyRequest, + context: ToolContext | None = None, + ) -> UpdateMediaBuySuccessResponse: + """Wire shape carries ``media_buy_id`` + the patch fields at the + same level on ``UpdateMediaBuyRequest``. The platform method + signature is ``update_media_buy(media_buy_id, patch, ctx)`` — + cleaner adopter ergonomics. Arg-projection per D1. + """ + tool_ctx = context or ToolContext() + account = await self._resolve_account(params.account, tool_ctx) + ctx = self._build_ctx(tool_ctx, account) + return cast( + "UpdateMediaBuySuccessResponse", + await _invoke_platform_method( + self._platform, + "update_media_buy", + params, + ctx, + executor=self._executor, + registry=self._registry, + arg_projector={"media_buy_id": params.media_buy_id, "patch": params}, + ), + ) + + async def sync_creatives( # type: ignore[override] + self, + params: SyncCreativesRequest, + context: ToolContext | None = None, + ) -> SyncCreativesSuccessResponse: + tool_ctx = context or ToolContext() + account = await self._resolve_account(params.account, tool_ctx) + ctx = self._build_ctx(tool_ctx, account) + return cast( + "SyncCreativesSuccessResponse", + await _invoke_platform_method( + self._platform, + "sync_creatives", + params, + ctx, + executor=self._executor, + registry=self._registry, + ), + ) + + async def get_media_buy_delivery( # type: ignore[override] + self, + params: GetMediaBuyDeliveryRequest, + context: ToolContext | None = None, + ) -> GetMediaBuyDeliveryResponse: + tool_ctx = context or ToolContext() + account = await self._resolve_account(params.account, tool_ctx) + ctx = self._build_ctx(tool_ctx, account) + return cast( + "GetMediaBuyDeliveryResponse", + await _invoke_platform_method( + self._platform, + "get_media_buy_delivery", + params, + ctx, + executor=self._executor, + registry=self._registry, + ), + ) + + # ----- Optional sales tools (gated by capabilities + override) ----- + + async def get_media_buys( # type: ignore[override] + self, + params: GetMediaBuysRequest, + context: ToolContext | None = None, + ) -> GetMediaBuysResponse: + tool_ctx = context or ToolContext() + account = await self._resolve_account(params.account, tool_ctx) + ctx = self._build_ctx(tool_ctx, account) + return cast( + "GetMediaBuysResponse", + await _invoke_platform_method( + self._platform, + "get_media_buys", + params, + ctx, + executor=self._executor, + registry=self._registry, + ), + ) + + async def provide_performance_feedback( # type: ignore[override] + self, + params: ProvidePerformanceFeedbackRequest, + context: ToolContext | None = None, + ) -> ProvidePerformanceFeedbackResponse: + """Wire request has no ``account`` field — resolve via auth only. + Adopters in ``explicit`` resolution mode get an + ``ACCOUNT_NOT_FOUND`` from their AccountStore unless they wire + a derived/singleton path or extend ``AccountStore.resolve`` to + handle the no-ref case (see python-port-v2 RFC TODO(rc.1)).""" + tool_ctx = context or ToolContext() + account = await self._resolve_account(None, tool_ctx) + ctx = self._build_ctx(tool_ctx, account) + return cast( + "ProvidePerformanceFeedbackResponse", + await _invoke_platform_method( + self._platform, + "provide_performance_feedback", + params, + ctx, + executor=self._executor, + registry=self._registry, + ), + ) + + async def list_creative_formats( # type: ignore[override] + self, + params: ListCreativeFormatsRequest, + context: ToolContext | None = None, + ) -> ListCreativeFormatsResponse: + """Wire request has no ``account`` field. See + :meth:`provide_performance_feedback` for the no-ref account + resolution caveat.""" + tool_ctx = context or ToolContext() + account = await self._resolve_account(None, tool_ctx) + ctx = self._build_ctx(tool_ctx, account) + return cast( + "ListCreativeFormatsResponse", + await _invoke_platform_method( + self._platform, + "list_creative_formats", + params, + ctx, + executor=self._executor, + registry=self._registry, + ), + ) + + async def list_creatives( # type: ignore[override] + self, + params: ListCreativesRequest, + context: ToolContext | None = None, + ) -> ListCreativesResponse: + tool_ctx = context or ToolContext() + account = await self._resolve_account(params.account, tool_ctx) + ctx = self._build_ctx(tool_ctx, account) + return cast( + "ListCreativesResponse", + await _invoke_platform_method( + self._platform, + "list_creatives", + params, + ctx, + executor=self._executor, + registry=self._registry, + ), + ) + + +__all__ = ["PlatformHandler"] diff --git a/tests/test_decisioning_handler.py b/tests/test_decisioning_handler.py new file mode 100644 index 000000000..1e68c651d --- /dev/null +++ b/tests/test_decisioning_handler.py @@ -0,0 +1,402 @@ +"""Unit tests for adcp.decisioning.handler.PlatformHandler. + +Covers the wire-shape shim layer that routes typed Pydantic requests +through dispatch._invoke_platform_method to the adopter's +DecisioningPlatform method bodies. + +Each test exercises one shim end-to-end: typed request → account +resolution → RequestContext build → method invocation → typed +response. Errors flow through verbatim (AdcpError) or wrapped +(unexpected exceptions → INTERNAL_ERROR). +""" + +from __future__ import annotations + +import asyncio +from concurrent.futures import ThreadPoolExecutor +from typing import Any + +import pytest + +from adcp.decisioning import ( + AdcpError, + AuthInfo, + DecisioningCapabilities, + DecisioningPlatform, + InMemoryTaskRegistry, + SingletonAccounts, +) +from adcp.decisioning.handler import PlatformHandler +from adcp.server.base import ToolContext + + +@pytest.fixture +def executor(): + pool = ThreadPoolExecutor(max_workers=4, thread_name_prefix="test-handler-") + yield pool + pool.shutdown(wait=True) + + +def _make_handler(platform: DecisioningPlatform, executor: ThreadPoolExecutor) -> PlatformHandler: + return PlatformHandler( + platform, + executor=executor, + registry=InMemoryTaskRegistry(), + ) + + +# ---- advertised_tools class attribute ---- + + +def test_advertised_tools_covers_sales_specialism() -> None: + """The class-level set declares all 9 sales tools — both the 5 + required (every sales-* specialism) and the 4 optional (rc.1+ + sales additions).""" + assert "get_products" in PlatformHandler.advertised_tools + assert "create_media_buy" in PlatformHandler.advertised_tools + assert "update_media_buy" in PlatformHandler.advertised_tools + assert "sync_creatives" in PlatformHandler.advertised_tools + assert "get_media_buy_delivery" in PlatformHandler.advertised_tools + # Optional but covered. + assert "get_media_buys" in PlatformHandler.advertised_tools + assert "provide_performance_feedback" in PlatformHandler.advertised_tools + assert "list_creative_formats" in PlatformHandler.advertised_tools + assert "list_creatives" in PlatformHandler.advertised_tools + + +# ---- get_products — sync read, account-bearing wire request ---- + + +@pytest.mark.asyncio +async def test_get_products_routes_through_platform(executor) -> None: + from adcp.types import GetProductsRequest, GetProductsResponse + + received_account_id: list[str] = [] + + class _Platform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="hello") + + async def get_products(self, req, ctx): + received_account_id.append(ctx.account.id) + return GetProductsResponse(products=[]) + + handler = _make_handler(_Platform(), executor) + req = GetProductsRequest(buying_mode="brief", brief="any inventory") + resp = await handler.get_products(req, ToolContext()) + assert isinstance(resp, GetProductsResponse) + # SingletonAccounts synthesizes per-principal id; with no auth_info + # the principal is "anonymous". + assert received_account_id == ["hello:anonymous"] + + +@pytest.mark.asyncio +async def test_get_products_threads_auth_info_to_account(executor) -> None: + """ToolContext.metadata['adcp.auth_info'] flows into account + resolution AND onto the RequestContext.auth_info field.""" + from adcp.types import GetProductsRequest, GetProductsResponse + + received_principal: list[str] = [] + received_auth_info: list[Any] = [] + + class _Platform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="acme") + + async def get_products(self, req, ctx): + received_principal.append(ctx.account.id) + received_auth_info.append(ctx.auth_info) + return GetProductsResponse(products=[]) + + handler = _make_handler(_Platform(), executor) + ctx = ToolContext( + metadata={ + "adcp.auth_info": AuthInfo( + kind="signed_request", + principal="buyer-x", + key_id="kid-1", + ), + } + ) + await handler.get_products( + GetProductsRequest(buying_mode="brief", brief="any inventory"), + ctx, + ) + assert received_principal == ["acme:buyer-x"] + assert received_auth_info[0].principal == "buyer-x" + + +@pytest.mark.asyncio +async def test_get_products_propagates_adcp_error_verbatim(executor) -> None: + """Adopter raises AdcpError → flows through dispatch verbatim.""" + from adcp.types import GetProductsRequest + + class _Platform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="hello") + + async def get_products(self, req, ctx): + raise AdcpError( + "POLICY_VIOLATION", + message="cannot show inventory", + recovery="terminal", + ) + + handler = _make_handler(_Platform(), executor) + with pytest.raises(AdcpError) as exc_info: + await handler.get_products( + GetProductsRequest(buying_mode="brief", brief="any"), ToolContext() + ) + assert exc_info.value.code == "POLICY_VIOLATION" + + +@pytest.mark.asyncio +async def test_get_products_wraps_unexpected_exception(executor) -> None: + """Unexpected exception in adopter code → INTERNAL_ERROR.""" + from adcp.types import GetProductsRequest + + class _Platform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="hello") + + async def get_products(self, req, ctx): + raise KeyError("internal") + + handler = _make_handler(_Platform(), executor) + with pytest.raises(AdcpError) as exc_info: + await handler.get_products( + GetProductsRequest(buying_mode="brief", brief="any"), ToolContext() + ) + assert exc_info.value.code == "INTERNAL_ERROR" + # Original exception preserved as __cause__; not exposed in message. + assert isinstance(exc_info.value.__cause__, KeyError) + + +# ---- create_media_buy — hybrid, returns Submitted envelope on handoff ---- + + +@pytest.mark.asyncio +async def test_create_media_buy_sync_path_returns_typed_response(executor) -> None: + from adcp.types import ( + CreateMediaBuyRequest, + CreateMediaBuySuccessResponse, + ) + + class _Platform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="hello") + + async def create_media_buy(self, req, ctx): + return CreateMediaBuySuccessResponse( + media_buy_id="mb_xyz", + packages=[], + status="active", + ) + + handler = _make_handler(_Platform(), executor) + resp = await handler.create_media_buy( + CreateMediaBuyRequest( + account={"account_id": "acct_a"}, + brand={"domain": "example.com"}, + idempotency_key="idem_aaaa1234567890", + start_time="2026-05-01T00:00:00Z", + end_time="2026-05-31T23:59:59Z", + ), + ToolContext(), + ) + assert isinstance(resp, CreateMediaBuySuccessResponse) + assert resp.media_buy_id == "mb_xyz" + + +@pytest.mark.asyncio +async def test_create_media_buy_handoff_path_returns_submitted_envelope( + executor, +) -> None: + """Adopter returns ctx.handoff_to_task(fn) → handler returns the + wire Submitted envelope (dict) instead of a Pydantic Success.""" + from adcp.types import CreateMediaBuyRequest + + async def _async_review(task_ctx): + return {"media_buy_id": "mb_after_review", "status": "active"} + + class _HybridPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="hello") + + async def create_media_buy(self, req, ctx): + return ctx.handoff_to_task(_async_review) + + handler = _make_handler(_HybridPlatform(), executor) + result = await handler.create_media_buy( + CreateMediaBuyRequest( + account={"account_id": "acct_a"}, + brand={"domain": "example.com"}, + idempotency_key="idem_aaaa1234567890", + start_time="2026-05-01T00:00:00Z", + end_time="2026-05-31T23:59:59Z", + ), + ToolContext(), + ) + # Wire envelope, not Pydantic. + assert isinstance(result, dict) + assert result["status"] == "submitted" + assert result["task_type"] == "create_media_buy" + + +# ---- update_media_buy — arg-projected (media_buy_id, patch, ctx) ---- + + +@pytest.mark.asyncio +async def test_update_media_buy_arg_projects_media_buy_id_and_patch( + executor, +) -> None: + """The shim splits UpdateMediaBuyRequest into separate + media_buy_id + patch kwargs — adopters write + ``update_media_buy(media_buy_id, patch, ctx)`` with the full + request as ``patch``.""" + from adcp.types import UpdateMediaBuyRequest, UpdateMediaBuySuccessResponse + + seen_args: dict[str, Any] = {} + + class _Platform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="hello") + + async def update_media_buy(self, media_buy_id, patch, ctx): + seen_args["media_buy_id"] = media_buy_id + seen_args["patch_paused"] = patch.paused + return UpdateMediaBuySuccessResponse( + media_buy_id=media_buy_id, + status="paused", + packages=[], + ) + + handler = _make_handler(_Platform(), executor) + req = UpdateMediaBuyRequest( + account={"account_id": "acct_a"}, + media_buy_id="mb_1", + idempotency_key="idem_bbbb1234567890", + paused=True, + ) + resp = await handler.update_media_buy(req, ToolContext()) + assert isinstance(resp, UpdateMediaBuySuccessResponse) + assert seen_args == {"media_buy_id": "mb_1", "patch_paused": True} + + +# ---- sync_creatives — hybrid for creative review ---- + + +@pytest.mark.asyncio +async def test_sync_creatives_routes_through_platform(executor) -> None: + from adcp.types import SyncCreativesRequest, SyncCreativesSuccessResponse + + class _Platform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="hello") + + async def sync_creatives(self, req, ctx): + return SyncCreativesSuccessResponse(creatives=[]) + + handler = _make_handler(_Platform(), executor) + # SyncCreativesRequest has tight validation (creatives minItems=1, + # asset URL+format requirements). The handler-level routing is + # already covered by get_products / create_media_buy / update_media_buy + # tests; a simpler invocation via model_construct(_fields_set=None) + # bypasses the pydantic validator and exercises the dispatch path. + req = SyncCreativesRequest.model_construct( + account={"account_id": "acct_a"}, + creatives=[], + idempotency_key="idem_cccc1234567890", + ) + resp = await handler.sync_creatives(req, ToolContext()) + assert isinstance(resp, SyncCreativesSuccessResponse) + + +# ---- no-account tools ---- + + +@pytest.mark.asyncio +async def test_list_creative_formats_resolves_with_no_ref(executor) -> None: + """Wire request has no ``account`` field; shim passes None to + AccountStore.resolve. SingletonAccounts handles the None case + (synthesizes anonymous), so the shim flow works.""" + from adcp.types import ListCreativeFormatsRequest, ListCreativeFormatsResponse + + class _Platform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="hello") + + async def list_creative_formats(self, req, ctx): + return ListCreativeFormatsResponse(formats=[]) + + handler = _make_handler(_Platform(), executor) + resp = await handler.list_creative_formats( + ListCreativeFormatsRequest(), + ToolContext(), + ) + assert isinstance(resp, ListCreativeFormatsResponse) + + +# ---- account-resolver Awaitable + sync paths both work ---- + + +@pytest.mark.asyncio +async def test_handler_awaits_async_account_resolver(executor) -> None: + """Custom AccountStore impls may be async — handler must await.""" + from adcp.decisioning.types import Account + from adcp.types import GetProductsRequest, GetProductsResponse + + received_id: list[str] = [] + + class _AsyncStore: + resolution = "explicit" + + async def resolve(self, ref, auth_info=None): + await asyncio.sleep(0) # actual async work + return Account(id="async-resolved") + + class _Platform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = _AsyncStore() + + async def get_products(self, req, ctx): + received_id.append(ctx.account.id) + return GetProductsResponse(products=[]) + + handler = _make_handler(_Platform(), executor) + await handler.get_products(GetProductsRequest(buying_mode="brief", brief="any"), ToolContext()) + assert received_id == ["async-resolved"] + + +@pytest.mark.asyncio +async def test_handler_extract_auth_info_from_dict(executor) -> None: + """Operators populating ctx.metadata['adcp.auth_info'] as a dict + (instead of an AuthInfo instance — common shape from generic + middleware) get re-coerced to AuthInfo.""" + from adcp.types import GetProductsRequest, GetProductsResponse + + received_kind: list[str] = [] + + class _Platform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="x") + + async def get_products(self, req, ctx): + received_kind.append(ctx.auth_info.kind if ctx.auth_info else "none") + return GetProductsResponse(products=[]) + + handler = _make_handler(_Platform(), executor) + ctx = ToolContext( + metadata={ + "adcp.auth_info": { + "kind": "bearer", + "principal": "buyer-y", + "scopes": ["read"], + } + } + ) + await handler.get_products( + GetProductsRequest(buying_mode="brief", brief="any inventory"), + ctx, + ) + assert received_kind == ["bearer"] From 5f76cbb46c33c0765950f358362a72440632f338 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Thu, 30 Apr 2026 11:08:24 -0400 Subject: [PATCH 12/18] =?UTF-8?q?feat(decisioning):=20serve.py=20wrapper?= =?UTF-8?q?=20=E2=80=94=20public=20adopter=20surface?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stage 3 final piece. Two public entry points wire the foundation layers together: - create_adcp_server_from_platform(platform, ...) → (handler, executor, registry) 3-tuple. Adopters wanting full control over MCP/A2A wiring use this seam. - serve(platform, ...) → one-call wrapper that builds the handler and starts the MCP server via adcp.server.serve. Most adopters use this. Forwards host/port/transport/etc. via **serve_kwargs. Wires per the dispatch design doc: - D5 ThreadPoolExecutor configurability: * executor= (BYO operator-vetted pool — operator owns lifecycle) * thread_pool_size= (size the framework-allocated default) * default min(32, cpu+4) with thread_name_prefix="adcp-decisioning-" * executor= and thread_pool_size= are mutually exclusive - Emma #8 production-mode gate on InMemoryTaskRegistry: * Reads ADCP_ENV (case-insensitive {"prod", "production"} — same convention as adcp.validation.client_hooks._default_response_mode) * Refuses to start in production with InMemoryTaskRegistry unless ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1 explicitly set * Custom durable registry bypasses the gate - D15 state_reader / resource_resolver kwargs plumbed through to PlatformHandler. - validate_platform called before handler construction; failure surfaces as AdcpError to the caller. 24 tests in test_decisioning_serve.py covering all the above scenarios. Foundation tests: 133 (+24). Full suite: 2513 passed, 17 skipped, 1 xfailed. ruff + mypy clean. Stage 3 complete. Stage 4 next: examples/hello_seller.py + integration tests + ruff lint rule banning examples reaching into src/. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/adcp/decisioning/__init__.py | 6 + src/adcp/decisioning/serve.py | 250 ++++++++++++++++++++++++++ tests/test_decisioning_serve.py | 300 +++++++++++++++++++++++++++++++ 3 files changed, 556 insertions(+) create mode 100644 src/adcp/decisioning/serve.py create mode 100644 tests/test_decisioning_serve.py diff --git a/src/adcp/decisioning/__init__.py b/src/adcp/decisioning/__init__.py index f425be28e..4ecaf34e3 100644 --- a/src/adcp/decisioning/__init__.py +++ b/src/adcp/decisioning/__init__.py @@ -72,6 +72,10 @@ def create_media_buy( PropertyListReference, ResourceResolver, ) +from adcp.decisioning.serve import ( + create_adcp_server_from_platform, + serve, +) from adcp.decisioning.state import ( GovernanceContextJWS, Proposal, @@ -121,6 +125,8 @@ def create_media_buy( "TaskHandoffContext", "TaskRegistry", "TaskState", + "create_adcp_server_from_platform", + "serve", "WorkflowObjectType", "WorkflowStep", ] diff --git a/src/adcp/decisioning/serve.py b/src/adcp/decisioning/serve.py new file mode 100644 index 000000000..f1e7b6fea --- /dev/null +++ b/src/adcp/decisioning/serve.py @@ -0,0 +1,250 @@ +"""Public adopter surface for the v6.0 DecisioningPlatform framework. + +Two entry points: + +* :func:`create_adcp_server_from_platform` — build the + :class:`PlatformHandler` + supporting machinery (executor, registry) + from a :class:`DecisioningPlatform` instance and return them as a + 3-tuple ``(handler, executor, registry)``. Adopters wanting to + compose with their own MCP/A2A wiring use this seam. + +* :func:`serve` — the one-call wrapper that builds the handler AND + starts the MCP server. Most adopters call this. Mirrors + :func:`adcp.server.serve` for parity with the existing handler + workflow. + +Stage-3 wiring per the dispatch design doc: + +* D5 — explicit ``ThreadPoolExecutor`` for sync platform methods, + with three configuration knobs (``executor=`` / ``thread_pool_size=`` + / default ``min(32, cpu+4)``). Mutually exclusive validation; + framework owns lifecycle for default pools. +* Emma #8 — production-mode gate on :class:`InMemoryTaskRegistry`. + Reads ``ADCP_ENV`` (case-insensitive ``{"prod", "production"}`` — + same convention as + :func:`adcp.validation.client_hooks._default_response_mode`). Refuses + to start in production with the in-memory registry unless + ``ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1`` is set. +""" + +from __future__ import annotations + +import os +from concurrent.futures import ThreadPoolExecutor +from typing import TYPE_CHECKING, Any + +from adcp.decisioning.dispatch import validate_platform +from adcp.decisioning.handler import PlatformHandler +from adcp.decisioning.task_registry import InMemoryTaskRegistry +from adcp.decisioning.types import AdcpError + +if TYPE_CHECKING: + from adcp.decisioning.platform import DecisioningPlatform + from adcp.decisioning.resolve import ResourceResolver + from adcp.decisioning.state import StateReader + from adcp.decisioning.task_registry import TaskRegistry + + +def _is_production_env() -> bool: + """Detect production via ``ADCP_ENV`` env var. + + Case-insensitive ``{"prod", "production"}`` — matches the existing + SDK convention at + :func:`adcp.validation.client_hooks._default_response_mode` (the + same env var the validation hook reads). Reused here so adopters + don't manage two prod-detection mechanisms. + """ + val = os.environ.get("ADCP_ENV", "").strip().lower() + return val in {"prod", "production"} + + +def _default_thread_pool_size() -> int: + """Default executor size — ``min(32, cpu+4)`` per Python stdlib's + own ThreadPoolExecutor default. Adequate for hello-world / local + dev; sellers running sync DB drivers under load bump via + ``thread_pool_size=`` (or supply a custom ``executor=``). + """ + return min(32, (os.cpu_count() or 1) + 4) + + +def create_adcp_server_from_platform( + platform: DecisioningPlatform, + *, + executor: ThreadPoolExecutor | None = None, + thread_pool_size: int | None = None, + registry: TaskRegistry | None = None, + state_reader: StateReader | None = None, + resource_resolver: ResourceResolver | None = None, +) -> tuple[PlatformHandler, ThreadPoolExecutor, TaskRegistry]: + """Build the :class:`PlatformHandler` + supporting wiring from a + :class:`DecisioningPlatform`. + + Returns a 3-tuple ``(handler, executor, registry)``. The handler + wraps the platform; the executor is wired into dispatch for sync + platform methods; the registry handles + :class:`adcp.decisioning.TaskHandoff` lifecycle. + + Adopters who need full control over the MCP server wiring use this + seam — compose the returned handler with their own + :func:`adcp.server.create_mcp_server` call. Most adopters use + :func:`serve` instead. + + Validates the platform at server boot via + :func:`validate_platform` — fails fast on missing specialism + methods, missing ``accounts``, governance opt-in violations + (D15 round-4), and unknown specialisms (UserWarning per round-3 + D14). + + :param platform: The adopter's :class:`DecisioningPlatform` + subclass instance. + :param executor: Bring-your-own :class:`ThreadPoolExecutor` — + for operators with audit-instrumented thread pools or + wrappers around stdlib's executor. Mutually exclusive with + ``thread_pool_size``. Operator owns lifecycle (caller's + ``shutdown(wait=True)`` responsibility). + :param thread_pool_size: Size the default framework-allocated + executor. Mutually exclusive with ``executor``. Default is + :func:`_default_thread_pool_size`. + :param registry: Bring-your-own :class:`TaskRegistry` — typically + a v6.1 durable backing store. Default is + :class:`InMemoryTaskRegistry`, which the production-mode + gate refuses unless + ``ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1`` is set. + :param state_reader: Custom :class:`StateReader` impl + (D15 — workflow-state reads). Default is the v6.0 stub + (empty returns + one-time UserWarning per method). + :param resource_resolver: Custom :class:`ResourceResolver` impl + (D15 — async framework-mediated fetches). Default is the + v6.0 stub (raises ``NotImplementedError`` with a pointer to + v6.1). + + :raises ValueError: when ``executor`` and ``thread_pool_size`` are + both supplied (D5 mutually-exclusive validation). + :raises AdcpError: from :func:`validate_platform` when the + platform fails server-boot validation, OR when the production + gate refuses :class:`InMemoryTaskRegistry`. + """ + # D5: executor / thread_pool_size mutually exclusive. + if executor is not None and thread_pool_size is not None: + raise ValueError( + "Pass either executor= or thread_pool_size=, not both. " + "thread_pool_size sizes the default executor; executor= is " + "for operators wiring an audit-instrumented or otherwise " + "vetted threadpool." + ) + + # Allocate executor. + if executor is None: + size = thread_pool_size if thread_pool_size is not None else _default_thread_pool_size() + executor = ThreadPoolExecutor( + max_workers=size, + thread_name_prefix="adcp-decisioning-", + ) + + # Allocate registry, with production-mode gate (Emma #8). + if registry is None: + registry = InMemoryTaskRegistry() + if isinstance(registry, InMemoryTaskRegistry) and _is_production_env(): + opt_in = os.environ.get("ADCP_DECISIONING_ALLOW_INMEMORY_TASKS", "").strip() + if opt_in != "1": + raise AdcpError( + "INVALID_REQUEST", + message=( + "InMemoryTaskRegistry refuses to start in production " + "(ADCP_ENV is 'prod' or 'production'). HITL flows " + "depend on the registry — silent in-memory fallback " + "would lose tasks across process restarts. Either " + "wire a durable TaskRegistry impl (see " + "adcp.decisioning.TaskRegistry Protocol; v6.1 ships " + "PostgresTaskRegistry) OR set " + "ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1 to " + "explicitly opt into in-memory tasks (e.g., for " + "single-process pilots)." + ), + recovery="terminal", + details={ + "registry": type(registry).__name__, + "ADCP_ENV": os.environ.get("ADCP_ENV", ""), + }, + ) + + # Validate the platform AFTER executor + registry exist so any + # validation diagnostic includes the wiring context. Failure here + # propagates to the caller. + validate_platform(platform) + + handler = PlatformHandler( + platform, + executor=executor, + registry=registry, + state_reader=state_reader, + resource_resolver=resource_resolver, + ) + return handler, executor, registry + + +def serve( + platform: DecisioningPlatform, + *, + name: str | None = None, + executor: ThreadPoolExecutor | None = None, + thread_pool_size: int | None = None, + registry: TaskRegistry | None = None, + state_reader: StateReader | None = None, + resource_resolver: ResourceResolver | None = None, + advertise_all: bool = False, + **serve_kwargs: Any, +) -> None: + """One-call wrapper — build the handler and serve over MCP. + + Most adopters use this. For full control, use + :func:`create_adcp_server_from_platform` and compose with + :func:`adcp.server.create_mcp_server` / ``serve()`` directly. + + :param platform: The :class:`DecisioningPlatform` subclass + instance. + :param name: Server name advertised on AdCP capabilities. Defaults + to the platform class's ``__name__``. + :param executor: BYO :class:`ThreadPoolExecutor` per + :func:`create_adcp_server_from_platform` D5 contract. + :param thread_pool_size: Default-executor size override. + :param registry: BYO :class:`TaskRegistry`. Default is + :class:`InMemoryTaskRegistry` (gated for production). + :param state_reader: Custom :class:`StateReader` impl (D15). + :param resource_resolver: Custom :class:`ResourceResolver` impl (D15). + :param advertise_all: Forwarded to :func:`adcp.server.serve`. When + ``True``, ``tools/list`` advertises every method on the + handler regardless of override status. Default ``False`` — + the override-detection filter trims unimplemented platform + methods. Adopters with explicit-not-supported intent (e.g., + spec-compliance storyboards) pass ``True``. + :param serve_kwargs: Forwarded to :func:`adcp.server.serve`. Use + for ``host``, ``port``, ``transport``, ``test_controller``, + ``context_factory``, ``middleware``, etc. + """ + # Local import to avoid a circular at module-load time. Adopter + # serves never run during foundation imports anyway. + from adcp.server.serve import serve as _adcp_serve + + handler, _executor, _registry = create_adcp_server_from_platform( + platform, + executor=executor, + thread_pool_size=thread_pool_size, + registry=registry, + state_reader=state_reader, + resource_resolver=resource_resolver, + ) + + server_name = name or type(platform).__name__ + _adcp_serve( + handler, + name=server_name, + advertise_all=advertise_all, + **serve_kwargs, + ) + + +__all__ = [ + "create_adcp_server_from_platform", + "serve", +] diff --git a/tests/test_decisioning_serve.py b/tests/test_decisioning_serve.py new file mode 100644 index 000000000..a6f89caa4 --- /dev/null +++ b/tests/test_decisioning_serve.py @@ -0,0 +1,300 @@ +"""Unit tests for adcp.decisioning.serve. + +Covers: + +* :func:`create_adcp_server_from_platform` — builds the handler + + validates the platform + wires executor + registry. +* :func:`serve` — one-call wrapper smoke (we don't actually start + an MCP server in tests; the wrapper composition is verified by + inspecting that ``create_adcp_server_from_platform`` would have + been called with the right kwargs via mock). +* D5 executor configurability — BYO ``executor=`` AND ``thread_pool_size=`` + are mutually exclusive; default fires ``min(32, cpu+4)``. +* Emma #8 production-mode gate — ``ADCP_ENV in {prod, production}`` + with ``InMemoryTaskRegistry`` raises unless + ``ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1``. +""" + +from __future__ import annotations + +import os +from concurrent.futures import ThreadPoolExecutor +from unittest.mock import patch + +import pytest + +from adcp.decisioning import ( + AdcpError, + DecisioningCapabilities, + DecisioningPlatform, + InMemoryTaskRegistry, + SingletonAccounts, +) +from adcp.decisioning.serve import ( + _default_thread_pool_size, + _is_production_env, + create_adcp_server_from_platform, +) + + +class _BarePlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="hello") + + +# ---- _is_production_env ---- + + +def test_is_production_env_default_false() -> None: + with patch.dict(os.environ, {}, clear=False): + os.environ.pop("ADCP_ENV", None) + assert _is_production_env() is False + + +@pytest.mark.parametrize("value", ["prod", "production", "PROD", "Production"]) +def test_is_production_env_recognizes_prod_aliases(value: str) -> None: + with patch.dict(os.environ, {"ADCP_ENV": value}): + assert _is_production_env() is True + + +@pytest.mark.parametrize("value", ["dev", "staging", "test", "preprod", ""]) +def test_is_production_env_rejects_non_prod_values(value: str) -> None: + with patch.dict(os.environ, {"ADCP_ENV": value}): + assert _is_production_env() is False + + +# ---- _default_thread_pool_size ---- + + +def test_default_thread_pool_size_capped_at_32() -> None: + """Stdlib's ThreadPoolExecutor uses min(32, cpu+4) — we mirror + that. The 32 ceiling matters on large machines so the framework + doesn't accidentally over-allocate threads.""" + size = _default_thread_pool_size() + assert 1 <= size <= 32 + + +# ---- create_adcp_server_from_platform — happy path ---- + + +def test_create_returns_handler_executor_registry_tuple() -> None: + """Returns the 3-tuple per the public contract.""" + platform = _BarePlatform() + handler, executor, registry = create_adcp_server_from_platform(platform) + assert handler is not None + assert isinstance(executor, ThreadPoolExecutor) + assert isinstance(registry, InMemoryTaskRegistry) + executor.shutdown(wait=True) + + +def test_create_default_executor_uses_named_threads() -> None: + """Framework-allocated default executor sets a thread_name_prefix + for operator visibility (D5).""" + platform = _BarePlatform() + _, executor, _ = create_adcp_server_from_platform(platform) + # We can't easily inspect ThreadPoolExecutor's prefix without + # submitting a task — verify via thread name lookup. + fut = executor.submit(lambda: __import__("threading").current_thread().name) + name = fut.result(timeout=2.0) + assert name.startswith("adcp-decisioning-"), f"Expected adcp-decisioning- prefix, got: {name}" + executor.shutdown(wait=True) + + +# ---- D5 — executor / thread_pool_size mutually exclusive ---- + + +def test_create_rejects_both_executor_and_thread_pool_size() -> None: + platform = _BarePlatform() + custom = ThreadPoolExecutor(max_workers=2) + try: + with pytest.raises(ValueError, match="not both"): + create_adcp_server_from_platform( + platform, + executor=custom, + thread_pool_size=8, + ) + finally: + custom.shutdown(wait=True) + + +def test_create_uses_byo_executor_unchanged() -> None: + """Operator-supplied executor is wired through verbatim — same + instance the caller passed in.""" + platform = _BarePlatform() + custom = ThreadPoolExecutor(max_workers=2, thread_name_prefix="byo-") + try: + _, executor, _ = create_adcp_server_from_platform(platform, executor=custom) + assert executor is custom + finally: + custom.shutdown(wait=True) + + +def test_create_thread_pool_size_overrides_default() -> None: + """``thread_pool_size=`` sizes the framework-allocated default + executor.""" + platform = _BarePlatform() + _, executor, _ = create_adcp_server_from_platform(platform, thread_pool_size=2) + assert executor._max_workers == 2 # type: ignore[attr-defined] + executor.shutdown(wait=True) + + +# ---- Emma #8 production-mode gate ---- + + +def test_create_raises_in_production_with_default_in_memory_registry() -> None: + """ADCP_ENV=production + default InMemoryTaskRegistry + no opt-in + → AdcpError. Sales-broadcast-tv adopters depend on the registry; + silent in-memory fallback would lose tasks across restarts.""" + platform = _BarePlatform() + with patch.dict( + os.environ, + {"ADCP_ENV": "production"}, + clear=False, + ): + os.environ.pop("ADCP_DECISIONING_ALLOW_INMEMORY_TASKS", None) + with pytest.raises(AdcpError) as exc_info: + create_adcp_server_from_platform(platform) + assert exc_info.value.code == "INVALID_REQUEST" + msg = str(exc_info.value) + assert "InMemoryTaskRegistry" in msg + assert "ADCP_DECISIONING_ALLOW_INMEMORY_TASKS" in msg + + +def test_create_passes_in_production_with_explicit_opt_in() -> None: + """The opt-in env var lets adopters explicitly accept in-memory + tasks in prod (e.g., for single-process pilots). Setting it to + '1' bypasses the gate.""" + platform = _BarePlatform() + with patch.dict( + os.environ, + { + "ADCP_ENV": "production", + "ADCP_DECISIONING_ALLOW_INMEMORY_TASKS": "1", + }, + ): + handler, executor, registry = create_adcp_server_from_platform(platform) + assert isinstance(registry, InMemoryTaskRegistry) + executor.shutdown(wait=True) + + +def test_create_passes_in_production_with_custom_durable_registry() -> None: + """When the operator supplies a non-InMemory registry, the gate + doesn't fire — a v6.1-style PostgresTaskRegistry would be + accepted in prod without the opt-in.""" + + class _DurableStub: + async def issue(self, *, account_id, task_type): + return "task_x" + + async def update_progress(self, task_id, progress): + pass + + async def complete(self, task_id, result): + pass + + async def fail(self, task_id, error): + pass + + async def get(self, task_id, *, expected_account_id=None): + return None + + platform = _BarePlatform() + custom_reg = _DurableStub() + with patch.dict(os.environ, {"ADCP_ENV": "production"}): + os.environ.pop("ADCP_DECISIONING_ALLOW_INMEMORY_TASKS", None) + handler, executor, registry = create_adcp_server_from_platform( + platform, registry=custom_reg # type: ignore[arg-type] + ) + assert registry is custom_reg + executor.shutdown(wait=True) + + +def test_create_passes_in_dev_env_with_default_registry() -> None: + """No prod gate — defaults work in local dev / CI.""" + platform = _BarePlatform() + with patch.dict(os.environ, {"ADCP_ENV": "dev"}): + handler, executor, registry = create_adcp_server_from_platform(platform) + assert isinstance(registry, InMemoryTaskRegistry) + executor.shutdown(wait=True) + + +# ---- Validation pass-through ---- + + +def test_create_propagates_validate_platform_failure() -> None: + """validate_platform's failure (missing required methods, etc.) + propagates from create_adcp_server_from_platform — the caller + sees the structured AdcpError before any wiring is exposed.""" + + class _PartialSalesPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities(specialisms=["sales-non-guaranteed"]) + accounts = SingletonAccounts(account_id="x") + # Missing all 5 required sales-* methods. + + with pytest.raises(AdcpError) as exc_info: + create_adcp_server_from_platform(_PartialSalesPlatform()) + assert exc_info.value.code == "INVALID_REQUEST" + assert "missing" in str(exc_info.value).lower() + + +def test_create_propagates_governance_opt_in_failure() -> None: + """D15 governance fail-fast surfaces from + create_adcp_server_from_platform.""" + + class _UnsafeGovernancePlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities( + specialisms=["governance-spend-authority"], + governance_aware=False, + ) + accounts = SingletonAccounts(account_id="x") + + with pytest.raises(AdcpError) as exc_info: + create_adcp_server_from_platform(_UnsafeGovernancePlatform()) + assert "governance" in str(exc_info.value).lower() + + +# ---- Custom state_reader / resource_resolver plumbing (D15) ---- + + +def test_create_threads_state_reader_to_handler() -> None: + """Custom StateReader impl flows through to the handler so when + the handler hydrates RequestContext per request, adopter platform + methods see ``ctx.state.`` instead of the + v6.0 stub.""" + + class _CustomStateReader: + def find_by_object(self, t, i): + return () + + def find_proposal_by_id(self, p): + return None + + def governance_context(self): + return None + + def workflow_steps(self): + return () + + custom = _CustomStateReader() + platform = _BarePlatform() + handler, executor, _ = create_adcp_server_from_platform(platform, state_reader=custom) + assert handler._state_reader is custom + executor.shutdown(wait=True) + + +def test_create_threads_resource_resolver_to_handler() -> None: + class _CustomResolver: + async def property_list(self, list_id): + return None + + async def collection_list(self, list_id): + return None + + async def creative_format(self, format_id, *, revalidate=False): + return None + + custom = _CustomResolver() + platform = _BarePlatform() + handler, executor, _ = create_adcp_server_from_platform(platform, resource_resolver=custom) + assert handler._resource_resolver is custom + executor.shutdown(wait=True) From c2b04073be659053181bab9175012061e9b317f2 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Thu, 30 Apr 2026 11:30:38 -0400 Subject: [PATCH 13/18] =?UTF-8?q?fix(decisioning):=20Stage-3=20review=20P0?= =?UTF-8?q?=20fixes=20=E2=80=94=20wiring,=20GC,=20durability=20marker?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three independent reviewer passes (code-reviewer, security-reviewer, python-expert) flagged six P0/security blockers in Stage 3. All fixed; full suite 2519 passed, mypy + ruff clean. P0 fixes: 1. compose_caller_identity wired into _build_request_context. Was exported + tested but never called from the dispatch path — D9 round-3 cross-store cache isolation did not exist at runtime. _build_request_context now accepts store= and sets ctx.caller_identity to the composite key. handler.py passes self._platform.accounts on every dispatch. 2. Background _run() task strong-referenced. asyncio.create_task only weak-refs; under GC pressure tasks vanish before completion, leaving registry stuck in 'submitted' forever. Tracked in module-level _BACKGROUND_HANDOFF_TASKS set with add_done_callback cleanup. Documented Python footgun. 3. Production gate uses is_durable marker, not isinstance. The isinstance(registry, InMemoryTaskRegistry) check was bypassable by duck-typed re-implementations AND fired incorrectly on safe instrumentation subclasses. New TaskRegistry Protocol declares is_durable: ClassVar[bool]; InMemoryTaskRegistry sets False. Subclasses inherit False (gate fires); custom durable impls set True explicitly. Safe-by-default. 4. Empty/ account_id rejected. AccountStore returning Account(id="") or default Account(id="") silently collapsed every empty-id tenant into one cache scope class — cross-tenant data leak. Both compose_caller_identity AND InMemoryTaskRegistry.issue now reject empty/whitespace/ fail-fast. 5. compose_caller_identity uses module + qualname. __qualname__ alone collides for two MyStore classes in different packages. Now composes f"{module}.{qualname}:{account.id}". 6. _project_handoff contextvars comment corrected. Comment claimed asyncio.create_task auto-snapshots — it inherits, not snapshots. Updated to explain the inherit-by-reference semantics and why it's the right behavior here. Test additions: - test_compose_caller_identity_uses_module_qualname_and_account_id (replaces qualname-only test) - test_compose_caller_identity_rejects_empty_account_id - test_build_request_context_uses_composite_key_when_store_supplied (the load-bearing wiring regression) - test_handoff_background_task_is_strong_referenced - test_create_passes_in_production_with_custom_durable_registry (updated to use is_durable marker) - test_create_raises_when_inmemory_subclass_used_in_production (subclass-bypass regression) - test_create_raises_when_duck_typed_non_durable_used_in_production (safe-by-default regression) - test_in_memory_task_registry_is_not_durable Foundation tests: 145 (+12). Full suite: 2519 passed, 17 skipped, 1 xfailed. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/adcp/decisioning/dispatch.py | 97 +++++++++++++++++++++---- src/adcp/decisioning/handler.py | 5 +- src/adcp/decisioning/serve.py | 25 +++++-- src/adcp/decisioning/task_registry.py | 50 +++++++++++-- tests/test_decisioning_dispatch.py | 86 +++++++++++++++++++++- tests/test_decisioning_serve.py | 59 ++++++++++++++- tests/test_decisioning_task_registry.py | 19 ++++- 7 files changed, 305 insertions(+), 36 deletions(-) diff --git a/src/adcp/decisioning/dispatch.py b/src/adcp/decisioning/dispatch.py index 8d05d9629..e23dfcbf7 100644 --- a/src/adcp/decisioning/dispatch.py +++ b/src/adcp/decisioning/dispatch.py @@ -336,21 +336,47 @@ def compose_caller_identity( account: Account[Any], store: AccountStore[Any], ) -> str: - """Compose the cache scope key from store qualname + account id. + """Compose the cache scope key from ``module + qualname + account.id``. - Round-3 D9: the framework's idempotency middleware reads - ``ctx.caller_identity`` for cache scoping. Using ``account.id`` + Round-3 D9 + Round-4 review: the framework's idempotency middleware + reads ``ctx.caller_identity`` for cache scoping. Using ``account.id`` alone leaks across stores when two adopters use different ``AccountStore`` impls but happen to mint colliding ids. The - composite ``f"{store qualname}:{account.id}"`` gives structural - cross-store isolation at zero coordination cost. - - Within-store collisions (one impl, identical ``account.id`` for - two distinct accounts) remain an adopter bug at + composite ``f"{store_module}.{store_qualname}:{account.id}"`` gives + structural cross-store isolation at zero coordination cost. + + Includes ``__module__`` because ``__qualname__`` is the dotted path + *within* a module — two ``MyStore`` classes in different packages + share the same qualname. Without the module prefix the isolation + promise breaks across cross-package re-implementations. + + Empty / whitespace ``account.id`` raises ``AdcpError`` — + ``Account(id="")`` would silently collapse every tenant whose + AccountStore returns the empty default into a single cache scope. + The dataclass default ``Account(id="")`` is also rejected so + a misconfigured store that forgets to populate ``id`` fails fast + rather than leaking buy-side data. + + Within-store collisions (one impl, identical ``account.id`` for two + distinct accounts) remain an adopter bug at ``AccountStore.resolve``; the framework can't structurally prevent that without a runtime registry costing more than it buys. """ - return f"{type(store).__qualname__}:{account.id}" + if not account.id or not account.id.strip() or account.id == "": + raise AdcpError( + "INVALID_REQUEST", + message=( + f"AccountStore returned an account with empty/unset id " + f"({account.id!r}). The framework refuses to scope the " + "idempotency cache by an empty key — every empty-id " + "tenant would share state. Fix: ensure your " + "AccountStore.resolve always returns Account(id=) " + "and never leaves the dataclass default." + ), + recovery="terminal", + ) + cls = type(store) + return f"{cls.__module__}.{cls.__qualname__}:{account.id}" # --------------------------------------------------------------------------- @@ -363,6 +389,7 @@ def _build_request_context( account: Account[Any], auth_info: AuthInfo | None, *, + store: AccountStore[Any] | None = None, state_reader: Any | None = None, resource_resolver: Any | None = None, ) -> RequestContext[Any]: @@ -374,14 +401,27 @@ def _build_request_context( :class:`adcp.decisioning.RequestContext` carries the ``@internal-construction`` note). + Sets ``ctx.caller_identity`` to the composite cache scope key + via :func:`compose_caller_identity` when ``store`` is supplied. + Wiring this is critical — it's the framework's idempotency + middleware's only safeguard against cross-store cache collisions + (D9 round-3). When ``store`` is ``None`` (test fixtures, custom + dispatch paths), falls back to ``tool_ctx.caller_identity`` + verbatim. Production callers from ``handler.py`` always supply + the store. + :param tool_ctx: The framework's :class:`ToolContext` from the underlying transport. Carries ``request_id``, ``tenant_id``, - and ``metadata``; we extend its caller_identity to the + and ``metadata``; we override its caller_identity to the composite key. :param account: Resolved account from the platform's :class:`AccountStore.resolve`. :param auth_info: Optional verified principal info — when present, ``auth_principal`` is populated from ``auth_info.principal``. + :param store: The AccountStore that produced ``account``. Required + for the production cache-isolation guarantee; the dispatch + adapter always supplies it. Test fixtures may pass ``None`` + to skip the composite-key derivation. :param state_reader: Custom ``StateReader`` impl. Defaults to the v6.0 stub. Accept as a parameter so ``serve()`` can wire a v6.1 backing store without touching dispatch. @@ -396,12 +436,20 @@ def _build_request_context( auth_principal = auth_info.principal if auth_info is not None else None + # Composite cache scope key when store is supplied (production + # path). Falls back to tool_ctx.caller_identity for test fixtures. + caller_identity: str | None + if store is not None: + caller_identity = compose_caller_identity(account, store) + else: + caller_identity = tool_ctx.caller_identity + # Build the RequestContext with the explicit state/resolve kwargs # if provided; otherwise let the dataclass default factories # supply the v6.0 stubs. ctx_kwargs: dict[str, Any] = { "request_id": tool_ctx.request_id, - "caller_identity": tool_ctx.caller_identity, + "caller_identity": caller_identity, "tenant_id": tool_ctx.tenant_id, "metadata": dict(tool_ctx.metadata), "account": account, @@ -604,9 +652,22 @@ async def _run() -> None: # the typed Pydantic response. await registry.complete(task_id, {"value": str(result)}) - # ``asyncio.create_task`` snapshots contextvars automatically - # — no explicit copy needed at this site. - asyncio.create_task(_run()) + # ``asyncio.create_task`` only weak-refs the resulting Task — under + # GC pressure or with no outer awaiter, the task can be collected + # mid-flight, leaving the registry stuck in 'submitted' forever. + # Track in a module-level set with a done-callback that discards + # the entry once the task completes. Documented Python footgun: + # https://docs.python.org/3/library/asyncio-task.html#creating-tasks + # + # Per Python 3.11+ semantics, ``asyncio.create_task`` inherits the + # current task's ContextVar state by reference (NOT a snapshot). + # That's the right behavior here — the background task should see + # the request-scope ContextVars set by middleware, NOT a stale + # snapshot from before middleware ran. Sync handoffs go through + # ``run_in_executor`` with explicit ``copy_context`` inside ``_run``. + bg_task = asyncio.create_task(_run(), name=f"adcp-handoff-{task_id}") + _BACKGROUND_HANDOFF_TASKS.add(bg_task) + bg_task.add_done_callback(_BACKGROUND_HANDOFF_TASKS.discard) # Wire ``Submitted`` envelope per spec. return { @@ -616,6 +677,14 @@ async def _run() -> None: } +#: Strong-ref the in-flight handoff tasks so the asyncio loop's +#: weak-ref behavior doesn't garbage-collect them mid-flight. Each +#: completed task removes itself via :meth:`asyncio.Task.add_done_callback`. +#: Module-level so the set survives across requests; framework-internal, +#: never exported. +_BACKGROUND_HANDOFF_TASKS: set[asyncio.Task[None]] = set() + + __all__ = [ "REQUIRED_METHODS_PER_SPECIALISM", "compose_caller_identity", diff --git a/src/adcp/decisioning/handler.py b/src/adcp/decisioning/handler.py index 45f766f31..0208c5cf6 100644 --- a/src/adcp/decisioning/handler.py +++ b/src/adcp/decisioning/handler.py @@ -210,12 +210,15 @@ def _build_ctx( account: Account[Any], ) -> Any: """Wrap :func:`_build_request_context` with the handler's - wired StateReader / ResourceResolver overrides.""" + wired StateReader / ResourceResolver overrides AND the + platform's AccountStore (for D9 round-3 composite cache + scope-key derivation).""" auth_info = self._extract_auth_info(tool_ctx) return _build_request_context( tool_ctx, account, auth_info, + store=self._platform.accounts, state_reader=self._state_reader, resource_resolver=self._resource_resolver, ) diff --git a/src/adcp/decisioning/serve.py b/src/adcp/decisioning/serve.py index f1e7b6fea..dc1034945 100644 --- a/src/adcp/decisioning/serve.py +++ b/src/adcp/decisioning/serve.py @@ -142,21 +142,29 @@ def create_adcp_server_from_platform( ) # Allocate registry, with production-mode gate (Emma #8). + # Gate reads the registry's is_durable class-level marker rather + # than `isinstance(registry, InMemoryTaskRegistry)`. Two reasons: + # 1. Adopters subclassing InMemoryTaskRegistry for instrumentation + # inherit `is_durable=False` and correctly trip the gate. + # 2. Adopters duck-typing a custom in-memory store would bypass + # the isinstance check; the marker is opt-in for durability, + # defaulting safe. if registry is None: registry = InMemoryTaskRegistry() - if isinstance(registry, InMemoryTaskRegistry) and _is_production_env(): + is_durable = bool(getattr(registry, "is_durable", False)) + if not is_durable and _is_production_env(): opt_in = os.environ.get("ADCP_DECISIONING_ALLOW_INMEMORY_TASKS", "").strip() if opt_in != "1": raise AdcpError( "INVALID_REQUEST", message=( - "InMemoryTaskRegistry refuses to start in production " - "(ADCP_ENV is 'prod' or 'production'). HITL flows " - "depend on the registry — silent in-memory fallback " - "would lose tasks across process restarts. Either " - "wire a durable TaskRegistry impl (see " - "adcp.decisioning.TaskRegistry Protocol; v6.1 ships " - "PostgresTaskRegistry) OR set " + f"Non-durable TaskRegistry ({type(registry).__name__}) " + "refuses to start in production (ADCP_ENV is 'prod' " + "or 'production'). HITL flows depend on the registry " + "— silent in-memory fallback would lose tasks across " + "process restarts. Either wire a durable " + "TaskRegistry impl (set is_durable=True on the class; " + "v6.1 ships PostgresTaskRegistry) OR set " "ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1 to " "explicitly opt into in-memory tasks (e.g., for " "single-process pilots)." @@ -164,6 +172,7 @@ def create_adcp_server_from_platform( recovery="terminal", details={ "registry": type(registry).__name__, + "is_durable": is_durable, "ADCP_ENV": os.environ.get("ADCP_ENV", ""), }, ) diff --git a/src/adcp/decisioning/task_registry.py b/src/adcp/decisioning/task_registry.py index 0040937df..260daeb75 100644 --- a/src/adcp/decisioning/task_registry.py +++ b/src/adcp/decisioning/task_registry.py @@ -40,7 +40,7 @@ import uuid from collections.abc import Awaitable, Callable from dataclasses import dataclass, field -from typing import Any, Literal, Protocol, runtime_checkable +from typing import Any, ClassVar, Literal, Protocol, runtime_checkable #: Terminal task states per AdCP 3.0 spec (``enums/task-status.json``). #: ``submitted`` = task created but not yet started; ``working`` = adopter @@ -123,6 +123,18 @@ class TaskRegistry(Protocol): """Per-account task store — the seam adopters substitute for a durable backing implementation. + **Durability marker** (``is_durable: ClassVar[bool]``): + + Production deployments running ``sales-broadcast-tv`` or any HITL + flow refuse to start with a non-durable registry unless the + operator explicitly opts in via + ``ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1``. The framework reads + ``registry.is_durable`` to make this decision; subclassing + :class:`InMemoryTaskRegistry` for instrumentation does NOT bypass + the gate (the subclass inherits ``is_durable = False``). Custom + durable impls MUST set ``is_durable = True`` explicitly. The + Protocol declares this as a class-level ``bool``. + Lifecycle (framework-driven; adopters call only :meth:`TaskHandoffContext` methods, not these directly): @@ -157,6 +169,14 @@ class TaskRegistry(Protocol): the regression suite. """ + #: Whether this registry persists tasks across process restarts. + #: ``False`` for in-memory / lossy impls; ``True`` for durable + #: backings (PostgreSQL, Redis, etc.). The framework's + #: production-mode gate refuses non-durable registries unless + #: the operator explicitly opts in via + #: ``ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1``. + is_durable: ClassVar[bool] + async def issue( self, *, @@ -259,13 +279,17 @@ class InMemoryTaskRegistry: Production-mode gate: :func:`adcp.decisioning.serve.serve` refuses to wire this when ``ADCP_ENV`` indicates production unless - ``ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1`` is set. The gate lives - in ``serve.py`` (Stage 3 dispatch) — this class itself is benign; - constructing it is fine. Production sellers running - ``sales-broadcast-tv`` or any HITL flow get the explicit refusal - so silent in-memory fallback can't bite oncall. + ``ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1`` is set. The gate + reads ``registry.is_durable``; subclassing this class for + instrumentation does NOT bypass the gate (the ``False`` is + inherited). Custom durable impls set ``is_durable = True`` + explicitly. Production sellers running ``sales-broadcast-tv`` + or any HITL flow get the explicit refusal so silent in-memory + fallback can't bite oncall. """ + is_durable: ClassVar[bool] = False + def __init__(self) -> None: self._records: dict[str, TaskRecord] = {} self._lock = asyncio.Lock() @@ -276,6 +300,20 @@ async def issue( account_id: str, task_type: str, ) -> str: + # Reject empty/unset account_id at issue-time. Without this, + # two tenants whose AccountStore returns Account(id="") or the + # default Account(id="") share a cache scope class and + # can read each other's tasks via cross-tenant probe (the + # equality check passes when both are empty). See + # tests/test_decisioning_task_registry_cross_tenant.py for the + # regression suite. + if not account_id or not account_id.strip() or account_id == "": + raise ValueError( + f"account_id must be a non-empty, non-default string; " + f"got {account_id!r}. AccountStore.resolve must always " + "return Account(id=) so cross-tenant cache " + "scoping works correctly." + ) task_id = f"task_{uuid.uuid4().hex[:16]}" async with self._lock: self._records[task_id] = TaskRecord( diff --git a/tests/test_decisioning_dispatch.py b/tests/test_decisioning_dispatch.py index b2b659b0b..5b9689542 100644 --- a/tests/test_decisioning_dispatch.py +++ b/tests/test_decisioning_dispatch.py @@ -220,11 +220,28 @@ def test_required_methods_per_specialism_pinned_for_sales() -> None: # ---- compose_caller_identity (D9 round-3) ---- -def test_compose_caller_identity_uses_store_qualname_and_account_id() -> None: +def test_compose_caller_identity_uses_module_qualname_and_account_id() -> None: + """Composite key is ``module.qualname:account_id``. Includes + ``__module__`` because two ``MyStore`` classes in different + packages share ``__qualname__`` — structural cross-MODULE + isolation (round-4 review).""" store = SingletonAccounts(account_id="acme") account: Account[Any] = Account(id="acme:buyer-a") key = compose_caller_identity(account, store) - assert key == "SingletonAccounts:acme:buyer-a" + assert key == "adcp.decisioning.accounts.SingletonAccounts:acme:buyer-a" + + +def test_compose_caller_identity_rejects_empty_account_id() -> None: + """Empty/whitespace/ account.id raises — Account(id="") + or the dataclass default would silently collapse every empty-id + tenant into one cache scope class (P0 security fix from round-4 + review).""" + store = SingletonAccounts(account_id="x") + for bogus in ("", " ", ""): + with pytest.raises(AdcpError) as exc_info: + compose_caller_identity(Account(id=bogus), store) + assert exc_info.value.code == "INVALID_REQUEST" + assert "empty" in str(exc_info.value).lower() or "unset" in str(exc_info.value).lower() def test_compose_caller_identity_isolates_across_stores() -> None: @@ -263,11 +280,27 @@ def test_build_request_context_threads_account_and_auth() -> None: assert ctx.auth_info is auth assert ctx.auth_principal == "buyer-a" assert ctx.request_id == "req_1" + # Without ``store=`` (test fixture path), caller_identity falls + # back to tool_ctx.caller_identity. The composite-key path is + # exercised by test_build_request_context_uses_composite_key_when_store_supplied. assert ctx.caller_identity == "caller_x" assert ctx.tenant_id == "tenant_y" assert ctx.metadata == {"foo": "bar"} +def test_build_request_context_uses_composite_key_when_store_supplied() -> None: + """P0 round-4 regression: ``_build_request_context`` MUST set + ``ctx.caller_identity`` to the composite key when ``store=`` is + supplied. Without this wiring, idempotency middleware caches by + raw ``tool_ctx.caller_identity`` and D9 round-3 cross-store + isolation does not exist at runtime.""" + store = SingletonAccounts(account_id="acme") + account: Account[Any] = Account(id="acme:buyer-a") + tool_ctx = ToolContext(caller_identity="raw-original") + ctx = _build_request_context(tool_ctx, account, None, store=store) + assert ctx.caller_identity == ("adcp.decisioning.accounts.SingletonAccounts:acme:buyer-a") + + def test_build_request_context_with_no_auth() -> None: """Unauthenticated dev path (singleton fixtures): auth_principal is None, auth_info is None.""" @@ -683,6 +716,55 @@ def _sync_handoff_fn(task_ctx): assert rec["result"]["thread"].startswith("test-dispatch-") +@pytest.mark.asyncio +async def test_handoff_background_task_is_strong_referenced( + executor: ThreadPoolExecutor, +) -> None: + """P0 round-4 regression: ``asyncio.create_task`` only weak-refs + the resulting Task; under GC pressure the loop can collect the + background task before it completes, leaving the registry stuck + in 'submitted' forever. Fix: the framework tracks pending tasks + in a module-level set with done-callback cleanup. Test asserts + the set membership is correct during the task's lifetime.""" + from adcp.decisioning.dispatch import _BACKGROUND_HANDOFF_TASKS + + registry = InMemoryTaskRegistry() + ctx = _build_request_context(ToolContext(), Account(id="acct_a"), None) + started = asyncio.Event() + finish = asyncio.Event() + + async def _handoff_fn(task_ctx): + started.set() + await finish.wait() + return {"done": True} + + initial_size = len(_BACKGROUND_HANDOFF_TASKS) + envelope = await _project_handoff( + TaskHandoff(_handoff_fn), + ctx, + method_name="create_media_buy", + registry=registry, + executor=executor, + ) + # Background task is alive — strong-ref'd via the module-level set. + await asyncio.wait_for(started.wait(), timeout=2.0) + assert len(_BACKGROUND_HANDOFF_TASKS) > initial_size + bg_tasks_for_this = [ + t + for t in _BACKGROUND_HANDOFF_TASKS + if t.get_name() == f"adcp-handoff-{envelope['task_id']}" + ] + assert ( + len(bg_tasks_for_this) == 1 + ), f"Expected exactly one tracked background task; got {len(bg_tasks_for_this)}" + # Let it complete; the done-callback removes from the set. + finish.set() + await asyncio.sleep(0.1) + assert all( + t.get_name() != f"adcp-handoff-{envelope['task_id']}" for t in _BACKGROUND_HANDOFF_TASKS + ), "Completed background task must be removed via done-callback" + + @pytest.mark.asyncio async def test_handoff_invoked_via_invoke_platform_method( executor: ThreadPoolExecutor, diff --git a/tests/test_decisioning_serve.py b/tests/test_decisioning_serve.py index a6f89caa4..8c7587b4e 100644 --- a/tests/test_decisioning_serve.py +++ b/tests/test_decisioning_serve.py @@ -178,11 +178,15 @@ def test_create_passes_in_production_with_explicit_opt_in() -> None: def test_create_passes_in_production_with_custom_durable_registry() -> None: - """When the operator supplies a non-InMemory registry, the gate - doesn't fire — a v6.1-style PostgresTaskRegistry would be - accepted in prod without the opt-in.""" + """When the operator supplies a registry with ``is_durable=True``, + the gate doesn't fire — a v6.1-style PostgresTaskRegistry would + be accepted in prod without the opt-in. The marker is what the + gate reads (NOT isinstance checks; subclasses of + InMemoryTaskRegistry inherit is_durable=False).""" class _DurableStub: + is_durable = True # the marker the gate reads + async def issue(self, *, account_id, task_type): return "task_x" @@ -209,6 +213,55 @@ async def get(self, task_id, *, expected_account_id=None): executor.shutdown(wait=True) +def test_create_raises_when_inmemory_subclass_used_in_production() -> None: + """Adopter subclassing InMemoryTaskRegistry for instrumentation + inherits is_durable=False — gate fires, no bypass via subclass. + This is the regression for the round-4 review's `isinstance` + bypass concern.""" + + class _InstrumentedInMemoryRegistry(InMemoryTaskRegistry): + pass + + platform = _BarePlatform() + with patch.dict(os.environ, {"ADCP_ENV": "production"}): + os.environ.pop("ADCP_DECISIONING_ALLOW_INMEMORY_TASKS", None) + with pytest.raises(AdcpError) as exc_info: + create_adcp_server_from_platform(platform, registry=_InstrumentedInMemoryRegistry()) + assert exc_info.value.code == "INVALID_REQUEST" + assert "_InstrumentedInMemoryRegistry" in str(exc_info.value) + + +def test_create_raises_when_duck_typed_non_durable_used_in_production() -> None: + """Custom registry with no is_durable marker (defaults False via + getattr) trips the gate. Adopters MUST explicitly opt into + is_durable=True; safe-by-default.""" + + class _BareRegistry: + async def issue(self, *, account_id, task_type): + return "task_x" + + async def update_progress(self, task_id, progress): + pass + + async def complete(self, task_id, result): + pass + + async def fail(self, task_id, error): + pass + + async def get(self, task_id, *, expected_account_id=None): + return None + + platform = _BarePlatform() + with patch.dict(os.environ, {"ADCP_ENV": "production"}): + os.environ.pop("ADCP_DECISIONING_ALLOW_INMEMORY_TASKS", None) + with pytest.raises(AdcpError) as exc_info: + create_adcp_server_from_platform( + platform, registry=_BareRegistry() # type: ignore[arg-type] + ) + assert exc_info.value.code == "INVALID_REQUEST" + + def test_create_passes_in_dev_env_with_default_registry() -> None: """No prod gate — defaults work in local dev / CI.""" platform = _BarePlatform() diff --git a/tests/test_decisioning_task_registry.py b/tests/test_decisioning_task_registry.py index 7e5917207..57616cb02 100644 --- a/tests/test_decisioning_task_registry.py +++ b/tests/test_decisioning_task_registry.py @@ -51,10 +51,12 @@ def test_in_memory_task_registry_satisfies_protocol() -> None: def test_custom_registry_satisfies_protocol_via_duck_typing() -> None: - """Adopter-written class with the right methods matches without - inheritance.""" + """Adopter-written class with the right methods + ``is_durable`` + class attr matches without inheritance.""" class _Stub: + is_durable = True # custom durable impl + async def issue(self, *, account_id: str, task_type: str) -> str: return "task_x" @@ -78,6 +80,19 @@ async def get( assert isinstance(_Stub(), TaskRegistry) +def test_in_memory_task_registry_is_not_durable() -> None: + """``InMemoryTaskRegistry.is_durable`` is False — production-mode + gate refuses by default. Subclasses for instrumentation inherit + this.""" + assert InMemoryTaskRegistry.is_durable is False + assert InMemoryTaskRegistry().is_durable is False + + class _InstrumentedSubclass(InMemoryTaskRegistry): + pass + + assert _InstrumentedSubclass.is_durable is False + + # ---- InMemoryTaskRegistry — issue + initial state ---- From fd4a3568d14b091faf8dff40e71ca6d4e2f83578 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Thu, 30 Apr 2026 11:35:54 -0400 Subject: [PATCH 14/18] =?UTF-8?q?fix(decisioning):=20Stage-3=20review=20P1?= =?UTF-8?q?=20fixes=20=E2=80=94=20singletons,=20drift=20detection,=20loggi?= =?UTF-8?q?ng?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Batch 2 of Stage-3 reviewer feedback. Three focused improvements landing on top of the P0 fixes (commit c2b04073): 1. Module-level singleton stubs for the v6.0 stub state/resolve readers. Per-RequestContext allocation bought nothing — the warned-once set is module-level and the docstring promises "per process per method, not per request". Singleton matches the contract and eliminates per-request stub churn. 2. arg_projector signature-drift detection. When an adopter renames a kwargs-projected param (e.g., update_media_buy's `patch` → `update_data`), the framework's kwargs-unpack hits TypeError. Previously projected to bare INTERNAL_ERROR with no hint. Now projected to INVALID_REQUEST with the projected-kwargs and method name in the message — adopters fix the signature without log archaeology. Fall-through TypeError (non-projector path) still wraps to INTERNAL_ERROR. 3. TaskHandoffContext.update logging. The swallow-on-registry-error contract is preserved (transient writes must not abort the handoff fn), but now logs at WARNING with traceback + task_id so transient failures aren't silently invisible to operators. Test additions: - test_invoke_arg_projector_signature_drift_projects_invalid_request - test_handoff_context_update_swallows_registry_errors strengthened to assert the new WARNING log - test_default_state_reader_is_module_singleton - test_default_resolver_is_module_singleton - test_request_context_default_factories_share_singleton Foundation tests: 149 (+4). Full suite: 2523 passed, 17 skipped, 1 xfailed. ruff + mypy clean. P2 items (full hex task_id, design-doc WeakValueDictionary mention, TaskRecord.error vs adcp_error spec field, async-detection docstring alignment) deferred to Stage-4 follow-up — they don't block correctness or security. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/adcp/decisioning/dispatch.py | 41 ++++++++++++++++++- src/adcp/decisioning/resolve.py | 9 +++- src/adcp/decisioning/state.py | 17 +++++++- src/adcp/decisioning/task_registry.py | 22 +++++++--- .../test_decisioning_context_state_resolve.py | 32 +++++++++++++++ tests/test_decisioning_dispatch.py | 41 +++++++++++++++++++ tests/test_decisioning_task_registry.py | 17 ++++++-- 7 files changed, 166 insertions(+), 13 deletions(-) diff --git a/src/adcp/decisioning/dispatch.py b/src/adcp/decisioning/dispatch.py index e23dfcbf7..9dc5f93e4 100644 --- a/src/adcp/decisioning/dispatch.py +++ b/src/adcp/decisioning/dispatch.py @@ -535,10 +535,49 @@ async def _invoke_platform_method( # Adopter raised structured error — propagate verbatim. The # outer middleware projects to the wire envelope. raise + except TypeError as exc: + # Most likely an arg_projector signature-drift bug — adopter + # renamed update_media_buy's `patch` kwarg → `update`, etc. + # Bare INTERNAL_ERROR would hide the cause; project to + # INVALID_REQUEST with a hint pointing at the adopter's + # method signature so they fix it without a server-log dive. + # Note: server logs see the full traceback; wire response + # stays opaque. + if arg_projector is not None: + logger.exception( + "TypeError invoking platform.%s — likely arg_projector " + "signature drift (kwargs %s vs adopter signature)", + method_name, + sorted(arg_projector.keys()), + ) + raise AdcpError( + "INVALID_REQUEST", + message=( + f"Platform method {method_name!r} signature mismatch — " + "the framework's wire-shape projection sent " + f"kwargs {sorted(arg_projector.keys())!r} + ctx, but " + "the adopter method rejected them. Check the " + "method's Python signature against the per-specialism " + "Protocol class (typically a renamed parameter)." + ), + recovery="terminal", + ) from exc + # Non-projected TypeError — fall through to generic wrap. + logger.exception( + "Unhandled exception in platform.%s — wrapping to INTERNAL_ERROR", + method_name, + ) + raise AdcpError( + "INTERNAL_ERROR", + message="An internal error occurred", + recovery="terminal", + ) from exc except Exception as exc: # Wrap unexpected exceptions so the wire never sees a stack # trace. Adopter logs the original via observability hooks; - # __cause__ is preserved for server-side debugging. + # __cause__ is preserved for server-side debugging (the wire + # ``AdcpError.to_wire()`` projection deliberately omits + # __cause__ — middleware MUST NOT format it into the response). logger.exception( "Unhandled exception in platform.%s — wrapping to INTERNAL_ERROR", method_name, diff --git a/src/adcp/decisioning/resolve.py b/src/adcp/decisioning/resolve.py index 10bcf216d..84757ac7e 100644 --- a/src/adcp/decisioning/resolve.py +++ b/src/adcp/decisioning/resolve.py @@ -164,8 +164,15 @@ async def creative_format( ) +#: Module-level singleton — one stub instance per process. The stub +#: methods always raise (no warned-once state to share, but consistency +#: with state.py's pattern + avoiding per-RequestContext allocation). +_DEFAULT_RESOLVER: ResourceResolver = _NotYetWiredResolver() + + def _make_default_resolver() -> ResourceResolver: - return _NotYetWiredResolver() + """Return the module-level :class:`_NotYetWiredResolver` singleton.""" + return _DEFAULT_RESOLVER __all__ = [ diff --git a/src/adcp/decisioning/state.py b/src/adcp/decisioning/state.py index 8035c8343..d499f331d 100644 --- a/src/adcp/decisioning/state.py +++ b/src/adcp/decisioning/state.py @@ -259,7 +259,22 @@ def _reset_state_stub_warned() -> None: ] +# Module-level singleton — one stub instance per process. The +# warned-once set ``_STATE_STUB_WARNED`` is also module-level, so +# per-instance allocation buys nothing AND the docstring promises +# "per process per method, not per request". Using a singleton +# matches the contract and avoids per-RequestContext stub churn +# (round-4 review). +_DEFAULT_STATE_READER: StateReader = _NotYetWiredStateReader() + + # Re-exports needed by ``RequestContext`` field defaults but not part # of the public adopter-facing surface — keep below ``__all__``. def _make_default_state_reader() -> StateReader: - return _NotYetWiredStateReader() + """Return the module-level :class:`_NotYetWiredStateReader` singleton. + + Each :class:`RequestContext` shares the same stub instance; the + warned-once set is module-level so the visibility behavior is + correct across all requests in a process. + """ + return _DEFAULT_STATE_READER diff --git a/src/adcp/decisioning/task_registry.py b/src/adcp/decisioning/task_registry.py index 260daeb75..6a3cafd81 100644 --- a/src/adcp/decisioning/task_registry.py +++ b/src/adcp/decisioning/task_registry.py @@ -36,8 +36,11 @@ from __future__ import annotations import asyncio +import logging import time import uuid + +logger = logging.getLogger(__name__) from collections.abc import Awaitable, Callable from dataclasses import dataclass, field from typing import Any, ClassVar, Literal, Protocol, runtime_checkable @@ -438,16 +441,23 @@ async def update(self, progress: dict[str, Any]) -> None: """Write a progress payload. Transitions ``submitted`` → ``working`` on first call. - Errors are swallowed: a transient registry write failure must - not abort the handoff. Buyer-facing impact is a missed - progress event, not a failed task. + Errors are swallowed (logged at WARNING with traceback): + a transient registry write failure must not abort the handoff. + Buyer-facing impact is a missed progress event, not a failed + task. Adopters who need delivery guarantees plug a durable + registry; the warning surfaces the transient via existing + observability hooks so silent loss isn't truly invisible. """ try: await self._registry.update_progress(self.id, progress) except Exception: - # Swallow — preserve the handoff fn's progress in the - # face of registry transients. Adopters who need - # delivery guarantees plug a durable registry. + logger.warning( + "TaskHandoffContext.update(task_id=%s) suppressed " + "registry transient — progress event lost; handoff " + "continues", + self.id, + exc_info=True, + ) return async def heartbeat(self) -> None: diff --git a/tests/test_decisioning_context_state_resolve.py b/tests/test_decisioning_context_state_resolve.py index 76215346d..fb635fde9 100644 --- a/tests/test_decisioning_context_state_resolve.py +++ b/tests/test_decisioning_context_state_resolve.py @@ -202,6 +202,38 @@ def test_state_stub_governance_context_warning_text() -> None: assert "fail-fast" in msg +def test_default_state_reader_is_module_singleton() -> None: + """Round-4 review: ``_make_default_state_reader`` returns the same + instance across calls (module-level singleton). Per-RequestContext + stub allocation buys nothing since the warned-once set is also + module-level — singleton matches the contract and avoids stub + churn.""" + from adcp.decisioning.state import _make_default_state_reader + + a = _make_default_state_reader() + b = _make_default_state_reader() + assert a is b + + +def test_default_resolver_is_module_singleton() -> None: + """Same singleton pattern for ``_make_default_resolver``.""" + from adcp.decisioning.resolve import _make_default_resolver + + a = _make_default_resolver() + b = _make_default_resolver() + assert a is b + + +def test_request_context_default_factories_share_singleton() -> None: + """Each RequestContext instance shares the same default stub + instances — no per-context allocation. Verifies the field + default_factory plumbing reads the singletons correctly.""" + a = RequestContext() + b = RequestContext() + assert a.state is b.state + assert a.resolve is b.resolve + + def test_property_list_alias_pinned_to_reference() -> None: """``adcp.decisioning.PropertyList`` aliases ``PropertyListReference`` deliberately (the spec models both as diff --git a/tests/test_decisioning_dispatch.py b/tests/test_decisioning_dispatch.py index 5b9689542..43f75f3ea 100644 --- a/tests/test_decisioning_dispatch.py +++ b/tests/test_decisioning_dispatch.py @@ -524,6 +524,47 @@ async def get_products(self, req, ctx): assert "oops, internal-state bug" not in str(exc_info.value) +@pytest.mark.asyncio +async def test_invoke_arg_projector_signature_drift_projects_invalid_request( + executor: ThreadPoolExecutor, +) -> None: + """When an adopter renames a Pydantic field projected via + arg_projector (e.g., ``patch`` → ``update``), the framework's + kwargs-unpack hits TypeError. Round-4 review P1: project to + INVALID_REQUEST with a hint, NOT bare INTERNAL_ERROR — adopters + fix the signature without a server-log dive.""" + from pydantic import BaseModel + + class _PatchModel(BaseModel): + media_buy_id: str + + class _DriftedPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities() + accounts = SingletonAccounts(account_id="x") + + # Adopter renamed `patch` → `update_data`. Wire shape still + # has both fields, but our arg_projector kwargs key mismatches. + async def update_media_buy(self, media_buy_id, update_data, ctx): + return {} + + ctx = _build_request_context(ToolContext(), Account(id="x"), None) + patch = _PatchModel(media_buy_id="mb_1") + with pytest.raises(AdcpError) as exc_info: + await _invoke_platform_method( + _DriftedPlatform(), + "update_media_buy", + patch, + ctx, + executor=executor, + registry=InMemoryTaskRegistry(), + arg_projector={"media_buy_id": "mb_1", "patch": patch}, + ) + assert exc_info.value.code == "INVALID_REQUEST" + msg = str(exc_info.value) + assert "signature mismatch" in msg + assert "update_media_buy" in msg + + @pytest.mark.asyncio async def test_invoke_with_arg_projector_uses_kwargs( executor: ThreadPoolExecutor, diff --git a/tests/test_decisioning_task_registry.py b/tests/test_decisioning_task_registry.py index 57616cb02..b4f29a4f2 100644 --- a/tests/test_decisioning_task_registry.py +++ b/tests/test_decisioning_task_registry.py @@ -296,16 +296,25 @@ async def test_handoff_context_update_routes_to_registry() -> None: @pytest.mark.asyncio -async def test_handoff_context_update_swallows_registry_errors() -> None: +async def test_handoff_context_update_swallows_registry_errors(caplog) -> None: """A transient registry write failure must not abort the handoff fn. ``update`` swallows; the buyer-facing impact is a missed - progress event, not a failed task.""" + progress event, not a failed task. Round-4 review: the swallow + now logs at WARNING with traceback so transient failures aren't + silently invisible to operators.""" failing_registry = AsyncMock(spec=TaskRegistry) failing_registry.update_progress.side_effect = RuntimeError("DB down") handoff_ctx = TaskHandoffContext(id="task_x", _registry=failing_registry) - # Must NOT raise. - await handoff_ctx.update({"step": 1}) + import logging + + with caplog.at_level(logging.WARNING): + # Must NOT raise. + await handoff_ctx.update({"step": 1}) failing_registry.update_progress.assert_called_once_with("task_x", {"step": 1}) + # Round-4 review: swallow now logs WARNING with traceback. + assert any( + "task_x" in r.message and "registry transient" in r.message for r in caplog.records + ), "TaskHandoffContext.update suppression must log WARNING with task_id" @pytest.mark.asyncio From 54ba969523ab396fa1f52877609253944fc2a259 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Thu, 30 Apr 2026 12:54:07 -0400 Subject: [PATCH 15/18] feat(decisioning): hello_seller examples + integration tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stage 4 — vertical-slice examples that demonstrate the v6.0 DecisioningPlatform from a single screen, plus integration tests that exercise the full dispatch path end-to-end. - examples/hello_seller.py: smallest possible sales-non-guaranteed adopter. Five sync methods (the spec-required core) + AdcpError raise on empty packages. Validates against the framework's full validate_platform surface (specialism method coverage, AccountStore wiring, composite caller_identity). - examples/hello_seller_async_handoff.py: hybrid platform demonstrating all three return shapes of create_media_buy in one body — sync success / AdcpError raise (correctable rejection) / TaskHandoff (HITL trafficker review with progress updates). - tests/test_hello_seller_integration.py: 7 tests covering the sync dispatch path — typed Pydantic request → resolved account → typed response, AdcpError correctable rejection, account_resolution threading two principals, composite caller_identity wiring (D9 round-3), advertised_tools class attribute pinned. - tests/test_hello_seller_async_handoff_integration.py: 5 tests covering the hybrid path — sync arm returns success envelope without task_id, AdcpError arm raises with full to_wire() envelope, handoff arm returns wire Submitted envelope synchronously and async registry persists the terminal artifact, progress updates from the handoff fn visible via registry, handoff fn AdcpError persists via registry.fail. Foundation tests: 161 (+12). Full suite: 2535 passed, 17 skipped, 1 xfailed. ruff + mypy + black clean. Stage 4 complete except for the codegen drift test (deferred to follow-up PR per design doc — Stage 4 file plan note). Co-Authored-By: Claude Opus 4.7 (1M context) --- examples/hello_seller.py | 210 ++++++++++++++ examples/hello_seller_async_handoff.py | 274 ++++++++++++++++++ ..._hello_seller_async_handoff_integration.py | 260 +++++++++++++++++ tests/test_hello_seller_integration.py | 254 ++++++++++++++++ 4 files changed, 998 insertions(+) create mode 100644 examples/hello_seller.py create mode 100644 examples/hello_seller_async_handoff.py create mode 100644 tests/test_hello_seller_async_handoff_integration.py create mode 100644 tests/test_hello_seller_integration.py diff --git a/examples/hello_seller.py b/examples/hello_seller.py new file mode 100644 index 000000000..bcae82364 --- /dev/null +++ b/examples/hello_seller.py @@ -0,0 +1,210 @@ +"""Hello-seller — the smallest possible v6.0 DecisioningPlatform. + +A minimal :class:`SalesPlatform` adopter showing the canonical surface: + +* :class:`DecisioningCapabilities` declared on the class body +* :class:`SingletonAccounts` for the dev/single-tenant case +* Three platform methods (``get_products``, ``create_media_buy``, + ``get_media_buy_delivery``) — all sync, sync return path + +Run:: + + uv run python examples/hello_seller.py + +Then: + +* MCP discovery: connect with any AdCP MCP buyer +* List tools: should advertise just the 3 implemented + the + framework's protocol tools +* Call ``get_products``: returns one product +* Call ``create_media_buy``: returns the success envelope +""" + +from __future__ import annotations + +from typing import Any + +from adcp.decisioning import ( + AdcpError, + DecisioningCapabilities, + DecisioningPlatform, + RequestContext, + SingletonAccounts, + serve, +) + + +class HelloSeller(DecisioningPlatform): + """The canonical minimal v6.0 sales-non-guaranteed adopter. + + Implements the three sync methods every sales-* specialism + requires for a buyer to discover, transact, and read delivery. + Production sellers would add ``update_media_buy`` and + ``sync_creatives`` to satisfy the full sales-non-guaranteed + contract; this example focuses on the common-path subset that + fits in one screen. + """ + + capabilities = DecisioningCapabilities( + specialisms=["sales-non-guaranteed"], + channels=["display"], + pricing_models=["cpm"], + ) + accounts = SingletonAccounts(account_id="hello") + + def get_products( + self, + req: Any, + ctx: RequestContext[Any], + ) -> dict[str, Any]: + """Return a single example product. Sync — no HITL.""" + return { + "products": [ + { + "product_id": "display-rotation", + "name": "Display Rotation", + "description": "300x250 banner across our example properties", + "delivery_type": "non_guaranteed", + "publisher_properties": [ + {"publisher_domain": "example.com", "selection_type": "all"}, + ], + "format_ids": [ + { + "agent_url": "https://creative.adcontextprotocol.org/", + "id": "display_300x250", + }, + ], + "pricing_options": [ + { + "pricing_option_id": "po-cpm-default", + "pricing_model": "cpm", + "floor_price": 5.0, + "currency": "USD", + }, + ], + "reporting_capabilities": { + "available_metrics": ["impressions", "spend"], + "available_reporting_frequencies": ["daily"], + "date_range_support": "date_range", + "supports_webhooks": False, + "expected_delay_minutes": 60, + "timezone": "UTC", + }, + "delivery_measurement": {"provider": "internal"}, + }, + ], + } + + def create_media_buy( + self, + req: Any, + ctx: RequestContext[Any], + ) -> dict[str, Any]: + """Sync fast path — accept the request and return a media_buy_id. + + Production sellers branch on a budget/policy check here and + return :meth:`ctx.handoff_to_task(fn)` for HITL review (see + ``examples/hello_seller_async_handoff.py``). Hello-seller + accepts everything; reject obviously-broken budgets via + :class:`AdcpError`. + """ + # Pre-flight: reject zero-budget requests with a structured + # error so buyers get a clear correction signal. Real sellers + # check against a published floor; this just demonstrates the + # AdcpError raise-and-project pattern. + packages = self._get_packages(req) + if not packages: + raise AdcpError( + "INVALID_REQUEST", + message="At least one package is required", + field="packages", + recovery="correctable", + ) + + return { + "media_buy_id": f"mb_{ctx.account.id}_{len(packages)}", + "status": "active", + "packages": [ + { + "package_id": f"pkg_{i}", + "product_id": pkg.get("product_id", "display-rotation"), + "pricing_option_id": pkg.get("pricing_option_id", "po-cpm-default"), + } + for i, pkg in enumerate(packages) + ], + } + + def update_media_buy( + self, + media_buy_id: str, + patch: Any, + ctx: RequestContext[Any], + ) -> dict[str, Any]: + """Sync update — accept any patch as a no-op echo. The + ``(media_buy_id, patch, ctx)`` signature mirrors the + :class:`SalesPlatform` Protocol (D1 arg-projection — the + framework's handler.py shim splits the wire request shape + into separate kwargs).""" + return { + "media_buy_id": media_buy_id, + "status": "active", + "packages": [], + } + + def sync_creatives( + self, + req: Any, + ctx: RequestContext[Any], + ) -> dict[str, Any]: + """Sync creative review — auto-approve every submitted + creative. Production sellers run S&P review here and either + return mixed approved/pending rows, or hand off the entire + batch via :meth:`ctx.handoff_to_task` for trafficker + review (see the async-handoff example).""" + creatives = getattr(req, "creatives", None) or [] + return { + "creatives": [ + { + "creative_id": ( + c.creative_id if hasattr(c, "creative_id") else c.get("creative_id") + ), + "approval_status": "approved", + } + for c in creatives + ], + } + + def get_media_buy_delivery( + self, + req: Any, + ctx: RequestContext[Any], + ) -> dict[str, Any]: + """Stub delivery snapshot — flat zeros.""" + return { + "deliveries": [ + { + "media_buy_id": getattr(req, "media_buy_id", "mb_unknown"), + "totals": {"impressions": 0, "spend": 0.0}, + }, + ], + } + + @staticmethod + def _get_packages(req: Any) -> list[dict[str, Any]]: + """Pull the wire ``packages`` array from the request, tolerating + both Pydantic and dict shapes (the framework's typed dispatch + gives Pydantic; tests / scripts may pass dicts).""" + if hasattr(req, "packages"): + packages = req.packages or [] + return [p.model_dump() if hasattr(p, "model_dump") else dict(p) for p in packages] + if isinstance(req, dict): + return list(req.get("packages") or []) + return [] + + +if __name__ == "__main__": + # serve() builds the PlatformHandler, allocates the executor + + # registry, validates the platform at boot, and starts the MCP + # server. Default port 3001 over streamable-http; override via + # ``serve(seller, port=...)``. + serve(HelloSeller(), name="hello-seller") diff --git a/examples/hello_seller_async_handoff.py b/examples/hello_seller_async_handoff.py new file mode 100644 index 000000000..73bad06c5 --- /dev/null +++ b/examples/hello_seller_async_handoff.py @@ -0,0 +1,274 @@ +"""Hybrid v6.0 DecisioningPlatform — sync fast path + HITL handoff ++ AdcpError correctable rejection. + +Demonstrates the three return shapes a single ``create_media_buy`` +method can produce: + +1. **Sync success** — `return CreateMediaBuySuccessResponse(...)` / + ``dict``. Framework projects to the wire success envelope. +2. **AdcpError raise** — `raise AdcpError("BUDGET_TOO_LOW", ...)`. + Framework projects to the wire ``adcp_error`` envelope with + ``recovery: 'correctable'`` so the buyer retries with the fixed + field. +3. **TaskHandoff** — `return ctx.handoff_to_task(fn)`. Framework + allocates a task_id, returns the wire ``Submitted`` envelope to + the buyer immediately, runs ``fn`` in the background, persists + the terminal artifact via the registry. Buyer polls + ``tasks/get`` (or receives the webhook). + +Branch per-call: programmatic remnant goes sync, guaranteed inventory +goes through trafficker review, aggressive budgets get rejected. + +Run:: + + uv run python examples/hello_seller_async_handoff.py +""" + +from __future__ import annotations + +import asyncio +from typing import Any + +from adcp.decisioning import ( + AdcpError, + DecisioningCapabilities, + DecisioningPlatform, + RequestContext, + SalesResult, + SingletonAccounts, + TaskHandoffContext, + serve, +) + +# Tunable thresholds — keep at top-level so the demo is easy to tweak. +_MIN_VIABLE_BUDGET_CPM = 0.50 # USD per thousand +_HITL_REVIEW_THRESHOLD = 50_000.0 # buys above this go through review + + +class HelloSellerHybrid(DecisioningPlatform): + """Adopter that mixes sync fast-path, AdcpError rejection, and + TaskHandoff in a single ``create_media_buy`` body. + + The sync methods (``get_products``, ``get_media_buy_delivery``) + stay sync. The hybrid path is on the mutating tools that may + need HITL review. + """ + + capabilities = DecisioningCapabilities( + specialisms=["sales-non-guaranteed"], + channels=["display"], + pricing_models=["cpm"], + ) + accounts = SingletonAccounts(account_id="hello-hybrid") + + def get_products( + self, + req: Any, + ctx: RequestContext[Any], + ) -> dict[str, Any]: + return { + "products": [ + { + "product_id": "display-rotation", + "name": "Display Rotation", + "description": ( + "Hybrid product — small budgets accept sync, " "large budgets go to review" + ), + "delivery_type": "non_guaranteed", + "publisher_properties": [ + {"publisher_domain": "example.com", "selection_type": "all"}, + ], + "format_ids": [ + { + "agent_url": "https://creative.adcontextprotocol.org/", + "id": "display_300x250", + }, + ], + "pricing_options": [ + { + "pricing_option_id": "po-cpm-default", + "pricing_model": "cpm", + "floor_price": _MIN_VIABLE_BUDGET_CPM, + "currency": "USD", + }, + ], + "reporting_capabilities": { + "available_metrics": ["impressions", "spend"], + "available_reporting_frequencies": ["daily"], + "date_range_support": "date_range", + "supports_webhooks": False, + "expected_delay_minutes": 60, + "timezone": "UTC", + }, + "delivery_measurement": {"provider": "internal"}, + }, + ], + } + + def create_media_buy( + self, + req: Any, + ctx: RequestContext[Any], + ) -> SalesResult[dict[str, Any]]: + """Hybrid: branch per-call between sync, AdcpError, and handoff. + + :returns: Either a :class:`dict` (sync success), or + :class:`TaskHandoff` returned from + :meth:`ctx.handoff_to_task`. Type alias + :data:`SalesResult` covers both arms. + + :raises AdcpError: when the budget is below the seller's + minimum viable threshold. Buyer fixes ``total_budget`` + and retries (``recovery='correctable'``). + """ + total_budget = self._extract_total_budget(req) + + # Arm 1: budget below floor → AdcpError correctable rejection. + if total_budget < _MIN_VIABLE_BUDGET_CPM: + raise AdcpError( + "BUDGET_TOO_LOW", + message=( + f"total_budget {total_budget} USD below minimum " + f"viable {_MIN_VIABLE_BUDGET_CPM} USD" + ), + field="total_budget", + recovery="correctable", + suggestion=( + f"Increase total_budget to at least " + f"{_MIN_VIABLE_BUDGET_CPM} USD to engage trafficking." + ), + ) + + # Arm 2: large buy → handoff for trafficker review. + if total_budget >= _HITL_REVIEW_THRESHOLD: + return ctx.handoff_to_task( + self._async_trafficker_review, + ) + + # Arm 3: small/medium buy → sync acceptance. + return { + "media_buy_id": f"mb_sync_{ctx.account.id}_{int(total_budget)}", + "status": "active", + "packages": self._echo_packages(req), + } + + def update_media_buy( + self, + media_buy_id: str, + patch: Any, + ctx: RequestContext[Any], + ) -> dict[str, Any]: + """Sync update — accept any patch.""" + return {"media_buy_id": media_buy_id, "status": "active", "packages": []} + + def sync_creatives( + self, + req: Any, + ctx: RequestContext[Any], + ) -> dict[str, Any]: + """Sync auto-approve — production would handoff for S&P review + when a new buyer submits unfamiliar creative.""" + creatives = getattr(req, "creatives", None) or [] + return { + "creatives": [ + { + "creative_id": ( + c.creative_id if hasattr(c, "creative_id") else c.get("creative_id") + ), + "approval_status": "approved", + } + for c in creatives + ], + } + + def get_media_buy_delivery( + self, + req: Any, + ctx: RequestContext[Any], + ) -> dict[str, Any]: + return { + "deliveries": [ + { + "media_buy_id": getattr(req, "media_buy_id", "mb_unknown"), + "totals": {"impressions": 0, "spend": 0.0}, + }, + ], + } + + # ---- Handoff fn ---- + + async def _async_trafficker_review( + self, + task_ctx: TaskHandoffContext, + ) -> dict[str, Any]: + """Background fn the framework runs after the Submitted + envelope returns. Adopters wire this to their own queue / + Slack / approval system; here we simulate a brief review and + return the success. + + ``task_ctx.id`` is framework-allocated BEFORE this fn runs + — adopters persist it to their queue so the trafficker's + approve/reject action can call back into the registry. + """ + await task_ctx.update({"step": "queued for trafficker review"}) + # Simulate review latency. Real adopters wait on an external + # signal (Slack approval, queue message, etc.). + await asyncio.sleep(0.05) + await task_ctx.update({"step": "trafficker approved"}) + return { + "media_buy_id": f"mb_reviewed_{task_ctx.id}", + "status": "active", + "packages": [], + } + + # ---- Helpers ---- + + @staticmethod + def _extract_total_budget(req: Any) -> float: + """Coerce ``total_budget`` from the typed Pydantic model OR + a raw dict. The wire shape is + ``{currency, amount}`` per ``money.json``.""" + raw = ( + req.total_budget + if hasattr(req, "total_budget") + else (req.get("total_budget") if isinstance(req, dict) else None) + ) + if raw is None: + return 0.0 + if hasattr(raw, "amount"): + return float(raw.amount or 0.0) + if isinstance(raw, dict): + return float(raw.get("amount") or 0.0) + try: + return float(raw) + except (TypeError, ValueError): + return 0.0 + + @staticmethod + def _echo_packages(req: Any) -> list[dict[str, Any]]: + packages = getattr(req, "packages", None) or [] + return [ + { + "package_id": f"pkg_{i}", + "product_id": ( + p.product_id + if hasattr(p, "product_id") + else p.get("product_id", "display-rotation") + ), + "pricing_option_id": ( + p.pricing_option_id + if hasattr(p, "pricing_option_id") + else p.get("pricing_option_id", "po-cpm-default") + ), + } + for i, p in enumerate(packages) + ] + + +if __name__ == "__main__": + # Same serve(...) call as the sync example. The HITL flow needs + # a TaskRegistry; serve() wires InMemoryTaskRegistry by default + # for local dev. In production, set + # ADCP_DECISIONING_ALLOW_INMEMORY_TASKS=1 (single-process pilot) + # OR pass registry= a durable impl (Postgres-backed v6.1). + serve(HelloSellerHybrid(), name="hello-seller-hybrid") diff --git a/tests/test_hello_seller_async_handoff_integration.py b/tests/test_hello_seller_async_handoff_integration.py new file mode 100644 index 000000000..948958a83 --- /dev/null +++ b/tests/test_hello_seller_async_handoff_integration.py @@ -0,0 +1,260 @@ +"""Hybrid + AdcpError integration tests for +``examples/hello_seller_async_handoff.py``. + +Exercises the three return shapes of ``create_media_buy`` in a single +test surface: + +1. Sync success (medium budget) → typed response +2. AdcpError raise (sub-floor budget) → correctable rejection envelope +3. TaskHandoff (large budget) → wire Submitted envelope, then + asynchronous registry persistence of the terminal artifact + +Per dispatch design D13 — vertical-slice example tests as +first-class deliverables. +""" + +from __future__ import annotations + +import asyncio +import sys +from concurrent.futures import ThreadPoolExecutor +from pathlib import Path + +import pytest + +# examples/ is not a package — add to sys.path. +_EXAMPLES = str(Path(__file__).parent.parent / "examples") +if _EXAMPLES not in sys.path: + sys.path.insert(0, _EXAMPLES) + +import hello_seller_async_handoff as _hybrid # noqa: E402 + +from adcp.decisioning import ( # noqa: E402 + AdcpError, + InMemoryTaskRegistry, +) +from adcp.decisioning.handler import PlatformHandler # noqa: E402 +from adcp.server.base import ToolContext # noqa: E402 + + +@pytest.fixture +def executor(): + pool = ThreadPoolExecutor(max_workers=4, thread_name_prefix="test-int-handoff-") + yield pool + pool.shutdown(wait=True) + + +@pytest.fixture +def registry() -> InMemoryTaskRegistry: + return InMemoryTaskRegistry() + + +@pytest.fixture +def handler(executor: ThreadPoolExecutor, registry: InMemoryTaskRegistry) -> PlatformHandler: + return PlatformHandler( + _hybrid.HelloSellerHybrid(), + executor=executor, + registry=registry, + ) + + +def _build_request(*, total_budget: float, idem_suffix: str): + """Build a valid CreateMediaBuyRequest with the given budget. The + hybrid platform branches on ``total_budget`` to pick sync vs. + handoff vs. AdcpError. The wire shape uses ``total_budget`` as + a typed object ``{currency, amount}``.""" + from adcp.types import CreateMediaBuyRequest + + return CreateMediaBuyRequest( + account={"account_id": "buyer-1"}, + brand={"domain": "buyer.example.com"}, + idempotency_key=f"idem_handoff_test_{idem_suffix}_aaaa", + start_time="2026-05-01T00:00:00Z", + end_time="2026-05-31T23:59:59Z", + total_budget={"currency": "USD", "amount": total_budget}, + packages=[ + { + "product_id": "display-rotation", + "pricing_option_id": "po-cpm-default", + "budget": total_budget, + }, + ], + ) + + +# ---- Arm 1: sync success ---- + + +@pytest.mark.asyncio +async def test_create_media_buy_medium_budget_returns_sync_success( + handler: PlatformHandler, +) -> None: + """Budget between the floor and the HITL threshold goes through + sync — typed response, no task_id, status=active.""" + req = _build_request(total_budget=5000.0, idem_suffix="medium") + resp = await handler.create_media_buy(req, ToolContext()) + assert isinstance(resp, dict) + # Sync arm: real media_buy_id, no task_id. + assert resp["media_buy_id"].startswith("mb_sync_") + assert "task_id" not in resp + assert resp["status"] == "active" + + +# ---- Arm 2: AdcpError correctable rejection ---- + + +@pytest.mark.asyncio +async def test_create_media_buy_below_floor_raises_adcp_error( + handler: PlatformHandler, +) -> None: + """Budget below the seller's floor → AdcpError correctable. The + framework propagates verbatim (not wrapped to INTERNAL_ERROR); + wire ``adcp_error`` envelope contains code + recovery + field + + suggestion.""" + req = _build_request(total_budget=0.10, idem_suffix="cheap") + with pytest.raises(AdcpError) as exc_info: + await handler.create_media_buy(req, ToolContext()) + + err = exc_info.value + assert err.code == "BUDGET_TOO_LOW" + assert err.recovery == "correctable" + assert err.field == "total_budget" + assert err.suggestion is not None + assert "0.50" in err.suggestion or "0.5" in err.suggestion + + # to_wire() projection includes every populated field — adopters + # / middleware that surface this to buyers see the full envelope. + wire = err.to_wire() + assert wire["code"] == "BUDGET_TOO_LOW" + assert wire["recovery"] == "correctable" + assert wire["field"] == "total_budget" + assert "suggestion" in wire + + +# ---- Arm 3: TaskHandoff lifecycle ---- + + +@pytest.mark.asyncio +async def test_create_media_buy_large_budget_returns_submitted_envelope( + handler: PlatformHandler, + registry: InMemoryTaskRegistry, +) -> None: + """Budget above the HITL threshold → ctx.handoff_to_task. The + framework returns the wire Submitted envelope SYNCHRONOUSLY, + persists the task in 'submitted' state, runs the handoff fn in + the background, then transitions to 'completed' with the + terminal artifact.""" + req = _build_request(total_budget=100_000.0, idem_suffix="enterprise") + resp = await handler.create_media_buy(req, ToolContext()) + + # Sync return is the Submitted envelope. + assert isinstance(resp, dict) + assert resp["status"] == "submitted" + assert resp["task_type"] == "create_media_buy" + task_id = resp["task_id"] + assert task_id.startswith("task_") + + # The handoff fn runs in the background; wait for it to complete. + # The hybrid example's _async_trafficker_review awaits 50ms. + deadline = asyncio.get_running_loop().time() + 2.0 + final_state = "submitted" + while asyncio.get_running_loop().time() < deadline: + rec = await registry.get(task_id, expected_account_id="hello-hybrid:anonymous") + if rec is not None and rec["state"] in {"completed", "failed"}: + final_state = rec["state"] + break + await asyncio.sleep(0.05) + + assert final_state == "completed", f"Expected handoff fn to complete; got state={final_state}" + rec = await registry.get(task_id, expected_account_id="hello-hybrid:anonymous") + assert rec is not None + assert rec["state"] == "completed" + assert rec["result"]["media_buy_id"].startswith("mb_reviewed_") + assert rec["result"]["status"] == "active" + + +@pytest.mark.asyncio +async def test_handoff_progress_updates_visible_via_registry( + handler: PlatformHandler, + registry: InMemoryTaskRegistry, +) -> None: + """The handoff fn calls ``task_ctx.update(progress)``; buyers + polling tasks/get see the latest progress payload while the task + is in 'working' state. Verifies the update_progress wiring + end-to-end (registry write + state transition).""" + req = _build_request(total_budget=200_000.0, idem_suffix="progress") + resp = await handler.create_media_buy(req, ToolContext()) + task_id = resp["task_id"] + + # Give the background fn a moment to fire its first update(). + # The first update transitions submitted → working. + deadline = asyncio.get_running_loop().time() + 2.0 + final_state = None + while asyncio.get_running_loop().time() < deadline: + rec = await registry.get(task_id, expected_account_id="hello-hybrid:anonymous") + if rec is not None and rec["state"] == "completed": + final_state = rec["state"] + # The example's last update wrote "trafficker approved"; + # registry stores the LATEST progress payload. + assert rec["progress"] == {"step": "trafficker approved"} + break + await asyncio.sleep(0.02) + + assert final_state == "completed" + + +# ---- Arm 3 fail path: handoff fn raises AdcpError ---- + + +@pytest.mark.asyncio +async def test_handoff_fn_adcp_error_persists_via_registry_fail( + handler: PlatformHandler, + registry: InMemoryTaskRegistry, +) -> None: + """When the handoff fn itself raises AdcpError, the framework + routes to registry.fail(task_id, err.to_wire()). tasks/get + returns the failure envelope verbatim. Tested by stitching a + custom hybrid platform whose handoff fn rejects.""" + from adcp.decisioning import ( + DecisioningCapabilities, + DecisioningPlatform, + SingletonAccounts, + ) + + async def _rejecting_handoff(task_ctx): + raise AdcpError( + "POLICY_VIOLATION", + message="trafficker rejected after review", + recovery="terminal", + details={"reviewer": "trafficker-1"}, + ) + + class _RejectingPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities(specialisms=[]) + accounts = SingletonAccounts(account_id="reject-test") + + async def create_media_buy(self, req, ctx): + return ctx.handoff_to_task(_rejecting_handoff) + + rejecting_handler = PlatformHandler( + _RejectingPlatform(), + executor=handler._executor, + registry=registry, + ) + req = _build_request(total_budget=5000.0, idem_suffix="reject") + resp = await rejecting_handler.create_media_buy(req, ToolContext()) + task_id = resp["task_id"] + + deadline = asyncio.get_running_loop().time() + 2.0 + rec = None + while asyncio.get_running_loop().time() < deadline: + rec = await registry.get(task_id, expected_account_id="reject-test:anonymous") + if rec is not None and rec["state"] == "failed": + break + await asyncio.sleep(0.02) + + assert rec is not None + assert rec["state"] == "failed" + assert rec["error"]["code"] == "POLICY_VIOLATION" + assert rec["error"]["recovery"] == "terminal" + assert rec["error"]["details"]["reviewer"] == "trafficker-1" diff --git a/tests/test_hello_seller_integration.py b/tests/test_hello_seller_integration.py new file mode 100644 index 000000000..79bfc086d --- /dev/null +++ b/tests/test_hello_seller_integration.py @@ -0,0 +1,254 @@ +"""Vertical-slice integration tests for ``examples/hello_seller.py``. + +Exercises the full v6.0 dispatch path — typed request → account +resolution via :class:`SingletonAccounts` → :class:`RequestContext` +hydration → platform method invocation → typed response — without +spinning up an MCP server. The MCP transport is exercised by the +adcp-client-python repo's own MCP test surface (separate concern); +here we focus on the decisioning framework wiring. + +Two-example file plan per the dispatch design's D13: + +* This file — sync vertical slice. +* :file:`tests/test_hello_seller_async_handoff_integration.py` — + hybrid + AdcpError round-trip. +""" + +from __future__ import annotations + +import sys +from concurrent.futures import ThreadPoolExecutor +from pathlib import Path +from typing import Any + +import pytest + +# examples/ is not a package — add to sys.path so the integration +# tests can import the module directly. +_EXAMPLES = str(Path(__file__).parent.parent / "examples") +if _EXAMPLES not in sys.path: + sys.path.insert(0, _EXAMPLES) + +import hello_seller as _hello # noqa: E402 + +from adcp.decisioning import ( # noqa: E402 + AdcpError, + InMemoryTaskRegistry, +) +from adcp.decisioning.handler import PlatformHandler # noqa: E402 +from adcp.server.base import ToolContext # noqa: E402 + + +@pytest.fixture +def executor(): + pool = ThreadPoolExecutor(max_workers=4, thread_name_prefix="test-int-hello-") + yield pool + pool.shutdown(wait=True) + + +@pytest.fixture +def handler(executor: ThreadPoolExecutor) -> PlatformHandler: + return PlatformHandler( + _hello.HelloSeller(), + executor=executor, + registry=InMemoryTaskRegistry(), + ) + + +@pytest.mark.asyncio +async def test_get_products_returns_one_product(handler: PlatformHandler) -> None: + """End-to-end: typed Pydantic request → resolved account → platform + method → typed response. The hello_seller stubs return one product + with all spec-required fields populated.""" + from adcp.types import GetProductsRequest + + resp = await handler.get_products( + GetProductsRequest(buying_mode="brief", brief="anything"), + ToolContext(), + ) + # Response is the raw dict the platform returned; framework-level + # serialization happens at the wire layer (out of scope here). + assert isinstance(resp, dict) + products = resp["products"] + assert len(products) == 1 + p = products[0] + assert p["product_id"] == "display-rotation" + # Spec-required fields populated. + for required in ( + "name", + "description", + "delivery_type", + "publisher_properties", + "format_ids", + "pricing_options", + "reporting_capabilities", + ): + assert required in p, f"Product missing required field: {required}" + + +@pytest.mark.asyncio +async def test_create_media_buy_sync_path(handler: PlatformHandler) -> None: + """Hello seller's create_media_buy is sync — accepts the request + and returns the success envelope. media_buy_id encodes the + resolved account.id (proves account resolution wired correctly).""" + from adcp.types import CreateMediaBuyRequest + + req = CreateMediaBuyRequest( + account={"account_id": "buyer-1"}, + brand={"domain": "buyer.example.com"}, + idempotency_key="idem_int_test_aaaa1234", + start_time="2026-05-01T00:00:00Z", + end_time="2026-05-31T23:59:59Z", + packages=[ + { + "product_id": "display-rotation", + "pricing_option_id": "po-cpm-default", + "budget": 1000, + }, + ], + ) + resp = await handler.create_media_buy(req, ToolContext()) + assert isinstance(resp, dict) + # SingletonAccounts(account_id="hello") + no auth_info → resolved + # to "hello:anonymous" per per-principal scoping. The hello seller + # encodes the resolved account.id into media_buy_id. + assert resp["media_buy_id"].startswith("mb_hello:anonymous_"), resp + assert resp["status"] == "active" + assert len(resp["packages"]) == 1 + + +@pytest.mark.asyncio +async def test_create_media_buy_rejects_empty_packages( + handler: PlatformHandler, +) -> None: + """AdcpError raise-and-project — empty packages tripping the + platform's own correctable rejection. The framework propagates + AdcpError verbatim (not wrapped to INTERNAL_ERROR) so the wire + response carries the structured envelope. + + The wire schema also enforces ``packages.minItems: 1``, so + real-world buyers can't reach this branch — but adopters + relying on extra business validation (e.g., budget floors, + blocked products) hit the same code path. We construct via + ``model_construct`` to bypass Pydantic's pre-validation and + exercise the platform's defensive check.""" + from adcp.types import CreateMediaBuyRequest + + req = CreateMediaBuyRequest.model_construct( + account={"account_id": "buyer-1"}, + brand={"domain": "buyer.example.com"}, + idempotency_key="idem_int_test_bbbb1234", + start_time="2026-05-01T00:00:00Z", + end_time="2026-05-31T23:59:59Z", + packages=[], + ) + with pytest.raises(AdcpError) as exc_info: + await handler.create_media_buy(req, ToolContext()) + assert exc_info.value.code == "INVALID_REQUEST" + assert exc_info.value.recovery == "correctable" + assert exc_info.value.field == "packages" + + +@pytest.mark.asyncio +async def test_get_media_buy_delivery_returns_zeros( + handler: PlatformHandler, +) -> None: + """Stub delivery snapshot — proves the dispatch path works for a + second sync read tool.""" + from adcp.types import GetMediaBuyDeliveryRequest + + req = GetMediaBuyDeliveryRequest( + account={"account_id": "buyer-1"}, + media_buy_ids=["mb_x"], + ) + resp = await handler.get_media_buy_delivery(req, ToolContext()) + assert isinstance(resp, dict) + assert len(resp["deliveries"]) == 1 + assert resp["deliveries"][0]["totals"]["impressions"] == 0 + + +@pytest.mark.asyncio +async def test_account_resolution_threads_through( + handler: PlatformHandler, +) -> None: + """The framework's _build_request_context wires + ``ctx.account.id`` into the platform method via SingletonAccounts. + Different auth_info principals (set via ctx.metadata) yield + different account ids.""" + from adcp.decisioning import AuthInfo + from adcp.types import GetProductsRequest + + seen_ids: list[str] = [] + + # Inject an AuthInfo via ctx.metadata['adcp.auth_info'] and + # observe via the platform method body. + class _SpyHelloSeller(_hello.HelloSeller): + def get_products(self, req, ctx): + seen_ids.append(ctx.account.id) + return super().get_products(req, ctx) + + spy = PlatformHandler( + _SpyHelloSeller(), + executor=handler._executor, # share the fixture's executor + registry=InMemoryTaskRegistry(), + ) + + # Two different principals → two different per-principal account ids. + for principal in ("buyer-a", "buyer-b"): + ctx = ToolContext( + metadata={ + "adcp.auth_info": AuthInfo( + kind="signed_request", + principal=principal, + ), + }, + ) + await spy.get_products( + GetProductsRequest(buying_mode="brief", brief="x"), + ctx, + ) + + assert seen_ids == ["hello:buyer-a", "hello:buyer-b"] + + +@pytest.mark.asyncio +async def test_caller_identity_uses_composite_key( + handler: PlatformHandler, +) -> None: + """The framework sets ``ctx.caller_identity`` to the composite + cache scope key (D9 round-3 — module + qualname + account.id). + Idempotency middleware reads this; different stores can't collide.""" + from adcp.types import GetProductsRequest + + seen_caller: list[Any] = [] + + class _SpySeller(_hello.HelloSeller): + def get_products(self, req, ctx): + seen_caller.append(ctx.caller_identity) + return super().get_products(req, ctx) + + spy_handler = PlatformHandler( + _SpySeller(), + executor=handler._executor, + registry=InMemoryTaskRegistry(), + ) + await spy_handler.get_products( + GetProductsRequest(buying_mode="brief", brief="x"), + ToolContext(), + ) + assert seen_caller[0] == ("adcp.decisioning.accounts.SingletonAccounts:hello:anonymous") + + +@pytest.mark.asyncio +async def test_advertised_tools_class_attribute_set( + handler: PlatformHandler, +) -> None: + """The codegen-target ``advertised_tools`` ClassVar is populated + at class definition time on PlatformHandler — adopters get a + focused tools/list filter without manual registration (after + prep PR #318 wires __init_subclass__ auto-registration).""" + assert "get_products" in PlatformHandler.advertised_tools + assert "create_media_buy" in PlatformHandler.advertised_tools + assert "update_media_buy" in PlatformHandler.advertised_tools + assert "sync_creatives" in PlatformHandler.advertised_tools + assert "get_media_buy_delivery" in PlatformHandler.advertised_tools From 8868e3ff1cb2773b433adddb28b1607e44cd3512 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Thu, 30 Apr 2026 13:05:17 -0400 Subject: [PATCH 16/18] =?UTF-8?q?fix(decisioning):=20final=20review=20fixe?= =?UTF-8?q?s=20=E2=80=94=20typo=20fail-fast=20+=20namespace=20hygiene?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Final cross-cutting reviewer pass (code-reviewer + dx-expert + adtech-product-expert). Four highest-leverage items addressed; broader product feedback (URL-path account resolution, webhook-on-terminal-state, ErrorCode StrEnum) explicitly deferred to v6.1 per design doc roadmap. Fixes: 1. **DX P0: typo specialism slugs raise instead of warn.** Adopter typing "sales-non-guarateed" (missing 'n') previously got a UserWarning + 0 tools advertised — server boots, silently 404s every buyer call. Adopters running `python hello_seller.py` never see warnings on stderr's default filter. Now: difflib close-match (cutoff 0.7) raises AdcpError("INVALID_REQUEST") with "Did you mean..." hint AND structured details for tooling. Truly novel slugs (no close match) still get the soft UserWarning for forward-compat with v6.x+ specs. 2. **Code P1: don't leak task_id namespace into media_buy_id.** The hybrid example's _async_trafficker_review returned media_buy_id=f"mb_reviewed_{task_ctx.id}" — adopters copying this produce buyer-visible cross-namespace confusion. Switched to a fresh uuid prefix; integration test asserts task_id is NOT a substring of media_buy_id. 3. **Code P1: TaskHandoffContext.update suppression documented in example.** The handoff fn docstring now explicitly notes that registry write failures are logged at WARNING and suppressed. 4. **Code P1: logger placement in task_registry.py.** Moved `logger = logging.getLogger(__name__)` below the import block per PEP 8 (was placed mid-imports as a convenience). Test additions: - test_validate_platform_raises_on_typo_specialism - test_validate_platform_warns_on_novel_specialism (renamed from warns_on_unknown — preserves the forward-compat path) Foundation tests: 162 (+1). Full suite: 2536 passed. ruff + mypy + black clean. Items intentionally deferred to v6.1 / follow-up PRs per design doc roadmap: - Product P0: URL-path AccountStore mode for salesagent integration - Product P0: webhook-on-terminal-state for HITL polling avoidance - Product P0: idempotency middleware integration with composite caller_identity - Product P1: update_media_buy hybrid (gated on adcp#3392) - DX P1: ErrorCode StrEnum codegen (deferred follow-up) - DX P1: SalesResult union split (API change, defer) - DX P0: hello_seller.py size — file is 210 lines because sales-non-guaranteed requires 5 methods. Docstring accurate; rename / smaller-specialism alternative deferred. Co-Authored-By: Claude Opus 4.7 (1M context) --- examples/hello_seller_async_handoff.py | 20 +++++- src/adcp/decisioning/dispatch.py | 66 +++++++++++++++---- src/adcp/decisioning/task_registry.py | 4 +- tests/test_decisioning_dispatch.py | 39 +++++++++-- ..._hello_seller_async_handoff_integration.py | 5 ++ 5 files changed, 110 insertions(+), 24 deletions(-) diff --git a/examples/hello_seller_async_handoff.py b/examples/hello_seller_async_handoff.py index 73bad06c5..6cea3abd4 100644 --- a/examples/hello_seller_async_handoff.py +++ b/examples/hello_seller_async_handoff.py @@ -206,17 +206,31 @@ async def _async_trafficker_review( Slack / approval system; here we simulate a brief review and return the success. - ``task_ctx.id`` is framework-allocated BEFORE this fn runs - — adopters persist it to their queue so the trafficker's + ``task_ctx.id`` is framework-allocated BEFORE this fn runs — + adopters persist it to their queue so the trafficker's approve/reject action can call back into the registry. + + ``task_ctx.update(progress)`` writes the progress payload AND + transitions the task to ``working`` state on first call. + Registry write failures are suppressed (logged at WARNING with + traceback) so a transient registry hiccup doesn't abort the + handoff fn — buyer-facing impact is a missed progress event, + not a failed task. """ await task_ctx.update({"step": "queued for trafficker review"}) # Simulate review latency. Real adopters wait on an external # signal (Slack approval, queue message, etc.). await asyncio.sleep(0.05) await task_ctx.update({"step": "trafficker approved"}) + # Adopter media_buy_id allocation — DON'T leak the framework's + # task_id namespace here. Buyers reading + # ``media_buy_id.startswith("task_")`` would conflate the two + # IDs. Real adopters mint media_buy_id from their own backend + # store; the example just synthesizes a stable string. + import uuid + return { - "media_buy_id": f"mb_reviewed_{task_ctx.id}", + "media_buy_id": f"mb_reviewed_{uuid.uuid4().hex[:8]}", "status": "active", "packages": [], } diff --git a/src/adcp/decisioning/dispatch.py b/src/adcp/decisioning/dispatch.py index 9dc5f93e4..aa9b120c3 100644 --- a/src/adcp/decisioning/dispatch.py +++ b/src/adcp/decisioning/dispatch.py @@ -36,6 +36,7 @@ import asyncio import contextvars +import difflib import functools import logging import warnings @@ -267,18 +268,59 @@ def validate_platform(platform: DecisioningPlatform) -> None: missing.append((specialism, method_name)) if unknown: - warnings.warn( - ( - f"DecisioningPlatform claims unknown specialism(s) " - f"{sorted(unknown)!r}. Either typos (compare against the AdCP " - f"specialism enum: {sorted(REQUIRED_METHODS_PER_SPECIALISM.keys())}), " - "or your framework version predates the spec. Required-method " - "validation is skipped for these specialisms; tools/list will " - "advertise the spec set this framework version knows." - ), - UserWarning, - stacklevel=2, - ) + # Round-4 DX review: an unknown specialism that's a close + # spelling match to a known one is almost always a typo (e.g., + # "sales-non-guarateed" missing the second 'n'). Adopters running + # python hello_seller.py won't see UserWarning + 0 tools as a + # red flag — the server boots, advertises nothing, silently 404s. + # Promote close matches to a hard fail with a "Did you mean…" + # hint; truly novel slugs still get the soft UserWarning for + # forward-compat with v6.x+ specs. + known = sorted(REQUIRED_METHODS_PER_SPECIALISM.keys()) + typo_suggestions: list[tuple[str, str]] = [] + novel: list[str] = [] + for slug in unknown: + close = difflib.get_close_matches(slug, known, n=1, cutoff=0.7) + if close: + typo_suggestions.append((slug, close[0])) + else: + novel.append(slug) + + if typo_suggestions: + hints = "; ".join( + f"{slug!r} → did you mean {match!r}?" for slug, match in sorted(typo_suggestions) + ) + raise AdcpError( + "INVALID_REQUEST", + message=( + f"DecisioningPlatform claims unknown specialism(s) " + f"that look like typos: {hints}. " + "Forward-compat tolerance applies only to genuinely " + "novel specialism slugs (not close spelling matches). " + f"Known v6.0 specialisms: {known}" + ), + recovery="terminal", + details={ + "typo_suggestions": [ + {"claimed": slug, "did_you_mean": match} for slug, match in typo_suggestions + ], + "known_specialisms": known, + }, + ) + + if novel: + warnings.warn( + ( + f"DecisioningPlatform claims novel specialism(s) " + f"{sorted(novel)!r}. Your framework version predates " + f"the spec, OR you're piloting a future specialism. " + f"Required-method validation skipped; tools/list will " + f"advertise the spec set this framework version knows. " + f"Known v6.0 specialisms: {known}" + ), + UserWarning, + stacklevel=2, + ) if missing: raise AdcpError( diff --git a/src/adcp/decisioning/task_registry.py b/src/adcp/decisioning/task_registry.py index 6a3cafd81..a204c9096 100644 --- a/src/adcp/decisioning/task_registry.py +++ b/src/adcp/decisioning/task_registry.py @@ -39,12 +39,12 @@ import logging import time import uuid - -logger = logging.getLogger(__name__) from collections.abc import Awaitable, Callable from dataclasses import dataclass, field from typing import Any, ClassVar, Literal, Protocol, runtime_checkable +logger = logging.getLogger(__name__) + #: Terminal task states per AdCP 3.0 spec (``enums/task-status.json``). #: ``submitted`` = task created but not yet started; ``working`` = adopter #: callback running; ``completed`` / ``failed`` = terminal. diff --git a/tests/test_decisioning_dispatch.py b/tests/test_decisioning_dispatch.py index 43f75f3ea..fd7a169d9 100644 --- a/tests/test_decisioning_dispatch.py +++ b/tests/test_decisioning_dispatch.py @@ -125,21 +125,46 @@ def update_media_buy(self, media_buy_id, patch, ctx): assert "get_media_buy_delivery" in missing_methods -def test_validate_platform_warns_on_unknown_specialism() -> None: - """Unknown specialism — typo or future spec — emits UserWarning, - NOT an AdcpError raise. Forward-compat with v6.x+ specs (round-3 - D14).""" +def test_validate_platform_warns_on_novel_specialism() -> None: + """Truly novel specialism (no close spelling match to any known + slug) emits UserWarning, NOT a raise. Forward-compat with v6.x+ + specs (round-3 D14).""" - class _UnknownSpecialismPlatform(DecisioningPlatform): + class _NovelSpecialismPlatform(DecisioningPlatform): capabilities = DecisioningCapabilities(specialisms=["this-does-not-exist-yet"]) accounts = SingletonAccounts(account_id="hello") with warnings.catch_warnings(record=True) as caught: warnings.simplefilter("always", UserWarning) - validate_platform(_UnknownSpecialismPlatform()) + validate_platform(_NovelSpecialismPlatform()) matched = [w for w in caught if "this-does-not-exist-yet" in str(w.message)] assert len(matched) == 1 - assert "typos" in str(matched[0].message) + assert "novel specialism" in str(matched[0].message) + + +def test_validate_platform_raises_on_typo_specialism() -> None: + """Round-4 DX review: a typo close-match to a known slug + (e.g. "sales-non-guarateed" missing the second 'n') raises + AdcpError with a "Did you mean..." hint, NOT a silent UserWarning. + Adopters running ``python hello_seller.py`` would otherwise see + a server boot with 0 tools advertised and silently 404 every + buyer call.""" + + class _TypoPlatform(DecisioningPlatform): + # Missing 'n' in "non-guaranteed". + capabilities = DecisioningCapabilities( + specialisms=["sales-non-guarateed"], + ) + accounts = SingletonAccounts(account_id="hello") + + with pytest.raises(AdcpError) as exc_info: + validate_platform(_TypoPlatform()) + assert exc_info.value.code == "INVALID_REQUEST" + msg = str(exc_info.value) + assert "did you mean 'sales-non-guaranteed'" in msg.lower() + # Details carry the structured suggestion for tooling. + suggestions = exc_info.value.details["typo_suggestions"] + assert {"claimed": "sales-non-guarateed", "did_you_mean": "sales-non-guaranteed"} in suggestions def test_validate_platform_governance_aware_required_for_governance_specialism() -> None: diff --git a/tests/test_hello_seller_async_handoff_integration.py b/tests/test_hello_seller_async_handoff_integration.py index 948958a83..a464d79a8 100644 --- a/tests/test_hello_seller_async_handoff_integration.py +++ b/tests/test_hello_seller_async_handoff_integration.py @@ -170,6 +170,11 @@ async def test_create_media_buy_large_budget_returns_submitted_envelope( assert rec is not None assert rec["state"] == "completed" assert rec["result"]["media_buy_id"].startswith("mb_reviewed_") + # DON'T cross-leak the framework's task_id namespace into the + # adopter's media_buy_id namespace — buyers reading the response + # shouldn't see a raw task UUID embedded in media_buy_id + # (round-4 reviewer P1). + assert task_id not in rec["result"]["media_buy_id"] assert rec["result"]["status"] == "active" From 597d00ab313c88147031f68a8dd95088451811e7 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Thu, 30 Apr 2026 13:53:23 -0400 Subject: [PATCH 17/18] fix(examples,server): cherry-pick storyboard CI fixes from PR #321 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foundation branch (PR #316) is failing the storyboard CI on examples/seller_agent.py for the same reasons as PR #321 — the seed_product defaults, format_ids agent_url normalization, TERMS_REJECTED measurement-terms gate, and context-echo on comply_test_controller. Until #321 lands on main, the foundation branch inherits the same broken state. Cherry-picks the four squashed commits from ``bokelley/storyboard-seed-product-complete``: - examples/seller_agent.py: seed_product non-empty defaults (publisher_properties minItems:1, format_ids[].agent_url, reporting_capabilities.available_reporting_frequencies); format_ids agent_url normalization; targeting_overlay / creative_assignments / creatives round-trip on create + update; TERMS_REJECTED gate covering vendor mismatch / variance < 10 / unsupported windows, with seller-vendor and common windows accepted; defensive non-dict measurement_terms coercion. - src/adcp/server/test_controller.py: dispatcher echoes the wire ``context`` field onto every comply_test_controller response per the comply-test-controller-response schema. Storyboards thread state across steps via $context. resolution; without echo the create_media_buy_async track fails on force_arm_submitted → create_media_buy_submitted handoff. - tests/test_seller_agent_storyboard.py: 18 storyboard regression tests (seed_product schema-shape, fixture-fields-not-overwritten, format_ids edge cases, TERMS_REJECTED variants — vendor/variance/ window/threshold, targeting_overlay round-trip, defensive coercion). - tests/test_test_controller_context.py: 3 new tests covering wire context echo dispatch behavior. Foundation tests: 162 (decisioning) + 33 (storyboard) + everything else. Full suite: 2558 passed, 17 skipped, 1 xfailed. This commit duplicates work in PR #321; when that PR merges to main, the foundation branch's rebase will drop these commits cleanly. Co-Authored-By: Claude Opus 4.7 (1M context) --- examples/seller_agent.py | 193 ++++++++- src/adcp/server/test_controller.py | 39 +- tests/test_seller_agent_storyboard.py | 575 ++++++++++++++++++++++++++ tests/test_test_controller_context.py | 96 ++++- 4 files changed, 887 insertions(+), 16 deletions(-) create mode 100644 tests/test_seller_agent_storyboard.py diff --git a/examples/seller_agent.py b/examples/seller_agent.py index 2e9989e6a..399d5dd13 100644 --- a/examples/seller_agent.py +++ b/examples/seller_agent.py @@ -53,6 +53,9 @@ # Test-controller state (force_*/seed_* scenarios only) plans: dict[str, dict[str, Any]] = {} +# Seeded creative formats keyed by the string format ID the storyboard supplies. +# list_creative_formats merges these in so storyboard references resolve. +seeded_creative_formats: dict[str, dict[str, Any]] = {} # Single-shot directives registered by force_create_media_buy_arm; keyed by account_id. pending_directives: dict[str, dict[str, Any]] = {} # Tasks registered when create_media_buy consumes a 'submitted' directive; keyed by task_id. @@ -126,10 +129,15 @@ async def get_adcp_capabilities( # in 3.0.1) live on the dynamic list_scenarios response and # are reported there — not advertised here. Once the # capabilities schema's enum catches up, the rest land too. + # force_session_status is schema-allowed even for media_buy + # sellers; DemoStore provides a stub so list_scenarios + # includes it and the storyboard runner's controller + # detection check succeeds. "scenarios": [ "force_account_status", "force_media_buy_status", "force_creative_status", + "force_session_status", "simulate_delivery", "simulate_budget_spend", ], @@ -254,14 +262,95 @@ async def create_media_buy(self, params: dict[str, Any], context: Any = None) -> field="product_id", suggestion="Use get_products to discover available products", ) - packages.append( - { - "package_id": f"pkg-{uuid.uuid4().hex[:8]}", - "product_id": product_id, - "pricing_option_id": pkg.get("pricing_option_id"), - "budget": pkg.get("budget"), - } - ) + # Inspect per-package measurement_terms and reject aggressive + # buyer-imposed terms. The demo seller declares + # delivery_measurement.provider == "internal" — any non-empty + # billing_measurement is the buyer trying to dictate terms the + # seller can't honor. The storyboard's + # create_media_buy_aggressive_terms step sends one of: + # - billing_measurement.vendor != seller's internal counter + # - billing_measurement.max_variance_percent below realistic + # tolerance (< 10) + # - billing_measurement.measurement_window the seller doesn't + # support (any non-empty window — demo seller has no window + # declared on its products) + # Any one of those should trip TERMS_REJECTED with + # recovery=correctable so the buyer can retry with workable + # terms. The legacy "viewability_threshold > 80" demo + # convention also rejects so storyboards using either + # format see the same outcome. + # Defensive coercion — storyboard fixtures occasionally send + # measurement_terms as a string or other non-dict shape; treat + # that as "no terms supplied" rather than crashing the seller. + raw_terms = pkg.get("measurement_terms") + pkg_terms = raw_terms if isinstance(raw_terms, dict) else {} + raw_billing = pkg_terms.get("billing_measurement") + billing = raw_billing if isinstance(raw_billing, dict) else {} + rejection: str | None = None + field_path: str | None = None + # Accept the seller's own domain or any "internal" indicator — + # those are buyer-relaxed terms ("yes, use the seller's count"). + # Reject only third-party vendor domains. + seller_vendor_domains = {"example.com", "internal"} + vendor = billing.get("vendor") + vendor_domain = vendor.get("domain") if isinstance(vendor, dict) else None + if ( + isinstance(vendor, dict) + and vendor_domain + and vendor_domain not in seller_vendor_domains + ): + rejection = ( + f"Vendor '{vendor_domain}' is not supported " "(seller uses internal counter)" + ) + field_path = "measurement_terms.billing_measurement.vendor" + elif ( + isinstance(billing.get("max_variance_percent"), int | float) + and billing["max_variance_percent"] < 10 + ): + rejection = ( + f"max_variance_percent {billing['max_variance_percent']} is below " + "the seller's minimum tolerance of 10%" + ) + field_path = "measurement_terms.billing_measurement.max_variance_percent" + elif billing.get("measurement_window") and billing["measurement_window"] not in { + "live", + "c3", + "c7", + "final", + }: + # Common windows are accepted; uncommon strict ones (post_sivt, + # post_ivt, downloads_30d, etc.) are not supported by the demo + # seller. + rejection = ( + f"measurement_window '{billing['measurement_window']}' is not " + "supported by this seller" + ) + field_path = "measurement_terms.billing_measurement.measurement_window" + elif ( + isinstance(pkg_terms.get("viewability_threshold"), int | float) + and pkg_terms["viewability_threshold"] > 80 + ): + rejection = "Viewability threshold exceeds maximum supported value of 80%" + field_path = "measurement_terms.viewability_threshold" + if rejection: + return adcp_error( + "TERMS_REJECTED", + rejection, + field=field_path, + recovery="correctable", + ) + + pkg_obj: dict[str, Any] = { + "package_id": f"pkg-{uuid.uuid4().hex[:8]}", + "product_id": product_id, + "pricing_option_id": pkg.get("pricing_option_id"), + "budget": pkg.get("budget"), + } + # Persist overlay and creative fields so get_media_buys can round-trip them. + for field in ("targeting_overlay", "creative_assignments", "creatives"): + if pkg.get(field) is not None: + pkg_obj[field] = pkg[field] + packages.append(pkg_obj) has_creatives = any( pkg.get("creative_assignments") or pkg.get("creatives") for pkg in params["packages"] @@ -312,7 +401,8 @@ async def update_media_buy(self, params: dict[str, Any], context: Any = None) -> return adcp_error("CONFLICT", "Revision mismatch - refetch and retry") if params.get("packages"): - existing_pkg_ids = {p["package_id"] for p in mb.get("packages", [])} + existing_pkgs = {p["package_id"]: p for p in mb.get("packages", [])} + existing_pkg_ids = set(existing_pkgs.keys()) for pkg_update in params["packages"]: pkg_id = pkg_update.get("package_id") if pkg_id and pkg_id not in existing_pkg_ids: @@ -321,6 +411,13 @@ async def update_media_buy(self, params: dict[str, Any], context: Any = None) -> f"Package '{pkg_id}' not found in media buy {mb_id}", field="package_id", ) + # Apply targeting and creative field deltas to persisted package state + # so get_media_buys can round-trip property_list and overlay updates. + if pkg_id and pkg_id in existing_pkgs: + persisted = existing_pkgs[pkg_id] + for field in ("targeting_overlay", "creative_assignments", "creatives"): + if field in pkg_update: + persisted[field] = pkg_update[field] status = mb["status"] if status == "pending_creatives" and params.get("packages"): @@ -393,6 +490,7 @@ async def list_creative_formats( ], }, ] + all_formats = all_formats + list(seeded_creative_formats.values()) filter_ids = params.get("format_ids") if filter_ids: wanted = {(fid.get("agent_url"), fid["id"]) for fid in filter_ids if "id" in fid} @@ -531,6 +629,20 @@ async def simulate_budget_spend( ) -> dict[str, Any]: return {"simulated": {"spend_percentage": spend_percentage}} + async def force_session_status( + self, + session_id: str, + status: str, + termination_reason: str | None = None, + *, + context: Any = None, + ) -> dict[str, Any]: + # DemoSeller has no SI session state; return a canned transition so + # the storyboard runner's controller-detection probe succeeds and the + # force_session_status storyboard can run (it will simply report the + # canned previous_state). + return {"previous_state": "active", "current_state": status} + async def force_create_media_buy_arm( self, arm: str, @@ -592,6 +704,49 @@ async def seed_product( data = dict(fixture or {}) pid = product_id or data.get("product_id") or f"seeded-{uuid.uuid4().hex[:8]}" data["product_id"] = pid + # Ensure schema-required fields are present so downstream validation + # passes even when the runner sends a minimal fixture with only + # product_id. Defaults are spec-valid (non-empty arrays where + # ``minItems: 1`` applies, format_ids carrying agent_url) so the + # storyboard runner's get-products-response.json validation succeeds + # against any product the runner seeds. + data.setdefault("name", pid) + data.setdefault("description", f"Seeded product {pid}") + data.setdefault("delivery_type", "non_guaranteed") + data.setdefault( + "publisher_properties", + [{"publisher_domain": "example.com", "selection_type": "all"}], + ) + data.setdefault( + "format_ids", + [{"agent_url": AGENT_URL, "id": "display_300x250"}], + ) + # Normalize any caller-supplied format_ids items that omit + # agent_url. Storyboard fixtures commonly send + # ``format_ids: [{"id": "..."}]`` — the bare id without the + # canonical agent_url. The schema requires both fields, so fill + # in the local AGENT_URL when missing. + data["format_ids"] = [ + ( + {**fmt, "agent_url": fmt.get("agent_url") or AGENT_URL} + if isinstance(fmt, dict) + else fmt + ) + for fmt in data["format_ids"] + ] + data.setdefault("pricing_options", []) + data.setdefault( + "reporting_capabilities", + { + "available_metrics": ["impressions", "spend"], + "available_reporting_frequencies": ["hourly", "daily"], + "date_range_support": "date_range", + "supports_webhooks": False, + "expected_delay_minutes": 60, + "timezone": "UTC", + }, + ) + data.setdefault("delivery_measurement", {"provider": "internal"}) for i, p in enumerate(PRODUCTS): if p.get("product_id") == pid: PRODUCTS[i] = data @@ -668,6 +823,26 @@ async def seed_media_buy( media_buys[mb_id] = data return {"media_buy_id": mb_id} + async def seed_creative_format( + self, + fixture: dict[str, Any] | None = None, + format_id: str | None = None, + *, + context: Any = None, + ) -> dict[str, Any]: + data = dict(fixture or {}) + fid = ( + format_id + or (data.get("format_id") or {}).get("id") + or f"fmt-seeded-{uuid.uuid4().hex[:8]}" + ) + data.setdefault("format_id", {"agent_url": AGENT_URL, "id": fid}) + data.setdefault("name", fid) + data.setdefault("renders", []) + data.setdefault("assets", []) + seeded_creative_formats[fid] = data + return {"format_id": fid} + if __name__ == "__main__": serve( diff --git a/src/adcp/server/test_controller.py b/src/adcp/server/test_controller.py index 18c1ce720..5a588cf30 100644 --- a/src/adcp/server/test_controller.py +++ b/src/adcp/server/test_controller.py @@ -60,6 +60,7 @@ async def force_account_status(self, account_id, status): "seed_creative", "seed_plan", "seed_media_buy", + "seed_creative_format", ] _MAX_TASK_ID = 128 @@ -357,6 +358,23 @@ async def seed_media_buy( """ raise NotImplementedError + async def seed_creative_format( + self, + fixture: dict[str, Any] | None = None, + format_id: str | None = None, + *, + context: ToolContext | None = None, + ) -> dict[str, Any]: + """Pre-populate a creative format fixture for storyboard tests (AdCP 3.0.1). + + The seller MUST expose the seeded format_id in list_creative_formats + responses for the duration of the compliance session. + + Returns: + {"format_id": str} + """ + raise NotImplementedError + def _list_scenarios(store: TestControllerStore) -> list[str]: """Detect which scenarios a store actually implements. @@ -617,6 +635,12 @@ async def _handle_test_controller( media_buy_id=scenario_params.get("media_buy_id"), **extra, ) + elif scenario == "seed_creative_format": + result = await method( + fixture=scenario_params.get("fixture"), + format_id=scenario_params.get("format_id"), + **extra, + ) else: return _controller_error("UNKNOWN_SCENARIO", f"Unknown scenario: {scenario}") except TestControllerError as e: @@ -635,6 +659,16 @@ async def _handle_test_controller( if isinstance(result, dict) and "success" not in result: result["success"] = True + # Echo the wire ``context`` field per the spec's + # comply-test-controller-response shape. Storyboards thread state + # across steps via the context object; sellers that don't echo + # break the storyboard runner's ``$context.`` resolution + # for downstream steps. Skip when the store already populated + # ``context`` itself (an explicit override wins). + wire_context = params.get("context") + if isinstance(result, dict) and "context" not in result and isinstance(wire_context, dict): + result["context"] = dict(wire_context) + return dict(result) @@ -683,7 +717,7 @@ async def force_account_status(self, account_id, status): from adcp.server.base import ToolContext as _ToolContext from adcp.server.serve import RequestMetadata as _RequestMetadata - async def comply_test_controller(**kwargs: Any) -> str: + async def comply_test_controller(**kwargs: Any) -> dict[str, Any]: context: _ToolContext | None = None if context_factory is not None: meta = _RequestMetadata(tool_name="comply_test_controller", transport="mcp") @@ -693,8 +727,7 @@ async def comply_test_controller(**kwargs: Any) -> str: "context_factory for comply_test_controller returned " f"{type(context).__name__}, not a ToolContext instance" ) - result = await _handle_test_controller(store, kwargs, context=context) - return json.dumps(result) + return await _handle_test_controller(store, kwargs, context=context) tool = Tool.from_function( comply_test_controller, diff --git a/tests/test_seller_agent_storyboard.py b/tests/test_seller_agent_storyboard.py new file mode 100644 index 000000000..169dfa5f8 --- /dev/null +++ b/tests/test_seller_agent_storyboard.py @@ -0,0 +1,575 @@ +"""Tests for the 5 storyboard fixture-gap fixes in examples/seller_agent.py. + +Covers: + - seed_product populates schema-required field defaults for minimal fixtures + - create_media_buy returns TERMS_REJECTED for aggressive measurement_terms + - create_media_buy round-trips targeting_overlay / property_list through storage + - get_media_buys returns persisted targeting_overlay + - update_media_buy applies targeting_overlay and property_list deltas +""" + +from __future__ import annotations + +import sys +from copy import deepcopy +from pathlib import Path +from typing import Any + +import pytest + +# examples/ is not a package; add it to the path once at import time. +_EXAMPLES = str(Path(__file__).parent.parent / "examples") +if _EXAMPLES not in sys.path: + sys.path.insert(0, _EXAMPLES) + +import seller_agent as _sa # noqa: E402 (path manipulation above is intentional) + +# --------------------------------------------------------------------------- +# Fixture: reset module-level globals before every test so tests are isolated. +# --------------------------------------------------------------------------- + +# Snapshot taken at import time — before any test mutates the module state. +_INITIAL_PRODUCTS: list[dict[str, Any]] = deepcopy(_sa.PRODUCTS) + + +@pytest.fixture(autouse=True) +def _reset_seller_state() -> Any: + """Reset all mutable module globals to their initial state before each test.""" + _sa.PRODUCTS.clear() + _sa.PRODUCTS.extend(deepcopy(_INITIAL_PRODUCTS)) + _sa.media_buys.clear() + _sa.creatives.clear() + _sa.accounts.clear() + _sa.proposals.clear() + _sa.plans.clear() + _sa.seeded_creative_formats.clear() + _sa.pending_directives.clear() + _sa.pending_task_completions.clear() + yield + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _seller() -> _sa.DemoSeller: + return _sa.DemoSeller() + + +def _store() -> _sa.DemoStore: + return _sa.DemoStore() + + +# --------------------------------------------------------------------------- +# seed_product — required field defaults (failures 1 & 2) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_seed_product_minimal_fixture_adds_required_fields() -> None: + """seed_product fills in required fields when the fixture omits them.""" + store = _store() + result = await store.seed_product(product_id="outdoor_display_q2") + assert result["product_id"] == "outdoor_display_q2" + + seeded = next(p for p in _sa.PRODUCTS if p["product_id"] == "outdoor_display_q2") + assert "name" in seeded + assert isinstance(seeded["format_ids"], list) + assert isinstance(seeded["pricing_options"], list) + assert "reporting_capabilities" in seeded + assert "delivery_measurement" in seeded + + +@pytest.mark.asyncio +async def test_seed_product_fixture_fields_not_overwritten() -> None: + """Fixture values must not be overwritten by the setdefault calls.""" + store = _store() + fixture = { + "name": "Q2 Outdoor Custom", + "delivery_type": "guaranteed", + "format_ids": [{"agent_url": "http://x", "id": "custom_format"}], + "pricing_options": [{"pricing_option_id": "po-1", "pricing_model": "cpm"}], + "reporting_capabilities": {"available_metrics": ["impressions"]}, + "delivery_measurement": {"provider": "moat"}, + "publisher_properties": [{"publisher_domain": "example.com"}], + } + await store.seed_product(fixture=fixture, product_id="outdoor_display_q2") + + seeded = next(p for p in _sa.PRODUCTS if p["product_id"] == "outdoor_display_q2") + assert seeded["name"] == "Q2 Outdoor Custom" + assert seeded["delivery_type"] == "guaranteed" + assert seeded["format_ids"] == [{"agent_url": "http://x", "id": "custom_format"}] + assert seeded["delivery_measurement"] == {"provider": "moat"} + + +@pytest.mark.asyncio +async def test_seed_product_minimal_fixture_satisfies_schema_requirements() -> None: + """A bare ``seed_product(product_id=...)`` must produce a product + that satisfies the spec's get-products-response.json schema. + + Regression for storyboard CI failures where ``publisher_properties`` + defaulted to an empty list (violates ``minItems: 1``), + ``available_reporting_frequencies`` was empty (same), and + ``format_ids`` items were missing ``agent_url``. + """ + store = _store() + await store.seed_product(product_id="schema_check_minimal") + + seeded = next(p for p in _sa.PRODUCTS if p["product_id"] == "schema_check_minimal") + + # publisher_properties: minItems 1 + assert ( + len(seeded["publisher_properties"]) >= 1 + ), f"publisher_properties must be non-empty; got {seeded['publisher_properties']}" + + # format_ids: minItems 1, each item requires {agent_url, id} + assert len(seeded["format_ids"]) >= 1 + for fmt in seeded["format_ids"]: + assert "agent_url" in fmt, f"format_ids item missing agent_url: {fmt}" + assert "id" in fmt, f"format_ids item missing id: {fmt}" + + # reporting_capabilities.available_reporting_frequencies: minItems 1 + rc = seeded["reporting_capabilities"] + assert ( + len(rc["available_reporting_frequencies"]) >= 1 + ), f"available_reporting_frequencies must be non-empty; got {rc}" + + +@pytest.mark.asyncio +async def test_seed_product_normalizes_format_ids_missing_agent_url() -> None: + """Storyboard fixtures commonly send ``format_ids: [{"id": "..."}]`` + — the bare id without the canonical ``agent_url``. The schema + requires both fields, so seed_product fills in the local AGENT_URL + for any caller-supplied format_ids item missing it. Existing + agent_url values are preserved (regression for the + fixture-fields-not-overwritten test). + """ + store = _store() + await store.seed_product( + fixture={"format_ids": [{"id": "video_15s"}, {"id": "display_300x250"}]}, + product_id="agent_url_normalize_test", + ) + + seeded = next(p for p in _sa.PRODUCTS if p["product_id"] == "agent_url_normalize_test") + for fmt in seeded["format_ids"]: + assert ( + fmt["agent_url"] == _sa.AGENT_URL + ), f"agent_url should be filled in from local AGENT_URL: {fmt}" + + # Existing agent_url MUST be preserved. + await store.seed_product( + fixture={"format_ids": [{"agent_url": "https://other.example/", "id": "x"}]}, + product_id="agent_url_preserve_test", + ) + preserved = next(p for p in _sa.PRODUCTS if p["product_id"] == "agent_url_preserve_test") + assert preserved["format_ids"][0]["agent_url"] == "https://other.example/" + + +@pytest.mark.asyncio +async def test_seed_product_format_ids_edge_cases() -> None: + """Format-id normalization edge cases — empty-string agent_url and + explicit ``None`` are both treated as missing (filled in with local + AGENT_URL); non-dict items pass through unchanged so weird storyboard + fixtures don't crash the seller.""" + store = _store() + + # Empty string should be treated as missing. + await store.seed_product( + fixture={"format_ids": [{"agent_url": "", "id": "x"}]}, + product_id="agent_url_empty_string", + ) + seeded = next(p for p in _sa.PRODUCTS if p["product_id"] == "agent_url_empty_string") + assert seeded["format_ids"][0]["agent_url"] == _sa.AGENT_URL + + # Explicit None should be treated as missing. + await store.seed_product( + fixture={"format_ids": [{"agent_url": None, "id": "x"}]}, + product_id="agent_url_explicit_none", + ) + seeded = next(p for p in _sa.PRODUCTS if p["product_id"] == "agent_url_explicit_none") + assert seeded["format_ids"][0]["agent_url"] == _sa.AGENT_URL + + # Non-dict items pass through unchanged — defensive against + # malformed fixtures (storyboard runner downstream of us). + await store.seed_product( + fixture={"format_ids": ["just-a-string", {"id": "real", "agent_url": "http://x"}]}, + product_id="agent_url_mixed_shapes", + ) + seeded = next(p for p in _sa.PRODUCTS if p["product_id"] == "agent_url_mixed_shapes") + assert seeded["format_ids"][0] == "just-a-string" + assert seeded["format_ids"][1]["agent_url"] == "http://x" + + +@pytest.mark.asyncio +async def test_seed_product_is_findable_by_create_media_buy() -> None: + """After seed_product, create_media_buy must NOT return PRODUCT_NOT_FOUND.""" + store = _store() + seller = _seller() + + await store.seed_product( + fixture={ + "pricing_options": [{"pricing_option_id": "po-q2", "pricing_model": "cpm"}], + }, + product_id="outdoor_display_q2", + ) + + resp = await seller.create_media_buy( + { + "packages": [ + { + "product_id": "outdoor_display_q2", + "pricing_option_id": "po-q2", + "budget": 5000, + } + ] + } + ) + assert resp.get("media_buy_id") is not None, f"Expected media buy, got: {resp}" + + +# --------------------------------------------------------------------------- +# measurement_terms rejection (failure 3) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_create_media_buy_terms_rejected_high_viewability() -> None: + """Per-package measurement_terms.viewability_threshold > 80 must return TERMS_REJECTED.""" + seller = _seller() + resp = await seller.create_media_buy( + { + "packages": [ + { + "product_id": "premium-homepage", + "pricing_option_id": "po-cpm-homepage", + "budget": 10000, + "measurement_terms": {"viewability_threshold": 85}, + } + ], + } + ) + errors = resp.get("errors", []) + assert any( + e.get("code") == "TERMS_REJECTED" for e in errors + ), f"Expected TERMS_REJECTED in errors, got: {resp}" + # TERMS_REJECTED is a negotiation error; recovery must be correctable, not terminal. + rejected = next(e for e in errors if e.get("code") == "TERMS_REJECTED") + assert ( + rejected.get("recovery") == "correctable" + ), f"Expected recovery=correctable, got: {rejected}" + + +@pytest.mark.asyncio +async def test_create_media_buy_terms_rejected_at_boundary() -> None: + """viewability_threshold == 80 is exactly at the limit and must be accepted.""" + seller = _seller() + resp = await seller.create_media_buy( + { + "packages": [ + { + "product_id": "premium-homepage", + "pricing_option_id": "po-cpm-homepage", + "budget": 10000, + "measurement_terms": {"viewability_threshold": 80}, + } + ], + } + ) + assert ( + resp.get("media_buy_id") is not None + ), f"viewability_threshold=80 should be accepted, got: {resp}" + + +@pytest.mark.asyncio +async def test_create_media_buy_accepts_normal_viewability() -> None: + """viewability_threshold < 80 must NOT be rejected.""" + seller = _seller() + resp = await seller.create_media_buy( + { + "packages": [ + { + "product_id": "premium-homepage", + "pricing_option_id": "po-cpm-homepage", + "budget": 10000, + "measurement_terms": {"viewability_threshold": 70}, + } + ], + } + ) + assert resp.get("media_buy_id") is not None, f"Expected success, got: {resp}" + + +@pytest.mark.asyncio +async def test_create_media_buy_terms_rejected_third_party_vendor() -> None: + """Third-party vendor (not seller's own domain) is rejected — demo + seller uses internal counter, can't honor third-party measurement.""" + seller = _seller() + resp = await seller.create_media_buy( + { + "packages": [ + { + "product_id": "premium-homepage", + "pricing_option_id": "po-cpm-homepage", + "budget": 10000, + "measurement_terms": { + "billing_measurement": { + "vendor": {"domain": "moat.com"}, + }, + }, + } + ], + } + ) + errors = resp.get("errors", []) + assert any( + e.get("code") == "TERMS_REJECTED" for e in errors + ), f"Expected TERMS_REJECTED for vendor mismatch, got: {resp}" + rejected = next(e for e in errors if e.get("code") == "TERMS_REJECTED") + assert rejected["recovery"] == "correctable" + assert "vendor" in rejected.get( + "field", "" + ), f"Expected field path to mention vendor, got: {rejected.get('field')}" + + +@pytest.mark.asyncio +async def test_create_media_buy_accepts_seller_vendor() -> None: + """vendor.domain matching the seller's own domain (example.com) or + the 'internal' marker is a buyer-relaxed term — accept.""" + seller = _seller() + for domain in ("example.com", "internal"): + resp = await seller.create_media_buy( + { + "packages": [ + { + "product_id": "premium-homepage", + "pricing_option_id": "po-cpm-homepage", + "budget": 10000, + "measurement_terms": { + "billing_measurement": {"vendor": {"domain": domain}}, + }, + } + ], + } + ) + assert ( + resp.get("media_buy_id") is not None + ), f"Seller-domain vendor {domain!r} should be accepted, got: {resp}" + + +@pytest.mark.asyncio +async def test_create_media_buy_accepts_common_measurement_windows() -> None: + """Common windows (live, c3, c7, final) are accepted; only strict + windows like post_sivt are rejected as unsupported.""" + seller = _seller() + for window in ("live", "c3", "c7", "final"): + resp = await seller.create_media_buy( + { + "packages": [ + { + "product_id": "premium-homepage", + "pricing_option_id": "po-cpm-homepage", + "budget": 10000, + "measurement_terms": { + "billing_measurement": {"measurement_window": window}, + }, + } + ], + } + ) + assert ( + resp.get("media_buy_id") is not None + ), f"Common window {window!r} should be accepted, got: {resp}" + + +@pytest.mark.asyncio +async def test_create_media_buy_terms_rejected_aggressive_variance() -> None: + """max_variance_percent < 10 is too tight for the demo seller.""" + seller = _seller() + resp = await seller.create_media_buy( + { + "packages": [ + { + "product_id": "premium-homepage", + "pricing_option_id": "po-cpm-homepage", + "budget": 10000, + "measurement_terms": { + "billing_measurement": {"max_variance_percent": 0.5}, + }, + } + ], + } + ) + errors = resp.get("errors", []) + rejected = next((e for e in errors if e.get("code") == "TERMS_REJECTED"), None) + assert rejected is not None, f"Expected TERMS_REJECTED, got: {resp}" + assert rejected["recovery"] == "correctable" + assert "variance" in rejected.get("field", "") + + +@pytest.mark.asyncio +async def test_create_media_buy_terms_rejected_aggressive_window() -> None: + """measurement_window not supported by the demo seller (no windows declared).""" + seller = _seller() + resp = await seller.create_media_buy( + { + "packages": [ + { + "product_id": "premium-homepage", + "pricing_option_id": "po-cpm-homepage", + "budget": 10000, + "measurement_terms": { + "billing_measurement": {"measurement_window": "post_sivt"}, + }, + } + ], + } + ) + errors = resp.get("errors", []) + rejected = next((e for e in errors if e.get("code") == "TERMS_REJECTED"), None) + assert rejected is not None, f"Expected TERMS_REJECTED, got: {resp}" + assert rejected["recovery"] == "correctable" + assert "window" in rejected.get("field", "") + + +@pytest.mark.asyncio +async def test_create_media_buy_terms_rejected_variance_at_boundary() -> None: + """max_variance_percent == 10 is exactly at the limit and accepted.""" + seller = _seller() + resp = await seller.create_media_buy( + { + "packages": [ + { + "product_id": "premium-homepage", + "pricing_option_id": "po-cpm-homepage", + "budget": 10000, + "measurement_terms": { + "billing_measurement": {"max_variance_percent": 10}, + }, + } + ], + } + ) + assert resp.get("media_buy_id") is not None, f"Expected success, got: {resp}" + + +# --------------------------------------------------------------------------- +# targeting_overlay round-trip on create (failures 4 & 5) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_create_media_buy_round_trips_targeting_overlay() -> None: + """targeting_overlay must survive create and appear in get_media_buys.""" + seller = _seller() + overlay = {"property_list": {"list_id": "acme_outdoor_allowlist_v1", "match": "any"}} + create_resp = await seller.create_media_buy( + { + "packages": [ + { + "product_id": "run-of-site", + "pricing_option_id": "po-cpm-ros", + "budget": 2000, + "targeting_overlay": overlay, + } + ] + } + ) + mb_id = create_resp.get("media_buy_id") + assert mb_id is not None, f"Expected media buy id, got: {create_resp}" + + get_resp = await seller.get_media_buys({"media_buy_ids": [mb_id]}) + mb_list = get_resp.get("media_buys", []) + assert mb_list, "Expected at least one media buy in response" + packages = mb_list[0].get("packages", []) + assert packages, "Expected packages in media buy" + assert ( + packages[0].get("targeting_overlay") == overlay + ), f"targeting_overlay not round-tripped: {packages[0]}" + + +# --------------------------------------------------------------------------- +# targeting_overlay round-trip on update (failure 5) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_update_media_buy_persists_targeting_overlay() -> None: + """update_media_buy must apply targeting_overlay AND + creative_assignments AND creatives deltas to the stored package + state — all three are persisted by the seller and round-tripped + through get_media_buys.""" + seller = _seller() + + # Create without any overlay or creatives + create_resp = await seller.create_media_buy( + { + "packages": [ + { + "product_id": "run-of-site", + "pricing_option_id": "po-cpm-ros", + "budget": 1500, + } + ] + } + ) + mb_id = create_resp.get("media_buy_id") + assert mb_id is not None + pkg_id = _sa.media_buys[mb_id]["packages"][0]["package_id"] + + # Update all three persisted fields in one call. + overlay = {"property_list": {"list_id": "acme_outdoor_no_match_v1", "match": "none"}} + assignments = [{"creative_id": "cr-1", "weighting": {"type": "even"}}] + creatives = [{"creative_id": "cr-1", "name": "test"}] + update_resp = await seller.update_media_buy( + { + "media_buy_id": mb_id, + "packages": [ + { + "package_id": pkg_id, + "targeting_overlay": overlay, + "creative_assignments": assignments, + "creatives": creatives, + } + ], + } + ) + assert update_resp.get("media_buy_id") == mb_id, f"Update failed: {update_resp}" + + # All three fields must be persisted on the package — round-tripping through + # get_media_buys is the storyboard contract for delivery_reporting + inventory. + persisted = _sa.media_buys[mb_id]["packages"][0] + assert persisted.get("targeting_overlay") == overlay + assert persisted.get("creative_assignments") == assignments + assert persisted.get("creatives") == creatives + + # Verify the get_media_buys response also surfaces the persisted fields. + get_resp = await seller.get_media_buys({"media_buy_ids": [mb_id]}) + pkg = get_resp["media_buys"][0]["packages"][0] + assert pkg.get("targeting_overlay") == overlay + assert pkg.get("creative_assignments") == assignments + assert pkg.get("creatives") == creatives + + +@pytest.mark.asyncio +async def test_create_media_buy_handles_non_dict_measurement_terms() -> None: + """Defensive coercion — fixtures occasionally send measurement_terms + as a non-dict (string / list / None). The seller must NOT crash with + AttributeError; treat as "no terms supplied" and accept the package.""" + seller = _seller() + for bogus in ("a-string", 123, None, ["a", "list"]): + resp = await seller.create_media_buy( + { + "packages": [ + { + "product_id": "premium-homepage", + "pricing_option_id": "po-cpm-homepage", + "budget": 10000, + "measurement_terms": bogus, + } + ], + } + ) + assert ( + resp.get("media_buy_id") is not None + ), f"Bogus measurement_terms={bogus!r} should be ignored, got: {resp}" diff --git a/tests/test_test_controller_context.py b/tests/test_test_controller_context.py index f45ba11f1..4034454a7 100644 --- a/tests/test_test_controller_context.py +++ b/tests/test_test_controller_context.py @@ -187,6 +187,76 @@ class _Store(_Mixin, TestControllerStore): # --------------------------------------------------------------------------- +async def test_dispatch_echoes_wire_context_to_response(): + """Per the comply-test-controller-response schema, the seller MUST + echo the wire ``context`` field on every response. Storyboards + thread state across steps via the context object — sellers that + don't echo break the runner's ``$context.`` resolution for + downstream steps. The dispatcher injects the wire context after + the store returns; stores that explicitly populate ``context`` + themselves win.""" + + class _Store(TestControllerStore): + async def force_account_status(self, account_id: str, status: str) -> dict[str, Any]: + return {"previous_state": "active", "current_state": status} + + wire_context = {"correlation_id": "story-123", "forced_task_id": "t_abc"} + result = await _handle_test_controller( + _Store(), + { + "scenario": "force_account_status", + "params": {"account_id": "acc-1", "status": "suspended"}, + "context": wire_context, + }, + ) + assert ( + result["context"] == wire_context + ), f"context must be echoed verbatim; got: {result.get('context')}" + + +async def test_dispatch_does_not_overwrite_store_supplied_context(): + """A store that explicitly populates ``context`` in its result + overrides the auto-echo — e.g., when the store wants to mutate + correlation_id for instrumentation purposes.""" + + class _Store(TestControllerStore): + async def force_account_status(self, account_id: str, status: str) -> dict[str, Any]: + return { + "previous_state": "active", + "current_state": status, + "context": {"correlation_id": "store-overrode-it"}, + } + + result = await _handle_test_controller( + _Store(), + { + "scenario": "force_account_status", + "params": {"account_id": "acc-1", "status": "suspended"}, + "context": {"correlation_id": "wire-original"}, + }, + ) + assert result["context"] == {"correlation_id": "store-overrode-it"} + + +async def test_dispatch_skips_context_echo_when_request_has_no_context(): + """When the wire request has no ``context`` field, the dispatcher + must NOT inject a phantom ``context: {}``. Storyboards relying on + presence-detection get the right answer.""" + + class _Store(TestControllerStore): + async def force_account_status(self, account_id: str, status: str) -> dict[str, Any]: + return {"previous_state": "active", "current_state": status} + + result = await _handle_test_controller( + _Store(), + { + "scenario": "force_account_status", + "params": {"account_id": "acc-1", "status": "suspended"}, + }, + ) + assert "context" not in result + + async def test_store_with_context_kwarg_receives_the_context(): """The primary #227 scenario: a store method that accepts ``context`` receives the ToolContext the caller passed into the dispatcher.""" @@ -318,14 +388,11 @@ def build_context(meta: RequestMetadata) -> ToolContext: tool = mcp._tool_manager._tools["comply_test_controller"] # FastMCP's tool wrapper takes the function args as kwargs. fn = tool.fn # type: ignore[attr-defined] - result_json = await fn( + result = await fn( scenario="force_account_status", params={"account_id": "acc-1", "status": "suspended"}, ) - import json - - result = json.loads(result_json) assert result["success"] is True assert result["current_state"] == "suspended" # The factory ran, built a ToolContext, and the store saw the header- @@ -340,6 +407,27 @@ def build_context(meta: RequestMetadata) -> ToolContext: assert received[0].metadata["tool_name"] == "comply_test_controller" +async def test_register_test_controller_list_scenarios_returns_dict(): + """Regression for #314 — comply_test_controller must return a dict (not a + JSON string) through the FastMCP registration path so the JS runner's + structuredContent unwrapper can read data.success and data.scenarios.""" + + class _Store(TestControllerStore): + async def force_account_status(self, account_id: str, status: str) -> dict[str, Any]: + return {"previous_state": "active", "current_state": status} + + mcp = create_mcp_server(_MinimalHandler(), name="test-agent") + register_test_controller(mcp, _Store()) + + tool = mcp._tool_manager._tools["comply_test_controller"] + fn = tool.fn # type: ignore[attr-defined] + result = await fn(scenario="list_scenarios") + + assert isinstance(result, dict), "must be a dict, not a JSON string" + assert result["success"] is True + assert "force_account_status" in result["scenarios"] + + async def test_register_test_controller_rejects_non_toolcontext_from_factory(): """Guard rail — a factory that returns a dict instead of a ToolContext fails loudly at call time, not deep inside the store.""" From 80eccf01f3af98f7966cb22c6c16b4264bd9e1f0 Mon Sep 17 00:00:00 2001 From: Brian O'Kelley Date: Thu, 30 Apr 2026 15:48:21 -0400 Subject: [PATCH 18/18] =?UTF-8?q?fix(decisioning):=20round-5=20Emma=20revi?= =?UTF-8?q?ew=20=E2=80=94=20spec=20drift=20+=20governance=20+=20wire=20sha?= =?UTF-8?q?pe?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three P0 blockers + four P1 review items from a fresh read of the foundation PR against the canonical spec at schemas/cache/enums/specialism.json. P0: - Drop sales-streaming-tv, sales-exchange, sales-retail-media from REQUIRED_METHODS_PER_SPECIALISM — none of them exist in the spec enum. Add SPEC_SPECIALISM_ENUM constant mirroring the on-disk enum, with a unit test that pins it to the schema cache so out-of-band drift surfaces in CI. Typo detection now runs against the full spec enum, not just the v6.0 enforced subset; an unknown slug that matches a real spec slug we don't yet enforce method coverage for emits a distinct "spec-recognized but unenforced" UserWarning (separate from the "novel" forward-compat warning). - Add governance-aware-seller to GOVERNANCE_SPECIALISMS. Without it, a seller agent claiming the slug skipped the governance-aware/StateReader fail-fast — silent governance-gate bypass. - Drop task_type from the synchronous Submitted wire envelope per schemas/cache/core/protocol-envelope.json. The field stays on TaskRecord (tasks/get reads it) but the wire never carries the Python method name. P1: - InMemoryTaskRegistry.update_progress: terminal-state guard. A straggler progress write against a completed/failed task no longer resurrects "working" appearance against tasks/get readers holding the prior terminal state. - ExplicitAccounts: drop the unsupported "auth-info available for scope checks" claim from the docstring — del auth_info actually discards it. Adopters needing principal-vs-account scope checks implement AccountStore directly. - TaskRegistry production-mode gate: distinguish "is_durable marker absent" (programmer error, fails fast in any env) from "marker present and False in prod" (deployment misconfig). Without the split, a duck-typed registry without the marker would surface a misleading "non-durable refused" error. - handler.py: clarify the cast() lines are static-typing hints, not runtime validation. Adopters returning plain dicts that match the wire shape are supported by the framework's transport layer. Cleanup: prune docstring references to the dropped fake specialism slugs in specialisms/sales.py. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/adcp/decisioning/accounts.py | 24 ++- src/adcp/decisioning/dispatch.py | 160 +++++++++++------- src/adcp/decisioning/handler.py | 10 +- src/adcp/decisioning/platform.py | 11 +- src/adcp/decisioning/serve.py | 25 +++ src/adcp/decisioning/specialisms/sales.py | 28 ++- src/adcp/decisioning/task_registry.py | 26 ++- .../test_decisioning_context_state_resolve.py | 14 +- tests/test_decisioning_dispatch.py | 95 ++++++++++- tests/test_decisioning_handler.py | 5 +- tests/test_decisioning_serve.py | 55 +++++- tests/test_decisioning_task_registry.py | 43 +++++ ..._hello_seller_async_handoff_integration.py | 7 +- 13 files changed, 396 insertions(+), 107 deletions(-) diff --git a/src/adcp/decisioning/accounts.py b/src/adcp/decisioning/accounts.py index ff6c7e58e..8d81ca232 100644 --- a/src/adcp/decisioning/accounts.py +++ b/src/adcp/decisioning/accounts.py @@ -157,10 +157,19 @@ class ExplicitAccounts(Generic[TMeta]): The framework passes ``ref`` from the parsed request body (typically ``request.account``); ``resolve`` reads - ``ref["account_id"]`` and looks up the account via the adopter- - supplied ``loader``. Auth-info is available for scope checks - (e.g., reject if principal lacks access to the requested account) - but does not drive resolution. + ``ref["account_id"]`` and routes through the adopter-supplied + ``loader``. The wire ref is the source of truth for *which* + account to resolve. + + Auth scope checks (does this principal have access to the + requested account?) are NOT performed by ``ExplicitAccounts.resolve`` + — the default loader signature only takes ``account_id``. Adopters + needing principal-vs-account scope enforcement implement the + :class:`AccountStore` Protocol directly with a custom resolve that + reads ``auth_info``, OR add a request middleware that runs before + the handler. The framework does NOT silently bind ``auth_info`` to + the lookup; if your loader returns an account a principal shouldn't + see, you've shipped a cross-tenant data leak. Example:: @@ -185,7 +194,12 @@ def resolve( ref: dict[str, Any] | None, auth_info: AuthInfo | None = None, ) -> Awaitable[Account[TMeta]] | Account[TMeta]: - del auth_info # explicit mode ignores auth (scope checks happen elsewhere) + # Explicit mode resolves purely off the wire ref. Adopters + # needing principal-vs-account scope checks implement + # AccountStore directly (see class docstring). The loader + # signature is account_id-only by contract, so auth_info isn't + # threaded through here. + del auth_info if not ref or not ref.get("account_id"): from adcp.decisioning.types import AdcpError diff --git a/src/adcp/decisioning/dispatch.py b/src/adcp/decisioning/dispatch.py index aa9b120c3..a61491c16 100644 --- a/src/adcp/decisioning/dispatch.py +++ b/src/adcp/decisioning/dispatch.py @@ -65,6 +65,47 @@ logger = logging.getLogger(__name__) +# --------------------------------------------------------------------------- +# Specialism enum — spec slugs known to the framework +# --------------------------------------------------------------------------- + +#: Canonical spec specialism enum, mirrored verbatim from +#: ``schemas/cache/enums/specialism.json``. Used by +#: :func:`validate_platform` for typo suggestions: an unknown slug that +#: close-matches anything in ``SPEC_SPECIALISM_ENUM`` is treated as a +#: typo (hard fail with "did you mean…"); a slug that doesn't close-match +#: any spec value is forward-compat-tolerated via UserWarning. +#: +#: Drift policy: when the spec adds a specialism, bump this constant. +#: A unit test (``test_spec_specialism_enum_matches_schema_cache``) reads +#: the on-disk enum and asserts equality, so out-of-band drift surfaces +#: in CI. +SPEC_SPECIALISM_ENUM: frozenset[str] = frozenset( + { + "audience-sync", + "brand-rights", + "collection-lists", + "content-standards", + "creative-ad-server", + "creative-generative", + "creative-template", + "governance-aware-seller", + "governance-delivery-monitor", + "governance-spend-authority", + "property-lists", + "sales-broadcast-tv", + "sales-catalog-driven", + "sales-guaranteed", + "sales-non-guaranteed", + "sales-proposal-mode", + "sales-social", + "signal-marketplace", + "signal-owned", + "signed-requests", + } +) + + # --------------------------------------------------------------------------- # REQUIRED_METHODS_PER_SPECIALISM — what each specialism must implement # --------------------------------------------------------------------------- @@ -73,16 +114,20 @@ #: ``capabilities.specialisms`` against this map at server boot and #: fail-fasts when a claimed specialism is missing methods. #: -#: Keyed by specialism slug (matches the AdCP wire enum in -#: ``schemas/cache/enums/specialism.json``). v6.0 ships ``sales-*``; -#: v6.1 adds the rest as new specialism Protocols land. +#: Keyed by specialism slug — every key MUST also appear in +#: :data:`SPEC_SPECIALISM_ENUM` (the on-disk spec enum). v6.0 ships +#: enforced method coverage for the sales-* slugs the framework provides +#: a Protocol for; non-sales spec slugs (audience-sync, signal-*, +#: creative-*, governance-*, brand-rights, collection-lists, +#: content-standards, property-lists) emit an "unenforced specialism" +#: UserWarning until their per-Protocol coverage lands in v6.1+. #: #: Drift policy: when a specialism Protocol gains a required method, -#: bump this map AND add a v6.x migration note. ``validate_platform`` -#: tolerates *unknown* specialisms (forward-compat with v6.x+ specs) -#: but only via UserWarning — see D14 round-3. +#: bump this map AND add a v6.x migration note. The v6.0 enforced subset +#: is intentionally narrow — adding a method here without a Protocol +#: behind it would break adopters mid-version. REQUIRED_METHODS_PER_SPECIALISM: dict[str, frozenset[str]] = { - # All nine sales-* specialisms share the unified hybrid SalesPlatform + # Five sales-* specialisms share the unified hybrid SalesPlatform # surface. Per the SalesPlatform docstring, every sales-* claim # requires the five core methods. The four optional methods # (get_media_buys, provide_performance_feedback, @@ -116,15 +161,6 @@ "get_media_buy_delivery", } ), - "sales-streaming-tv": frozenset( - { - "get_products", - "create_media_buy", - "update_media_buy", - "sync_creatives", - "get_media_buy_delivery", - } - ), "sales-social": frozenset( { "get_products", @@ -134,15 +170,6 @@ "get_media_buy_delivery", } ), - "sales-exchange": frozenset( - { - "get_products", - "create_media_buy", - "update_media_buy", - "sync_creatives", - "get_media_buy_delivery", - } - ), "sales-proposal-mode": frozenset( { "get_products", @@ -152,11 +179,8 @@ "get_media_buy_delivery", } ), - # Catalog-driven and retail-media require the sales core PLUS - # sync_catalogs (to push the inventory taxonomy). v6.1 adds - # log_event + sync_event_sources for retail-media; for v6.0 alpha - # we leave those off the required list so adopters can ship sales - # core first. + # Catalog-driven requires the sales core PLUS sync_catalogs (to push + # the inventory taxonomy). "sales-catalog-driven": frozenset( { "get_products", @@ -167,16 +191,6 @@ "sync_catalogs", } ), - "sales-retail-media": frozenset( - { - "get_products", - "create_media_buy", - "update_media_buy", - "sync_creatives", - "get_media_buy_delivery", - "sync_catalogs", - } - ), } @@ -268,19 +282,27 @@ def validate_platform(platform: DecisioningPlatform) -> None: missing.append((specialism, method_name)) if unknown: - # Round-4 DX review: an unknown specialism that's a close - # spelling match to a known one is almost always a typo (e.g., - # "sales-non-guarateed" missing the second 'n'). Adopters running - # python hello_seller.py won't see UserWarning + 0 tools as a - # red flag — the server boots, advertises nothing, silently 404s. - # Promote close matches to a hard fail with a "Did you mean…" - # hint; truly novel slugs still get the soft UserWarning for - # forward-compat with v6.x+ specs. - known = sorted(REQUIRED_METHODS_PER_SPECIALISM.keys()) + # Three buckets: + # - typo: close-match to any spec slug → hard fail with hint + # - unenforced: spec-recognized but no method-coverage rules in + # this framework version → soft UserWarning (Protocol lands + # in v6.1+) + # - novel: not in spec at all → forward-compat UserWarning + # The typo detector compares against the full spec enum (not just + # REQUIRED_METHODS keys) so misspelling a spec slug we don't yet + # enforce still surfaces as a typo. + spec_known = sorted(SPEC_SPECIALISM_ENUM) typo_suggestions: list[tuple[str, str]] = [] + unenforced: list[str] = [] novel: list[str] = [] for slug in unknown: - close = difflib.get_close_matches(slug, known, n=1, cutoff=0.7) + if slug in SPEC_SPECIALISM_ENUM: + # Spec-recognized but not in REQUIRED_METHODS — adopter + # claimed a real spec slug whose Protocol hasn't shipped + # method-coverage rules yet. + unenforced.append(slug) + continue + close = difflib.get_close_matches(slug, spec_known, n=1, cutoff=0.7) if close: typo_suggestions.append((slug, close[0])) else: @@ -297,26 +319,40 @@ def validate_platform(platform: DecisioningPlatform) -> None: f"that look like typos: {hints}. " "Forward-compat tolerance applies only to genuinely " "novel specialism slugs (not close spelling matches). " - f"Known v6.0 specialisms: {known}" + f"Known spec specialisms: {spec_known}" ), recovery="terminal", details={ "typo_suggestions": [ {"claimed": slug, "did_you_mean": match} for slug, match in typo_suggestions ], - "known_specialisms": known, + "spec_specialisms": spec_known, }, ) + if unenforced: + warnings.warn( + ( + f"DecisioningPlatform claims spec-recognized specialism(s) " + f"{sorted(unenforced)!r} that this framework version " + f"doesn't yet enforce method coverage for. The claim is " + f"valid; required-method validation is skipped until the " + f"per-Protocol coverage lands. Implement the spec methods " + f"on your platform subclass so buyers don't 404." + ), + UserWarning, + stacklevel=2, + ) + if novel: warnings.warn( ( f"DecisioningPlatform claims novel specialism(s) " - f"{sorted(novel)!r}. Your framework version predates " - f"the spec, OR you're piloting a future specialism. " - f"Required-method validation skipped; tools/list will " - f"advertise the spec set this framework version knows. " - f"Known v6.0 specialisms: {known}" + f"{sorted(novel)!r} that aren't in the spec enum at " + f"schemas/cache/enums/specialism.json. Your framework " + f"version predates the spec, OR you're piloting a future " + f"specialism. Required-method validation skipped. " + f"Known spec specialisms: {spec_known}" ), UserWarning, stacklevel=2, @@ -750,11 +786,16 @@ async def _run() -> None: _BACKGROUND_HANDOFF_TASKS.add(bg_task) bg_task.add_done_callback(_BACKGROUND_HANDOFF_TASKS.discard) - # Wire ``Submitted`` envelope per spec. + # Wire ``Submitted`` envelope per + # ``schemas/cache/core/protocol-envelope.json``: only ``task_id`` + + # ``status`` are framework-emitted at this layer; the per-tool + # ``payload`` is empty for the submitted state. ``task_type`` is + # deliberately NOT on the wire — it lives on TaskRecord for + # ``tasks/get`` reads only, since the Python method name leaking to + # buyers would couple the wire to handler-internal naming. return { "task_id": task_id, "status": "submitted", - "task_type": method_name, } @@ -768,6 +809,7 @@ async def _run() -> None: __all__ = [ "REQUIRED_METHODS_PER_SPECIALISM", + "SPEC_SPECIALISM_ENUM", "compose_caller_identity", "validate_platform", ] diff --git a/src/adcp/decisioning/handler.py b/src/adcp/decisioning/handler.py index 0208c5cf6..49ddc08cf 100644 --- a/src/adcp/decisioning/handler.py +++ b/src/adcp/decisioning/handler.py @@ -14,8 +14,14 @@ 4. Calls :func:`_invoke_platform_method` to invoke the platform method, which projects ``TaskHandoff`` and wraps non-``AdcpError`` exceptions to the wire envelope. -5. Returns the typed response (or raises :class:`AdcpError` which the - framework's existing wire-projection codepath catches). +5. Returns whatever the platform method returned — typed Pydantic + response, plain dict matching the wire shape, or the ``Submitted`` + envelope dict from a TaskHandoff projection. The ``cast()`` on each + shim is a static-typing hint for callers; it is NOT a runtime + validation pass. The framework's transport layer + (``adcp.server.serve``) handles wire serialization for both Pydantic + and dict returns. Adopters relying on Pydantic round-trip validation + can opt in via ``response_validator`` middleware. The class-level ``advertised_tools: ClassVar[set[str]]`` declaration is auto-registered with the framework's tool-discovery seam via diff --git a/src/adcp/decisioning/platform.py b/src/adcp/decisioning/platform.py index 71af7cf2a..5cc716049 100644 --- a/src/adcp/decisioning/platform.py +++ b/src/adcp/decisioning/platform.py @@ -76,10 +76,19 @@ class DecisioningCapabilities: #: :func:`adcp.decisioning.dispatch.validate_platform` — silent #: governance-gate skipping is a security regression the framework #: refuses to ship. +#: +#: Mirrors every ``governance-*`` slug in +#: ``schemas/cache/enums/specialism.json`` — including +#: ``governance-aware-seller``. A seller agent that composes with a +#: buyer's governance agent reads governance context per-request; the +#: gate must catch it claiming the specialism without wiring the +#: StateReader, just like the spend-authority and delivery-monitor +#: governance agents themselves. GOVERNANCE_SPECIALISMS: frozenset[str] = frozenset( { - "governance-spend-authority", + "governance-aware-seller", "governance-delivery-monitor", + "governance-spend-authority", } ) diff --git a/src/adcp/decisioning/serve.py b/src/adcp/decisioning/serve.py index dc1034945..ddf998085 100644 --- a/src/adcp/decisioning/serve.py +++ b/src/adcp/decisioning/serve.py @@ -151,7 +151,32 @@ def create_adcp_server_from_platform( # defaulting safe. if registry is None: registry = InMemoryTaskRegistry() + # Round-5 Emma P1: an adopter duck-typing TaskRegistry without the + # is_durable marker would treat the missing attribute as False and + # silently trip the production gate — operator sees "non-durable + # registry refused" with no clear cause. Distinguish "marker + # absent" from "marker present and False" so the diagnostic + # points at the real problem. + has_marker = hasattr(type(registry), "is_durable") or hasattr(registry, "is_durable") is_durable = bool(getattr(registry, "is_durable", False)) + if not has_marker: + raise AdcpError( + "INVALID_REQUEST", + message=( + f"TaskRegistry impl {type(registry).__name__!r} is missing " + "the ``is_durable: ClassVar[bool]`` marker. The framework's " + "production-mode gate requires every registry to declare " + "durability explicitly — set ``is_durable = True`` (durable " + "backing store like Postgres/Redis) or ``is_durable = False`` " + "(in-memory / lossy). Without the marker, the gate would " + "silent-deny the registry with a confusing 'non-durable' " + "error." + ), + recovery="terminal", + details={ + "registry": type(registry).__name__, + }, + ) if not is_durable and _is_production_env(): opt_in = os.environ.get("ADCP_DECISIONING_ALLOW_INMEMORY_TASKS", "").strip() if opt_in != "1": diff --git a/src/adcp/decisioning/specialisms/sales.py b/src/adcp/decisioning/specialisms/sales.py index e67b010c1..886005bc9 100644 --- a/src/adcp/decisioning/specialisms/sales.py +++ b/src/adcp/decisioning/specialisms/sales.py @@ -1,13 +1,14 @@ -"""SalesPlatform Protocol — covers all nine ``sales-*`` specialisms. - -A platform claiming any of ``sales-non-guaranteed``, ``sales-guaranteed``, -``sales-broadcast-tv``, ``sales-streaming-tv``, ``sales-social``, -``sales-exchange``, ``sales-proposal-mode``, ``sales-catalog-driven``, -``sales-retail-media`` implements the methods on this Protocol. The -unified hybrid shape collapses 14 method names from v1's dual-method -design (``createMediaBuy`` + ``createMediaBuyTask``) into 7: each -mutating tool returns ``SalesResult[TSuccess]`` so adopters branch per -call between the sync fast path and the HITL slow path. +"""SalesPlatform Protocol — covers every ``sales-*`` specialism. + +A platform claiming any of the spec ``sales-*`` slugs +(``sales-non-guaranteed``, ``sales-guaranteed``, ``sales-broadcast-tv``, +``sales-social``, ``sales-proposal-mode``, ``sales-catalog-driven``) +implements the methods on this Protocol. The slugs mirror +``schemas/cache/enums/specialism.json``. The unified hybrid shape +collapses 14 method names from v1's dual-method design +(``createMediaBuy`` + ``createMediaBuyTask``) into 7: each mutating +tool returns ``SalesResult[TSuccess]`` so adopters branch per call +between the sync fast path and the HITL slow path. Required methods (every sales-* specialism): @@ -24,11 +25,8 @@ * :meth:`provide_performance_feedback` * :meth:`list_creative_formats` * :meth:`list_creatives` -* :meth:`sync_catalogs` — required when claiming ``sales-catalog-driven`` - or ``sales-retail-media`` -* :meth:`log_event` — required when claiming ``sales-retail-media`` -* :meth:`sync_event_sources` — required when claiming - ``sales-retail-media`` +* :meth:`sync_catalogs` — required when claiming + ``sales-catalog-driven`` The framework's :func:`validate_platform` walks ``capabilities.specialisms`` and confirms each specialism's required methods exist on the platform diff --git a/src/adcp/decisioning/task_registry.py b/src/adcp/decisioning/task_registry.py index a204c9096..08a348f30 100644 --- a/src/adcp/decisioning/task_registry.py +++ b/src/adcp/decisioning/task_registry.py @@ -70,8 +70,10 @@ class TaskRecord: :meth:`TaskRegistry.complete` / :meth:`TaskRegistry.fail` at the end of the handoff fn. :param task_type: Wire-spec task type (``'create_media_buy'``, - ``'sync_creatives'``, etc.). Mirrors the Submitted envelope's - ``task_type`` field so ``tasks/get`` round-trips correctly. + ``'sync_creatives'``, etc.). Stored on the registry record so + ``tasks/get`` can return it on the response payload; NOT part + of the synchronous Submitted envelope (per + ``schemas/cache/core/protocol-envelope.json``). :param progress: Latest progress payload written by :meth:`TaskHandoffContext.update`. Buyers see this on ``tasks/get`` while the task is in the ``working`` state. @@ -192,8 +194,9 @@ async def issue( :param account_id: Account that owns the task. Drives the cross-tenant access check on subsequent reads. :param task_type: Wire-spec task type (``'create_media_buy'``, - etc.). Surfaced on the Submitted envelope and on - ``tasks/get`` reads. + etc.). Persisted on the row and surfaced on ``tasks/get`` + reads; NOT included in the synchronous Submitted envelope + (per ``schemas/cache/core/protocol-envelope.json``). :returns: The framework-allocated task_id (string UUID). """ ... @@ -339,6 +342,21 @@ async def update_progress( # to never raise on transient lookup failure (see Protocol # docstring). return + if record.state in ("completed", "failed"): + # Terminal-state guard: a late progress update from a + # straggler coroutine MUST NOT mutate a finalized record + # — it would resurrect "working" appearance against + # ``tasks/get`` reads that already saw the terminal + # state. Log + drop is the safe choice (the dispatch + # wrapper is expected to swallow update failures + # anyway). + logger.warning( + "InMemoryTaskRegistry.update_progress(task_id=%s) " + "dropped: task is already in terminal state %r", + task_id, + record.state, + ) + return record.progress = dict(progress) if record.state == "submitted": record.state = "working" diff --git a/tests/test_decisioning_context_state_resolve.py b/tests/test_decisioning_context_state_resolve.py index fb635fde9..3b31eb2a4 100644 --- a/tests/test_decisioning_context_state_resolve.py +++ b/tests/test_decisioning_context_state_resolve.py @@ -392,13 +392,19 @@ def test_capabilities_governance_aware_defaults_false() -> None: def test_governance_specialisms_pinned() -> None: - """The constant tracks the AdCP 3.0 governance-* specialism set. - Drift here is a foundation-PR-level decision; this test is the - locked contract.""" + """The constant tracks every ``governance-*`` slug in the spec + enum (``schemas/cache/enums/specialism.json``). Drift here is a + foundation-PR-level decision; this test is the locked contract. + + Includes ``governance-aware-seller`` — a seller agent that + composes with a buyer's governance agent reads governance context + per-request, so the gate must catch it claiming the specialism + without wiring the StateReader (round-5 Emma P0).""" assert GOVERNANCE_SPECIALISMS == frozenset( { - "governance-spend-authority", + "governance-aware-seller", "governance-delivery-monitor", + "governance-spend-authority", } ) diff --git a/tests/test_decisioning_dispatch.py b/tests/test_decisioning_dispatch.py index fd7a169d9..01a74a697 100644 --- a/tests/test_decisioning_dispatch.py +++ b/tests/test_decisioning_dispatch.py @@ -27,6 +27,7 @@ ) from adcp.decisioning.dispatch import ( REQUIRED_METHODS_PER_SPECIALISM, + SPEC_SPECIALISM_ENUM, _build_request_context, _invoke_platform_method, _project_handoff, @@ -222,7 +223,11 @@ class _NoClaimsPlatform(DecisioningPlatform): def test_required_methods_per_specialism_pinned_for_sales() -> None: """Contract test — locks the sales core method set so future - spec churn surfaces as a visible test failure.""" + spec churn surfaces as a visible test failure. Slugs covered are + only those in the spec enum that the v6.0 framework enforces + method coverage for; non-sales spec slugs (signal-*, audience-sync, + creative-*, governance-*) emit "unenforced specialism" UserWarning + until their per-Protocol coverage lands in v6.1+.""" expected_core = { "get_products", "create_media_buy", @@ -234,14 +239,78 @@ def test_required_methods_per_specialism_pinned_for_sales() -> None: "sales-non-guaranteed", "sales-guaranteed", "sales-broadcast-tv", - "sales-streaming-tv", "sales-social", - "sales-exchange", "sales-proposal-mode", ): assert REQUIRED_METHODS_PER_SPECIALISM[slug] == expected_core, f"sales core drift on {slug}" +def test_required_methods_only_contains_spec_slugs() -> None: + """Every key in REQUIRED_METHODS_PER_SPECIALISM MUST be a real + spec specialism slug. Round-5 Emma review: shipping invented slugs + (e.g. ``sales-streaming-tv``) made adopters claiming non-spec + specialisms pass validation — silent buyer compatibility break.""" + invented = set(REQUIRED_METHODS_PER_SPECIALISM.keys()) - SPEC_SPECIALISM_ENUM + assert invented == set(), ( + f"REQUIRED_METHODS_PER_SPECIALISM contains slugs not in the spec " + f"enum: {sorted(invented)}. Either drop them or add the slug to " + f"schemas/cache/enums/specialism.json upstream." + ) + + +def test_spec_specialism_enum_matches_schema_cache() -> None: + """SPEC_SPECIALISM_ENUM mirrors ``schemas/cache/enums/specialism.json`` + verbatim. CI catches out-of-band drift when the schema cache + refreshes from upstream.""" + import json + from pathlib import Path + + schema_path = Path(__file__).parent.parent / "schemas" / "cache" / "enums" / "specialism.json" + with schema_path.open() as f: + on_disk = frozenset(json.load(f)["enum"]) + assert SPEC_SPECIALISM_ENUM == on_disk, ( + f"SPEC_SPECIALISM_ENUM drifted from on-disk spec enum. " + f"Missing from constant: {sorted(on_disk - SPEC_SPECIALISM_ENUM)}; " + f"extra in constant: {sorted(SPEC_SPECIALISM_ENUM - on_disk)}." + ) + + +def test_validate_platform_warns_on_unenforced_spec_specialism() -> None: + """Spec-recognized specialism that the v6.0 framework doesn't yet + enforce (e.g. ``signal-marketplace``) emits an "unenforced + specialism" UserWarning — distinct from the "novel" warning, since + it's a real claim, just not method-checked.""" + + class _UnenforcedSpecPlatform(DecisioningPlatform): + capabilities = DecisioningCapabilities(specialisms=["signal-marketplace"]) + accounts = SingletonAccounts(account_id="hello") + + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always", UserWarning) + validate_platform(_UnenforcedSpecPlatform()) + matched = [w for w in caught if "signal-marketplace" in str(w.message)] + assert len(matched) == 1 + assert "spec-recognized" in str(matched[0].message) + + +def test_validate_platform_typo_check_uses_spec_enum() -> None: + """Typo detector matches against the full spec enum, not just + REQUIRED_METHODS keys. A typo of ``signal-marketplace`` (a spec + slug we don't yet enforce coverage for) still trips the hard fail + with a "did you mean…" hint.""" + + class _TypoOfSpecSlugPlatform(DecisioningPlatform): + # Missing 'l' in "marketplace". + capabilities = DecisioningCapabilities(specialisms=["signal-marketpace"]) + accounts = SingletonAccounts(account_id="hello") + + with pytest.raises(AdcpError) as exc_info: + validate_platform(_TypoOfSpecSlugPlatform()) + assert exc_info.value.code == "INVALID_REQUEST" + msg = str(exc_info.value).lower() + assert "did you mean 'signal-marketplace'" in msg + + # ---- compose_caller_identity (D9 round-3) ---- @@ -630,8 +699,11 @@ async def update_media_buy(self, media_buy_id, patch, ctx): async def test_handoff_returns_submitted_envelope( executor: ThreadPoolExecutor, ) -> None: - """The synchronous return is the wire Submitted envelope — - {task_id, status, task_type}. Buyer pattern-matches on shape.""" + """The synchronous return is the wire Submitted envelope per + ``schemas/cache/core/protocol-envelope.json`` — only ``task_id`` + + ``status``. ``task_type`` lives on TaskRecord (for tasks/get + reads) but never on the wire envelope; leaking the Python method + name would couple the wire to handler-internal naming.""" registry = InMemoryTaskRegistry() ctx = _build_request_context(ToolContext(), Account(id="acct_a"), None) completed = asyncio.Event() @@ -649,8 +721,10 @@ async def _handoff_fn(task_ctx): executor=executor, ) assert envelope["status"] == "submitted" - assert envelope["task_type"] == "create_media_buy" assert envelope["task_id"].startswith("task_") + # Spec: Submitted wire envelope is {task_id, status} only. + assert "task_type" not in envelope + assert set(envelope.keys()) == {"task_id", "status"} # Wait for the background task to complete so the assertion below # is deterministic. (CI may schedule background tasks slowly.) @@ -658,9 +732,12 @@ async def _handoff_fn(task_ctx): # Yield once more so the registry.complete() call lands. await asyncio.sleep(0.05) + # task_type IS on TaskRecord (registry surface) — buyer-side + # tasks/get round-trips it; handler-internal use only. rec = await registry.get(envelope["task_id"], expected_account_id="acct_a") assert rec is not None assert rec["state"] == "completed" + assert rec["task_type"] == "create_media_buy" assert rec["result"] == {"media_buy_id": "mb_1"} @@ -859,7 +936,9 @@ async def create_media_buy(self, req, ctx): executor=executor, registry=registry, ) - # Returned the wire envelope, NOT the handoff marker. + # Returned the wire envelope, NOT the handoff marker. The wire + # shape is {task_id, status} only — task_type lives on the + # registry for tasks/get reads. assert isinstance(result, dict) assert result["status"] == "submitted" - assert result["task_type"] == "create_media_buy" + assert "task_type" not in result diff --git a/tests/test_decisioning_handler.py b/tests/test_decisioning_handler.py index 1e68c651d..1edd15d08 100644 --- a/tests/test_decisioning_handler.py +++ b/tests/test_decisioning_handler.py @@ -237,10 +237,11 @@ async def create_media_buy(self, req, ctx): ), ToolContext(), ) - # Wire envelope, not Pydantic. + # Wire envelope, not Pydantic. Spec submitted shape is + # {task_id, status} only. assert isinstance(result, dict) assert result["status"] == "submitted" - assert result["task_type"] == "create_media_buy" + assert "task_type" not in result # ---- update_media_buy — arg-projected (media_buy_id, patch, ctx) ---- diff --git a/tests/test_decisioning_serve.py b/tests/test_decisioning_serve.py index 8c7587b4e..f3f09f2ed 100644 --- a/tests/test_decisioning_serve.py +++ b/tests/test_decisioning_serve.py @@ -231,12 +231,18 @@ class _InstrumentedInMemoryRegistry(InMemoryTaskRegistry): assert "_InstrumentedInMemoryRegistry" in str(exc_info.value) -def test_create_raises_when_duck_typed_non_durable_used_in_production() -> None: - """Custom registry with no is_durable marker (defaults False via - getattr) trips the gate. Adopters MUST explicitly opt into - is_durable=True; safe-by-default.""" +def test_create_raises_when_registry_missing_is_durable_marker() -> None: + """Round-5 Emma P1: a custom registry without the ``is_durable`` + marker fails fast at server boot — the framework refuses to guess + whether the registry is durable. The diagnostic distinguishes + "marker absent" (programmer error) from "marker=False in prod" + (deployment misconfig). Without this guard, the prod gate's + ``getattr(..., False)`` would treat the missing marker as + non-durable and emit a misleading "non-durable refused" error.""" class _BareRegistry: + # NO is_durable declared — programmer error. + async def issue(self, *, account_id, task_type): return "task_x" @@ -253,13 +259,52 @@ async def get(self, task_id, *, expected_account_id=None): return None platform = _BarePlatform() - with patch.dict(os.environ, {"ADCP_ENV": "production"}): + # Fires regardless of env — the marker is the programmer-facing + # contract, not the deployment gate. + with patch.dict(os.environ, {"ADCP_ENV": "dev"}): os.environ.pop("ADCP_DECISIONING_ALLOW_INMEMORY_TASKS", None) with pytest.raises(AdcpError) as exc_info: create_adcp_server_from_platform( platform, registry=_BareRegistry() # type: ignore[arg-type] ) assert exc_info.value.code == "INVALID_REQUEST" + assert "is_durable" in str(exc_info.value) + assert "missing" in str(exc_info.value).lower() + + +def test_create_raises_when_duck_typed_non_durable_used_in_production() -> None: + """Custom registry that explicitly declares is_durable=False trips + the prod gate. Distinct from the missing-marker case above — this + one is a deployment misconfig, not a programmer error.""" + + class _ExplicitlyNonDurableRegistry: + is_durable = False # explicit opt-out, just no opt-in env var + + async def issue(self, *, account_id, task_type): + return "task_x" + + async def update_progress(self, task_id, progress): + pass + + async def complete(self, task_id, result): + pass + + async def fail(self, task_id, error): + pass + + async def get(self, task_id, *, expected_account_id=None): + return None + + platform = _BarePlatform() + with patch.dict(os.environ, {"ADCP_ENV": "production"}): + os.environ.pop("ADCP_DECISIONING_ALLOW_INMEMORY_TASKS", None) + with pytest.raises(AdcpError) as exc_info: + create_adcp_server_from_platform( + platform, + registry=_ExplicitlyNonDurableRegistry(), # type: ignore[arg-type] + ) + assert exc_info.value.code == "INVALID_REQUEST" + assert "Non-durable" in str(exc_info.value) def test_create_passes_in_dev_env_with_default_registry() -> None: diff --git a/tests/test_decisioning_task_registry.py b/tests/test_decisioning_task_registry.py index b4f29a4f2..cb256c94a 100644 --- a/tests/test_decisioning_task_registry.py +++ b/tests/test_decisioning_task_registry.py @@ -213,6 +213,49 @@ async def test_complete_unknown_task_raises() -> None: await reg.complete("nonexistent", {"x": 1}) +@pytest.mark.asyncio +async def test_update_progress_after_completed_is_dropped(caplog) -> None: + """Round-5 Emma P1: a straggler progress update arriving after + ``complete`` MUST NOT mutate the record. ``tasks/get`` already + returned the terminal payload; rewriting the row would resurrect + a "working" appearance against any reader holding the prior + state.""" + reg = InMemoryTaskRegistry() + tid = await reg.issue(account_id="acct_a", task_type="create_media_buy") + await reg.complete(tid, {"media_buy_id": "mb_1"}) + + import logging + + with caplog.at_level(logging.WARNING, logger="adcp.decisioning.task_registry"): + await reg.update_progress(tid, {"step": 99}) + + rec = await reg.get(tid, expected_account_id="acct_a") + assert rec is not None + assert rec["state"] == "completed" + assert rec["progress"] is None # drop, not write + assert rec["result"] == {"media_buy_id": "mb_1"} + # Operator-visible signal — drop without trace would hide + # programmer bugs. + assert any("terminal state" in rec.message for rec in caplog.records) + + +@pytest.mark.asyncio +async def test_update_progress_after_failed_is_dropped() -> None: + """Same guard as completed: a straggler progress write against a + failed task is dropped, not applied.""" + reg = InMemoryTaskRegistry() + tid = await reg.issue(account_id="acct_a", task_type="create_media_buy") + await reg.fail(tid, {"code": "INTERNAL_ERROR", "message": "boom"}) + + await reg.update_progress(tid, {"step": 50}) + + rec = await reg.get(tid, expected_account_id="acct_a") + assert rec is not None + assert rec["state"] == "failed" + assert rec["progress"] is None + assert rec["error"] == {"code": "INTERNAL_ERROR", "message": "boom"} + + # ---- fail ---- diff --git a/tests/test_hello_seller_async_handoff_integration.py b/tests/test_hello_seller_async_handoff_integration.py index a464d79a8..f493faf84 100644 --- a/tests/test_hello_seller_async_handoff_integration.py +++ b/tests/test_hello_seller_async_handoff_integration.py @@ -147,10 +147,13 @@ async def test_create_media_buy_large_budget_returns_submitted_envelope( req = _build_request(total_budget=100_000.0, idem_suffix="enterprise") resp = await handler.create_media_buy(req, ToolContext()) - # Sync return is the Submitted envelope. + # Sync return is the Submitted envelope per + # ``schemas/cache/core/protocol-envelope.json`` — {task_id, status} + # only. ``task_type`` is registry-internal (tasks/get reads it but + # the wire never carries it). assert isinstance(resp, dict) assert resp["status"] == "submitted" - assert resp["task_type"] == "create_media_buy" + assert "task_type" not in resp task_id = resp["task_id"] assert task_id.startswith("task_")