Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
## Unreleased

- `trusted_endpoints`: registered URLs may now contain FastAPI/Express-style path placeholders. `{id}` matches exactly one path segment, `{rest:path}` matches any subtree. Plain URLs without `{` keep exact-match semantics — no migration needed for existing rows. Both `is_trusted_endpoint` and the snapshot tamper-check inside `evaluate_handoff` honor the new syntax. Closes #14.
- `set_intercept_url_allowlist`: now accepts the same `{id}` / `{rest:path}` placeholders as `trusted_endpoints` (parity, single matching helper used by both code paths). A registered `https://api.example.com/customers/{id}` covers the concrete `https://api.example.com/customers/42` for both intercept recording and the simulation tamper hook. Plain URLs without `{` keep exact-match semantics — zero migration. Closes #20.
- `json_path` (used by `field_extraction` / `schema_type` / `range_threshold`): now supports array indexing. Use bracket form (`items[0].subject`, `[0].status`) or numeric-segment fallback (`items.0.subject`). Out-of-range indices raise `IndexError` and surface as `CAUGHT` with `"out of range"` in the detail. Pure-dict paths are unchanged. Closes #23.
- README: new "Getting `PROVABLY_API_KEY` and `PROVABLY_ORG_ID`" subsection walking through sign-up at app.provably.ai → create org → Integrations menu, plus a pointer to provably.ai/docs.
- **BREAKING:** removed `default_cluster_b_url()` and the `CLUSTER_B_URL` env var — leftovers from the langgraph-demo monorepo extraction with a `localhost:8082` default and opaque "cluster B" naming the SDK has no business assuming. `post_handoff(receiver_url, payload)` (positional arg renamed from `cluster_b_url`) takes the URL directly — supply it from your application's own configuration.

Expand Down
63 changes: 49 additions & 14 deletions src/provably/handoff/eval_modes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

import math
import re
from typing import Any

import jsonschema
Expand Down Expand Up @@ -31,7 +32,7 @@ def evaluate_claim(claim: HandoffClaim, indexed_root: Any) -> dict[str, Any]:

try:
at_path = _get_by_json_path(indexed_root, claim.json_path)
except (KeyError, TypeError, ValueError) as exc:
except (KeyError, IndexError, TypeError, ValueError) as exc:
return {**base, "result": "CAUGHT", "detail": f"json_path: {exc}"}

base["indexed_at_path"] = canonical_json(at_path)
Expand Down Expand Up @@ -96,22 +97,62 @@ def _eval_range_threshold(claim: HandoffClaim, at_path: Any, base: dict[str, Any
return {**base, "result": "PASS"}


def _normalize_json_path(path: str) -> str:
"""Strip JSONPath / Relaxed JSON Pointer prefixes so we only walk plain dot paths.
_BRACKET_INDEX_RE = re.compile(r"\[(\d+)\]")


Examples: ``"$.userId"`` → ``"userId"``; ``"$"`` / ``""`` → ``""`` (root); ``"a.b"`` unchanged.
def _normalize_json_path(path: str) -> str:
"""Strip JSONPath / Relaxed JSON Pointer prefixes and split bracket indices into their
own segments so the dot-tokenizer can walk them.

Examples:
- ``"$.userId"`` → ``"userId"``
- ``"$"`` / ``""`` → ``""`` (root)
- ``"a.b"`` → ``"a.b"`` (unchanged)
- ``"items[0].subject"`` → ``"items.[0].subject"`` (bracket lifted to its own segment)
- ``"[0].status"`` → ``"[0].status"`` (leading-dot stripped)
"""
p = (path or "").strip()
if not p or p == "$":
return ""
if p.startswith("$."):
return p[2:].strip()
if p.startswith("$"):
p = p[2:].strip()
elif p.startswith("$"):
# e.g. "$['x']" not supported; bare "$x" is treated as path after $
return p[1:].lstrip(".").strip()
p = p[1:].lstrip(".").strip()
# Lift bracket indices into standalone dot segments so ``items[0]`` becomes
# ``items.[0]`` and ``items[0][1]`` becomes ``items.[0].[1]``. The empty-segment
# filter in ``_get_by_json_path`` swallows any double-dots this introduces.
p = _BRACKET_INDEX_RE.sub(r".[\1]", p).lstrip(".")
return p


def _step_into(cursor: Any, segment: str) -> Any:
    """Advance ``cursor`` by one path segment and return the child value.

    - ``[N]`` (bracket form) against a list → ``cursor[N]``.
    - Decimal segment against a list → ``cursor[N]`` (fallback for ``items.0.foo``).
    - Any segment against a dict → ``cursor[segment]``; bracket and numeric
      segments are looked up as literal string keys there.

    Raises:
        IndexError: the list index is past the end; the message contains
            ``"out of range"``.
        KeyError: the key is missing from a dict, a non-index segment is applied
            to a list, or ``cursor`` is neither a dict nor a list.
    """
    if isinstance(cursor, list):
        bracket = _BRACKET_INDEX_RE.fullmatch(segment)
        if bracket:
            digits = bracket.group(1)
        elif segment.isdecimal():
            # ``isdecimal`` (not ``isdigit``): ``isdigit`` also accepts characters
            # such as "²" that ``int()`` rejects with a ValueError.
            digits = segment
        else:
            raise KeyError(
                f"expected list index (N or [N]) at segment {segment!r}, "
                f"got non-index segment for {type(cursor).__name__}"
            )
        idx = int(digits)
        if idx >= len(cursor):
            raise IndexError(f"index {idx} out of range (list has {len(cursor)} elements)")
        return cursor[idx]
    if isinstance(cursor, dict):
        # Explicit membership test so mappings with ``__missing__`` (e.g. defaultdict)
        # still raise instead of fabricating a value.
        if segment not in cursor:
            raise KeyError(segment)
        return cursor[segment]
    raise KeyError(
        f"expected dict or list at segment {segment!r}, got {type(cursor).__name__}"
    )


def _get_by_json_path(obj: Any, path: str) -> Any:
rel = _normalize_json_path(path)
if not rel:
Expand All @@ -121,13 +162,7 @@ def _get_by_json_path(obj: Any, path: str) -> Any:
segment = segment.strip()
if not segment:
continue
if not isinstance(cursor, dict):
raise KeyError(
f"expected dict at segment {segment!r}, got {type(cursor).__name__}"
)
if segment not in cursor:
raise KeyError(segment)
cursor = cursor[segment]
cursor = _step_into(cursor, segment)
return cursor


Expand Down
30 changes: 26 additions & 4 deletions src/provably/intercept/interceptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
insert_intercept_row,
request_payload_dict,
)
from provably.trusted_endpoints import normalize_url_for_trust
from provably.trusted_endpoints import _matches_registered, normalize_url_for_trust

_ctx_agent_id: ContextVar[str] = ContextVar("provably_agent_id", default="")
_ctx_action_name: ContextVar[str] = ContextVar("provably_action_name", default="")
Expand Down Expand Up @@ -67,6 +67,12 @@ def set_intercept_url_allowlist(urls: list[str] | None) -> None:

Pass ``None`` to clear. Pass a list to restrict tamper to the run's dashboard endpoints;
URLs not in the set are not recorded and pass through unchanged.

Entries support the same FastAPI/Express-style placeholders as the trusted-endpoint
registry: ``{name}`` matches one path segment, ``{name:path}`` matches any subtree.
A registered ``https://api.example.com/customers/{id}`` matches the concrete URL
``https://api.example.com/customers/42``. Plain URLs without ``{`` keep exact-match
semantics.
"""
global _url_allowlist
if urls is None:
Expand All @@ -78,6 +84,22 @@ def set_intercept_url_allowlist(urls: list[str] | None) -> None:
_url_allowlist.discard("")


def _url_in_allowlist(nurl: str) -> bool:
    """Report whether the normalized URL ``nurl`` is covered by ``_url_allowlist``.

    A literal membership test is tried first. Only when that misses are the entries
    scanned: those containing ``{`` are handed to ``_matches_registered`` as patterns,
    while entries without ``{`` are skipped and so only ever match literally.

    The caller must already have confirmed ``_url_allowlist is not None``.
    """
    assert _url_allowlist is not None
    if nurl in _url_allowlist:
        return True
    return any(
        "{" in candidate and _matches_registered(nurl, candidate)
        for candidate in _url_allowlist
    )


@contextmanager
def intercept_context(
*, agent_id: str, action_name: str, intercept_index: int = 0
Expand Down Expand Up @@ -262,14 +284,14 @@ def _record_and_maybe_tamper(
"""
req = request_payload_dict(url, method, req_kwargs)
nurl = normalize_url_for_trust(str(url))
if _url_allowlist is not None and nurl not in _url_allowlist:
in_allowlist = _url_allowlist is not None and _url_in_allowlist(nurl)
if _url_allowlist is not None and not in_allowlist:
return response
if _enabled:
_insert_row(url, req, raw, method=method)
# Tamper hook fires only for explicit run endpoints; never for OpenRouter, Provably API,
# cluster handoff posts (those run with allowlist cleared or off-list).
tamper = _url_allowlist is not None and nurl in _url_allowlist
mutated = _maybe_transform_body(raw) if tamper else raw
mutated = _maybe_transform_body(raw) if in_allowlist else raw
if mutated is raw:
return response
if isinstance(response, requests.Response):
Expand Down
87 changes: 87 additions & 0 deletions tests/unit/test_eval_modes.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import pytest

from provably.handoff.eval_modes import _get_by_json_path, _normalize_json_path, evaluate_claim
from provably.handoff.types import HandoffClaim

Expand All @@ -21,6 +23,91 @@ def test_get_by_path_jsonpath() -> None:
assert _get_by_json_path(obj, "$") == obj


# ---------------------------------------------------------------------------
# Array indexing in json_path (#23): both bracket form and numeric fallback.
# ---------------------------------------------------------------------------


def test_normalize_jsonpath_lifts_brackets_into_segments() -> None:
    """Bracket indices come out of normalization as their own dot-separated segments."""
    expectations = {
        "items[0].subject": "items.[0].subject",
        "items[0][1]": "items.[0].[1]",
        "[0].status": "[0].status",
        "$.items[2].quantity": "items.[2].quantity",
    }
    for raw_path, normalized in expectations.items():
        assert _normalize_json_path(raw_path) == normalized


def test_get_by_path_bracket_indexing_at_root() -> None:
    """A leading ``[N]`` indexes straight into a list-shaped root."""
    tickets = [{"status": "open"}]
    assert _get_by_json_path(tickets, "[0].status") == "open"

    numbers = [10, 20, 30]
    assert _get_by_json_path(numbers, "[1]") == 20


def test_get_by_path_bracket_indexing_inside_dict() -> None:
    """``key[N]`` reaches into a list stored under a dict key, with or without ``$.``."""
    payload = {"items": [{"a": 1}, {"a": 2}, {"a": 3}]}
    cases = (
        ("items[0].a", 1),
        ("items[2].a", 3),
        ("$.items[1].a", 2),
    )
    for path, expected in cases:
        assert _get_by_json_path(payload, path) == expected


def test_get_by_path_numeric_segment_fallback_for_lists() -> None:
    """A bare numeric segment (``items.0.a``) indexes a list — easier shape for naive LLMs."""
    payload = {"items": [{"a": 1}, {"a": 2}]}
    for path, expected in (("items.0.a", 1), ("items.1.a", 2)):
        assert _get_by_json_path(payload, path) == expected


def test_get_by_path_nested_lists() -> None:
    """Chained brackets walk a list of lists of dicts."""
    grid = {"matrix": [[{"v": "a"}, {"v": "b"}], [{"v": "c"}]]}
    for path, expected in (("matrix[0][1].v", "b"), ("matrix[1][0].v", "c")):
        assert _get_by_json_path(grid, path) == expected


def test_get_by_path_index_out_of_range_raises_indexerror() -> None:
    """An index past the end of a list raises ``IndexError`` mentioning "out of range"."""
    cases = (
        ([{"a": 1}], "[5].a"),
        ({"items": [1, 2]}, "items[5]"),
    )
    for root, path in cases:
        with pytest.raises(IndexError, match="out of range"):
            _get_by_json_path(root, path)


def test_get_by_path_dict_segment_against_list_still_raises_keyerror() -> None:
    """A segment that is neither numeric nor bracket form, applied to a list, still
    raises ``KeyError`` — the error class from before array indexing is preserved."""
    list_under_key = {"a": [1, 2]}
    with pytest.raises(KeyError):
        _get_by_json_path(list_under_key, "a.b")


def test_get_by_path_existing_dict_paths_still_work() -> None:
    """Regression guard: pure-dict walks behave exactly as before."""
    flat = {"plan": "Enterprise"}
    nested = {"a": {"b": 1}}
    assert _get_by_json_path(flat, "plan") == "Enterprise"
    assert _get_by_json_path(nested, "a.b") == 1


def test_field_extraction_pass_with_array_index() -> None:
    """Run the real evaluator on a list-shaped indexed value with a ``[0].status`` claim."""
    tickets = [{"status": "open", "id": 42}, {"status": "closed", "id": 41}]
    verdict = evaluate_claim(
        HandoffClaim(
            action_name="list_open_tickets",
            claimed_value="open",
            query_record_id="q1",
            verification_mode="field_extraction",
            json_path="[0].status",
        ),
        tickets,
    )
    assert verdict["result"] == "PASS", verdict


def test_field_extraction_caught_when_index_out_of_range() -> None:
    """An out-of-range index in ``json_path`` yields ``CAUGHT`` with "out of range" in the detail."""
    out_of_range_claim = HandoffClaim(
        action_name="list_open_tickets",
        claimed_value="open",
        query_record_id="q1",
        verification_mode="field_extraction",
        json_path="[7].status",
    )
    single_ticket = [{"status": "open"}]
    verdict = evaluate_claim(out_of_range_claim, single_ticket)
    assert verdict["result"] == "CAUGHT"
    assert "out of range" in verdict["detail"]


def test_schema_type_passes_with_dollar_path() -> None:
"""Regression: LLM emits JSONPath ``$.userId``; we must not index ``['$']``."""
claim = HandoffClaim(
Expand Down
99 changes: 99 additions & 0 deletions tests/unit/test_interceptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,105 @@ def fake_insert(url: str, *_a: Any, **_k: Any) -> None:
interceptor.set_intercept_url_allowlist(None)


# ---------------------------------------------------------------------------
# Allowlist pattern matching ({id} / {path:path} parity with trusted_endpoints)
# ---------------------------------------------------------------------------


def _attach_with_pattern_allowlist(
    monkeypatch: Any, allowlist_entries: list[str], call_url: str
) -> tuple[list[str], list[Any]]:
    """Drive ``interceptor._attach`` once against ``call_url`` under a temporary allowlist.

    ``_insert_row`` and ``_maybe_transform_body`` are swapped for recording stubs and
    ``_enabled`` is forced to ``True``. The allowlist is cleared again afterwards, even
    if ``_attach`` raises.

    Returns the URLs handed to ``_insert_row`` and the bodies handed to
    ``_maybe_transform_body``, in that order.
    """
    seen_urls: list[str] = []
    seen_bodies: list[Any] = []

    def _record_row(url: str, *_a: Any, **_k: Any) -> None:
        seen_urls.append(url)

    def _passthrough_transform(raw: Any) -> Any:
        # Record the call but hand the very same object back so nothing is mutated.
        seen_bodies.append(raw)
        return raw

    monkeypatch.setattr(interceptor, "_insert_row", _record_row)
    monkeypatch.setattr(interceptor, "_maybe_transform_body", _passthrough_transform)
    monkeypatch.setattr(interceptor, "_enabled", True)
    try:
        interceptor.set_intercept_url_allowlist(allowlist_entries)
        response = requests.Response()
        response.status_code = 200
        response._content = b'{"ok": true}'
        response.encoding = "utf-8"
        interceptor._attach(response, call_url, "GET", {})
    finally:
        interceptor.set_intercept_url_allowlist(None)
    return seen_urls, seen_bodies


def test_allowlist_pattern_matches_concrete_url(monkeypatch: Any) -> None:
    """A registered ``/customers/{id}`` covers the concrete ``/customers/42`` URL: the
    call is recorded and the tamper hook runs once."""
    pattern = "https://api.example.com/customers/{id}"
    concrete = "https://api.example.com/customers/42"
    recorded, tampered = _attach_with_pattern_allowlist(monkeypatch, [pattern], concrete)
    assert recorded == [concrete]
    assert len(tampered) == 1


def test_allowlist_pattern_rejects_extra_segment(monkeypatch: Any) -> None:
    """A single-segment placeholder does not stretch: ``/customers/{id}`` must NOT
    cover ``/customers/42/orders``."""
    pattern = "https://api.example.com/customers/{id}"
    too_deep = "https://api.example.com/customers/42/orders"
    recorded, tampered = _attach_with_pattern_allowlist(monkeypatch, [pattern], too_deep)
    assert recorded == []
    assert tampered == []


def test_allowlist_path_placeholder_matches_subtree(monkeypatch: Any) -> None:
    """``{rest:path}`` spans any subtree, nested segments included."""
    subtree_pattern = "https://api.example.com/customers/{rest:path}"
    nested_url = "https://api.example.com/customers/42/orders/9"
    recorded, _ = _attach_with_pattern_allowlist(monkeypatch, [subtree_pattern], nested_url)
    assert recorded == [nested_url]


def test_allowlist_mixed_exact_and_pattern(monkeypatch: Any) -> None:
    """With exact URLs and patterns side by side, each entry keeps its own semantics."""
    entries = (
        "https://api.example.com/health",
        "https://api.example.com/customers/{id}",
    )
    # url -> whether the call should be recorded
    expectations = {
        "https://api.example.com/health": True,  # exact match
        "https://api.example.com/customers/9": True,  # pattern match
        "https://api.example.com/customers/9/orders": False,  # past pattern
        "https://api.example.com/other": False,  # unrelated
    }
    for url, expected_recorded in expectations.items():
        # Fresh list per call, exactly as if the literal were written inline.
        recorded, _ = _attach_with_pattern_allowlist(monkeypatch, list(entries), url)
        assert (recorded == [url]) is expected_recorded, (
            f"url={url!r}: expected_recorded={expected_recorded}, got recorded={recorded}"
        )


def test_allowlist_plain_url_still_uses_exact_match(monkeypatch: Any) -> None:
    """An entry without ``{`` stays exact-match only — it must not act as a prefix for
    deeper URLs."""
    plain_entry = "https://api.example.com/customers"  # no placeholders → exact-only
    deeper_url = "https://api.example.com/customers/42"
    recorded, _ = _attach_with_pattern_allowlist(monkeypatch, [plain_entry], deeper_url)
    assert recorded == []


# ---------------------------------------------------------------------------
# Phase 1 additions: Client.send / AsyncClient.send / Session.send coverage,
# re-entry guard, and self-egress exemption.
Expand Down
Loading