diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c9b7e8..fdaa8e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,8 @@ ## Unreleased - `trusted_endpoints`: registered URLs may now contain FastAPI/Express-style path placeholders. `{id}` matches exactly one path segment, `{rest:path}` matches any subtree. Plain URLs without `{` keep exact-match semantics — no migration needed for existing rows. Both `is_trusted_endpoint` and the snapshot tamper-check inside `evaluate_handoff` honor the new syntax. Closes #14. +- `set_intercept_url_allowlist`: now accepts the same `{id}` / `{rest:path}` placeholders as `trusted_endpoints` (parity, single matching helper used by both code paths). A registered `https://api.example.com/customers/{id}` covers the concrete `https://api.example.com/customers/42` for both intercept recording and the simulation tamper hook. Plain URLs without `{` keep exact-match semantics — zero migration. Closes #20. +- `json_path` (used by `field_extraction` / `schema_type` / `range_threshold`): now supports array indexing. Use bracket form (`items[0].subject`, `[0].status`) or numeric-segment fallback (`items.0.subject`). Out-of-range indices raise `IndexError` and surface as `CAUGHT` with `"out of range"` in the detail. Pure-dict paths are unchanged. Closes #23. - README: new "Getting `PROVABLY_API_KEY` and `PROVABLY_ORG_ID`" subsection walking through sign-up at app.provably.ai → create org → Integrations menu, plus a pointer to provably.ai/docs. - **BREAKING:** removed `default_cluster_b_url()` and the `CLUSTER_B_URL` env var — leftovers from the langgraph-demo monorepo extraction with a `localhost:8082` default and opaque "cluster B" naming the SDK has no business assuming. `post_handoff(receiver_url, payload)` (positional arg renamed from `cluster_b_url`) takes the URL directly — supply it from your application's own configuration. 
# Matches one bracketed, non-negative list index such as ``[0]``; group 1
# captures the digits.
_BRACKET_INDEX_RE = re.compile(r"\[(\d+)\]")


def _normalize_json_path(path: str) -> str:
    """Reduce a JSONPath-like string to a plain dot path with standalone index segments.

    Leading ``$`` / ``$.`` prefixes are stripped, and every bracket index is lifted
    into its own dot segment so the path can be tokenized by splitting on ``"."``.

    Examples:
        - ``"$.userId"`` → ``"userId"``
        - ``"$"`` / ``""`` / ``None`` → ``""`` (root)
        - ``"a.b"`` → ``"a.b"`` (unchanged)
        - ``"items[0].subject"`` → ``"items.[0].subject"``
        - ``"items[0][1]"`` → ``"items.[0].[1]"``
        - ``"[0].status"`` → ``"[0].status"`` (the leading dot the rewrite adds is stripped)

    Args:
        path: The raw path as written in the claim; falsy values mean "root".

    Returns:
        The normalized dot path, or ``""`` when the path addresses the root.
    """
    p = (path or "").strip()
    if not p or p == "$":
        return ""
    if p.startswith("$."):
        p = p[2:].strip()
    elif p.startswith("$"):
        # e.g. "$['x']" not supported; bare "$x" is treated as path after $
        p = p[1:].lstrip(".").strip()
    # ``items[0]`` becomes ``items.[0]``. A path that already reads ``items.[0]``
    # ends up with a double dot, which is harmless: empty segments are skipped
    # by the walker in ``_get_by_json_path``.
    return _BRACKET_INDEX_RE.sub(r".[\1]", p).lstrip(".")


def _parse_list_index(segment: str) -> int | None:
    """Return the list index encoded by ``segment``, or ``None`` if it is not an index.

    Accepts the bracket form (``"[3]"``) and the bare numeric form (``"3"``).
    """
    bracket = _BRACKET_INDEX_RE.fullmatch(segment)
    if bracket:
        return int(bracket.group(1))
    # ``isdecimal`` rather than ``isdigit``: ``isdigit`` also accepts characters
    # such as "²" that ``int()`` rejects, which would leak a confusing
    # ``ValueError: invalid literal`` instead of a path error.
    if segment.isdecimal():
        return int(segment)
    return None


def _step_into(cursor: Any, segment: str) -> Any:
    """Walk one path segment from ``cursor`` and return the value found there.

    - List cursor: ``segment`` must be an index, either ``[N]`` or bare ``N``
      (the latter is the fallback for ``items.0.foo``).
    - Dict cursor: ``segment`` is used as the key verbatim (so ``"0"`` and ``"[0]"``
      are ordinary keys against a dict).

    Raises:
        IndexError: the index is beyond the end of the list.
        KeyError: the key is missing from a dict, the segment is not an index
            while the cursor is a list, or the cursor is neither dict nor list.
    """
    if isinstance(cursor, list):
        idx = _parse_list_index(segment)
        if idx is None:
            # The previous message ("expected dict or list ..., got list") was
            # self-contradictory; state what is actually wrong.
            raise KeyError(
                f"expected a list index at segment {segment!r}, cursor is a list"
            )
        if idx >= len(cursor):
            raise IndexError(f"index {idx} out of range (list has {len(cursor)} elements)")
        return cursor[idx]
    if isinstance(cursor, dict):
        if segment not in cursor:
            raise KeyError(segment)
        return cursor[segment]
    raise KeyError(
        f"expected dict or list at segment {segment!r}, got {type(cursor).__name__}"
    )


def _get_by_json_path(obj: Any, path: str) -> Any:
    """Resolve ``path`` against ``obj`` and return the addressed value.

    An empty / root path returns ``obj`` itself. Errors from ``_step_into``
    (``KeyError`` / ``IndexError``) propagate to the caller.
    """
    # NOTE(review): the lines between ``if not rel:`` and the per-segment strip are
    # elided by the diff hunk boundary; they are reconstructed here from the hunk
    # context and the accompanying tests. Confirm against the full file.
    rel = _normalize_json_path(path)
    if not rel:
        return obj
    cursor = obj
    for segment in rel.split("."):
        segment = segment.strip()
        if not segment:
            continue
        cursor = _step_into(cursor, segment)
    return cursor
def _url_in_allowlist(nurl: str) -> bool:
    """Report whether ``nurl`` is covered by the current ``_url_allowlist``.

    An exact set-membership hit is tried first. On a miss, every allowlist entry is
    scanned, but only entries containing ``{`` are handed to ``_matches_registered``
    for placeholder matching; plain entries never match anything but themselves.

    The caller must already have established that ``_url_allowlist`` is not ``None``.
    """
    # NOTE(review): presumably ``nurl`` is already normalized by the caller and
    # ``_matches_registered(url, pattern)`` implements the ``{name}`` /
    # ``{name:path}`` rules of the trusted-endpoint registry; verify there.
    assert _url_allowlist is not None
    if nurl in _url_allowlist:
        return True
    return any(
        "{" in pattern and _matches_registered(nurl, pattern)
        for pattern in _url_allowlist
    )
# ---------------------------------------------------------------------------
# tests/unit/test_eval_modes.py
# Array indexing in json_path (#23): both bracket form and numeric fallback.
# ---------------------------------------------------------------------------


def test_normalize_jsonpath_lifts_brackets_into_segments() -> None:
    """Bracket indices are rewritten as standalone dot segments."""
    expected_by_input = {
        "items[0].subject": "items.[0].subject",
        "items[0][1]": "items.[0].[1]",
        "[0].status": "[0].status",
        "$.items[2].quantity": "items.[2].quantity",
    }
    for raw_path, normalized in expected_by_input.items():
        assert _normalize_json_path(raw_path) == normalized


def test_get_by_path_bracket_indexing_at_root() -> None:
    """A leading ``[N]`` indexes straight into a list-shaped root."""
    tickets = [{"status": "open"}]
    numbers = [10, 20, 30]
    assert _get_by_json_path(tickets, "[0].status") == "open"
    assert _get_by_json_path(numbers, "[1]") == 20


def test_get_by_path_bracket_indexing_inside_dict() -> None:
    """``key[N]`` indexes a list stored under a dict key, with or without ``$.``."""
    payload = {"items": [{"a": 1}, {"a": 2}, {"a": 3}]}
    for json_path, wanted in (("items[0].a", 1), ("items[2].a", 3), ("$.items[1].a", 2)):
        assert _get_by_json_path(payload, json_path) == wanted


def test_get_by_path_numeric_segment_fallback_for_lists() -> None:
    """``items.0.a`` works when cursor is a list — easier shape for naive LLMs."""
    payload = {"items": [{"a": 1}, {"a": 2}]}
    for position, wanted in enumerate((1, 2)):
        assert _get_by_json_path(payload, f"items.{position}.a") == wanted


def test_get_by_path_nested_lists() -> None:
    """Bracket form chains: list of lists of dicts."""
    payload = {"matrix": [[{"v": "a"}, {"v": "b"}], [{"v": "c"}]]}
    assert _get_by_json_path(payload, "matrix[0][1].v") == "b"
    assert _get_by_json_path(payload, "matrix[1][0].v") == "c"


def test_get_by_path_index_out_of_range_raises_indexerror() -> None:
    """An index past the end of the list raises ``IndexError`` mentioning the range."""
    failing_lookups = (
        ([{"a": 1}], "[5].a"),
        ({"items": [1, 2]}, "items[5]"),
    )
    for root, json_path in failing_lookups:
        with pytest.raises(IndexError, match="out of range"):
            _get_by_json_path(root, json_path)


def test_get_by_path_dict_segment_against_list_still_raises_keyerror() -> None:
    """If the path expects a dict-like step but cursor is a list (and the segment isn't
    numeric or bracket form), we still raise — old error class preserved."""
    list_under_key = {"a": [1, 2]}
    with pytest.raises(KeyError):
        _get_by_json_path(list_under_key, "a.b")


def test_get_by_path_existing_dict_paths_still_work() -> None:
    """Regression: nothing about pure-dict walks should change."""
    flat = {"plan": "Enterprise"}
    nested = {"a": {"b": 1}}
    assert _get_by_json_path(flat, "plan") == "Enterprise"
    assert _get_by_json_path(nested, "a.b") == 1


def test_field_extraction_pass_with_array_index() -> None:
    """End-to-end via the real evaluator: list-shaped indexed value, claim against ``[0].status``."""
    indexed = [{"status": "open", "id": 42}, {"status": "closed", "id": 41}]
    v = evaluate_claim(
        HandoffClaim(
            action_name="list_open_tickets",
            claimed_value="open",
            query_record_id="q1",
            verification_mode="field_extraction",
            json_path="[0].status",
        ),
        indexed,
    )
    assert v["result"] == "PASS", v


def test_field_extraction_caught_when_index_out_of_range() -> None:
    """An out-of-range index surfaces as ``CAUGHT`` with the range error in the detail."""
    out_of_range_claim = HandoffClaim(
        action_name="list_open_tickets",
        claimed_value="open",
        query_record_id="q1",
        verification_mode="field_extraction",
        json_path="[7].status",
    )
    verdict = evaluate_claim(out_of_range_claim, [{"status": "open"}])
    assert verdict["result"] == "CAUGHT"
    assert "out of range" in verdict["detail"]


# ---------------------------------------------------------------------------
# tests/unit/test_interceptor.py
# Allowlist pattern matching ({id} / {rest:path} parity with trusted_endpoints)
# ---------------------------------------------------------------------------


def _attach_with_pattern_allowlist(
    monkeypatch: Any, allowlist_entries: list[str], call_url: str
) -> tuple[list[str], list[Any]]:
    """Install ``allowlist_entries``, run ``_attach`` once against ``call_url`` and
    return ``(recorded URLs, tamper-hook calls)``. The allowlist is always cleared
    afterwards."""
    recorded: list[str] = []
    tampered: list[Any] = []

    def _capture_row(url: Any, *_a: Any, **_k: Any) -> None:
        recorded.append(url)

    def _capture_tamper(raw: Any) -> Any:
        # Record the call but hand the body back untouched.
        tampered.append(raw)
        return raw

    monkeypatch.setattr(interceptor, "_insert_row", _capture_row)
    monkeypatch.setattr(interceptor, "_maybe_transform_body", _capture_tamper)
    monkeypatch.setattr(interceptor, "_enabled", True)

    try:
        interceptor.set_intercept_url_allowlist(allowlist_entries)
        resp = requests.Response()
        resp.status_code = 200
        resp._content = b'{"ok": true}'
        resp.encoding = "utf-8"
        interceptor._attach(resp, call_url, "GET", {})
    finally:
        interceptor.set_intercept_url_allowlist(None)

    return recorded, tampered


def test_allowlist_pattern_matches_concrete_url(monkeypatch: Any) -> None:
    """Registered ``/customers/{id}`` matches the concrete ``/customers/42`` URL — both
    recorded and tamper-hooked."""
    concrete = "https://api.example.com/customers/42"
    recorded, tampered = _attach_with_pattern_allowlist(
        monkeypatch, ["https://api.example.com/customers/{id}"], concrete
    )
    assert recorded == [concrete]
    assert len(tampered) == 1


def test_allowlist_pattern_rejects_extra_segment(monkeypatch: Any) -> None:
    """Registered ``/customers/{id}`` does NOT match ``/customers/42/orders``
    (single-segment placeholder)."""
    recorded, tampered = _attach_with_pattern_allowlist(
        monkeypatch,
        ["https://api.example.com/customers/{id}"],
        "https://api.example.com/customers/42/orders",
    )
    assert recorded == []
    assert tampered == []


def test_allowlist_path_placeholder_matches_subtree(monkeypatch: Any) -> None:
    """``{rest:path}`` covers any subtree, including nested segments."""
    deep_url = "https://api.example.com/customers/42/orders/9"
    recorded, _ = _attach_with_pattern_allowlist(
        monkeypatch, ["https://api.example.com/customers/{rest:path}"], deep_url
    )
    assert recorded == [deep_url]


def test_allowlist_mixed_exact_and_pattern(monkeypatch: Any) -> None:
    """An allowlist with both exact URLs and patterns: each entry retains its semantics."""
    allowlist = [
        "https://api.example.com/health",
        "https://api.example.com/customers/{id}",
    ]
    # Exact entry hits exactly; pattern entry hits its pattern; unrelated URL is rejected.
    cases = [
        ("https://api.example.com/health", True),  # exact match
        ("https://api.example.com/customers/9", True),  # pattern match
        ("https://api.example.com/customers/9/orders", False),  # past pattern
        ("https://api.example.com/other", False),  # unrelated
    ]
    for url, expected_recorded in cases:
        recorded, _ = _attach_with_pattern_allowlist(monkeypatch, allowlist, url)
        assert (recorded == [url]) is expected_recorded, (
            f"url={url!r}: expected_recorded={expected_recorded}, got recorded={recorded}"
        )


def test_allowlist_plain_url_still_uses_exact_match(monkeypatch: Any) -> None:
    """An allowlist entry without ``{`` keeps exact-match semantics — no perf regression for
    the common case and no accidental prefix match."""
    plain_entry = "https://api.example.com/customers"  # no placeholders → exact-only
    recorded, _ = _attach_with_pattern_allowlist(
        monkeypatch, [plain_entry], "https://api.example.com/customers/42"
    )
    assert recorded == []