diff --git a/src/provably/handoff/eval_modes.py b/src/provably/handoff/eval_modes.py index 9acd3bd..f285605 100644 --- a/src/provably/handoff/eval_modes.py +++ b/src/provably/handoff/eval_modes.py @@ -33,7 +33,7 @@ def evaluate_claim(claim: HandoffClaim, indexed_root: Any) -> dict[str, Any]: try: at_path = _get_by_json_path(indexed_root, claim.json_path) except (KeyError, IndexError, TypeError, ValueError) as exc: - return {**base, "result": "CAUGHT", "detail": f"json_path: {exc}"} + return {**base, "result": "ERROR", "detail": f"json_path: {exc}"} base["indexed_at_path"] = canonical_json(at_path) @@ -43,7 +43,7 @@ def evaluate_claim(claim: HandoffClaim, indexed_root: Any) -> dict[str, Any]: return _eval_schema_type(claim, at_path, base) if mode == "range_threshold": return _eval_range_threshold(claim, at_path, base) - return {**base, "result": "CAUGHT", "detail": f"unknown verification_mode: {mode}"} + return {**base, "result": "ERROR", "detail": f"unknown verification_mode: {mode}"} def _base_verdict(claim: HandoffClaim, indexed_root: Any) -> dict[str, Any]: @@ -71,23 +71,23 @@ def _eval_field_extraction(claim: HandoffClaim, at_path: Any, base: dict[str, An def _eval_schema_type(claim: HandoffClaim, at_path: Any, base: dict[str, Any]) -> dict[str, Any]: schema = claim.expected_json_schema if not schema: - return {**base, "result": "CAUGHT", "detail": "expected_json_schema is required for schema_type"} + return {**base, "result": "ERROR", "detail": "expected_json_schema is required for schema_type"} try: jsonschema.validate(at_path, schema) except jsonschema.ValidationError as exc: return {**base, "result": "CAUGHT", "detail": exc.message} except jsonschema.SchemaError as exc: - return {**base, "result": "CAUGHT", "detail": f"invalid schema: {exc}"} + return {**base, "result": "ERROR", "detail": f"invalid schema: {exc}"} return {**base, "result": "PASS"} def _eval_range_threshold(claim: HandoffClaim, at_path: Any, base: dict[str, Any]) -> dict[str, Any]: if claim.range_min is None and claim.range_max is None: - return {**base, "result": "CAUGHT", "detail": "range_threshold requires range_min and/or range_max"} + return {**base, "result": "ERROR", "detail": "range_threshold requires range_min and/or range_max"} try: value = _coerce_number(at_path) except (TypeError, ValueError) as exc: - return {**base, "result": "CAUGHT", "detail": f"indexed value not numeric: {exc}"} + return {**base, "result": "ERROR", "detail": f"indexed value not numeric: {exc}"} if claim.range_min is not None and value < float(claim.range_min): return {**base, "result": "CAUGHT", "detail": f"value {value} below range_min {claim.range_min}"} if claim.range_max is not None and value > float(claim.range_max): diff --git a/tests/unit/test_eval_modes.py b/tests/unit/test_eval_modes.py index dd0ec3c..8eb02b8 100644 --- a/tests/unit/test_eval_modes.py +++ b/tests/unit/test_eval_modes.py @@ -95,7 +95,7 @@ def test_field_extraction_pass_with_array_index() -> None: assert v["result"] == "PASS", v -def test_field_extraction_caught_when_index_out_of_range() -> None: +def test_field_extraction_error_when_index_out_of_range() -> None: claim = HandoffClaim( action_name="list_open_tickets", claimed_value="open", @@ -104,7 +104,7 @@ def test_field_extraction_caught_when_index_out_of_range() -> None: json_path="[7].status", ) v = evaluate_claim(claim, [{"status": "open"}]) - assert v["result"] == "CAUGHT" + assert v["result"] == "ERROR" assert "out of range" in v["detail"] @@ -125,7 +125,7 @@ def test_schema_type_passes_with_dollar_path() -> None: assert v["indexed_at_path"] == "1" -def test_schema_type_missing_path_is_caught() -> None: +def test_schema_type_missing_path_is_error() -> None: claim = HandoffClaim( action_name="endpoint_0", claimed_value=None, @@ -135,4 +135,4 @@ def test_schema_type_missing_path_is_caught() -> None: expected_json_schema={"type": "boolean"}, ) v = evaluate_claim(claim, {"id": 1}) - assert v["result"] == "CAUGHT" + assert v["result"] == "ERROR"