From 3fc3f6b1c600d9435e85340a1265a295df6c0386 Mon Sep 17 00:00:00 2001
From: hyeokjun32
Date: Thu, 14 May 2026 14:22:59 +0900
Subject: [PATCH] feat: add lab decision policy versioning

---
 README.md                                     |  1 +
 docs/pipeline_contract.md                     |  8 ++
 inferedgelab/commands/compare.py              |  6 ++
 inferedgelab/commands/compare_latest.py       |  6 ++
 inferedgelab/report/html_generator.py         | 11 +++
 inferedgelab/report/markdown_generator.py     |  6 ++
 inferedgelab/services/core4_conformance.py    | 24 +++++
 inferedgelab/services/demo_evidence_report.py |  5 ++
 inferedgelab/services/deployment_decision.py  | 90 +++++++++++++++++++
 inferedgelab/services/worker_contract.py      |  6 ++
 tests/test_api_worker_workflow.py             |  7 ++
 tests/test_core4_conformance.py               |  2 +
 tests/test_demo_evidence_report.py            |  7 ++
 tests/test_deployment_decision.py             | 20 +++++
 tests/test_report_generators.py               |  8 ++
 tests/test_worker_contract.py                 |  7 ++
 16 files changed, 214 insertions(+)

diff --git a/README.md b/README.md
index 7d0ac48..53567a7 100644
--- a/README.md
+++ b/README.md
@@ -120,6 +120,7 @@ poetry run inferedgelab export-demo-evidence --output reports/studio_demo_eviden
 It validates the committed Studio fixtures, expected README/PPT metrics, portfolio docs, and local Studio assets without starting workers, queues, databases, or a production SaaS service.
 
 `core4-conformance-check` is the cross-repo contract guardrail. It validates the bundled Forge manifest/metadata fixture, Runtime result JSON, Lab compare/deployment decision surface, and AIGuard `guard_analysis` evidence without mutating existing schemas.
+The Lab decision surface now also exposes `policy_version`, `triggered_rules`, and `policy_summary` so reviewers can see which local policy rules produced deploy/review/block/unknown outcomes.
 
 ![InferEdge Local Studio demo evidence](assets/images/local-studio-demo-evidence.png)
 
diff --git a/docs/pipeline_contract.md b/docs/pipeline_contract.md
index 943fc46..f781d2c 100644
--- a/docs/pipeline_contract.md
+++ b/docs/pipeline_contract.md
@@ -122,6 +122,14 @@ Optional AIGuard `guard_analysis.status` can influence the Lab deployment decisi
 
 The final deployment decision is always owned by InferEdgeLab. AIGuard supplies optional diagnosis evidence; it does not overwrite Lab policy.
 
+Deployment decisions include a backward-compatible policy trace:
+
+- `policy_version`: currently `inferedge-lab-decision-policy-v1`.
+- `triggered_rules`: compact rule IDs explaining which Lab/Guard/contract conditions affected the decision.
+- `policy_summary`: reviewer-facing rule effects and descriptions.
+
+The v1 policy keeps existing decision behavior stable while making review/block reasoning explicit. Examples include Guard error evidence mapping to `blocked`, Guard warning evidence mapping to `review_required`, shape mismatch mapping to review, and latency/accuracy trade-off risk mapping to review.
+
 ## SaaS Boundary
 
 This contract matters for the SaaS expansion path because the UI and API should return stable evidence bundles even when the pipeline is split across workers or services.
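For reference, the policy trace documented in `docs/pipeline_contract.md` above produces payloads shaped like the minimal sketch below. Field names follow `_decision_payload` as added in this patch; the concrete values are illustrative (a Guard-warning run), not committed fixtures.

```python
# Illustrative payload only: keys come from _decision_payload in this patch;
# values are example data for a Guard-warning comparison, not real fixtures.
decision = {
    "policy_version": "inferedge-lab-decision-policy-v1",
    "decision": "review_required",
    "reason": "Guard analysis reported warning-level validation risks.",
    "lab_overall": "tradeoff_faster",    # example value
    "guard_status": "warning",           # example value
    "guard_verdict": "review_required",  # example value
    "recommended_action": (
        "Review Guard anomalies, suspected causes, and accuracy/provenance "
        "evidence before deployment."
    ),
    "triggered_rules": ["guard_warning_review"],
    "policy_summary": [
        {
            "rule": "guard_warning_review",
            "effect": "review_required",
            "description": "AIGuard reported warning-level diagnosis evidence.",
        }
    ],
}
```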
diff --git a/inferedgelab/commands/compare.py b/inferedgelab/commands/compare.py
index 1dfbd99..22cebe2 100644
--- a/inferedgelab/commands/compare.py
+++ b/inferedgelab/commands/compare.py
@@ -75,9 +75,15 @@ def _render_deployment_decision(deployment_decision: dict | None) -> None:
         return
 
     rprint("[bold]Deployment Decision[/bold]")
+    rprint(f"- policy_version: {deployment_decision.get('policy_version')}")
     rprint(f"- decision: {deployment_decision.get('decision')}")
     rprint(f"- reason: {deployment_decision.get('reason')}")
     rprint(f"- recommended_action: {deployment_decision.get('recommended_action')}")
+    triggered_rules = deployment_decision.get("triggered_rules") or []
+    if triggered_rules:
+        rprint("- triggered_rules:")
+        for rule in triggered_rules:
+            rprint(f"  - {rule}")
 
 
 def compare_cmd(
diff --git a/inferedgelab/commands/compare_latest.py b/inferedgelab/commands/compare_latest.py
index e3c58fa..717cbe6 100644
--- a/inferedgelab/commands/compare_latest.py
+++ b/inferedgelab/commands/compare_latest.py
@@ -66,9 +66,15 @@ def _render_deployment_decision(deployment_decision: dict | None) -> None:
         return
 
     rprint("[bold]Deployment Decision[/bold]")
+    rprint(f"- policy_version: {deployment_decision.get('policy_version')}")
     rprint(f"- decision: {deployment_decision.get('decision')}")
     rprint(f"- reason: {deployment_decision.get('reason')}")
     rprint(f"- recommended_action: {deployment_decision.get('recommended_action')}")
+    triggered_rules = deployment_decision.get("triggered_rules") or []
+    if triggered_rules:
+        rprint("- triggered_rules:")
+        for rule in triggered_rules:
+            rprint(f"  - {rule}")
 
 
 def _render_compare_bundle(bundle: dict, markdown_out: str, html_out: str) -> None:
diff --git a/inferedgelab/report/html_generator.py b/inferedgelab/report/html_generator.py
index 10a0d16..402ff5a 100644
--- a/inferedgelab/report/html_generator.py
+++ b/inferedgelab/report/html_generator.py
@@ -274,14 +274,25 @@ def _deployment_decision_to_html(deployment_decision: Dict[str, Any] | None) ->
     if deployment_decision is None:
         return ""
 
+    triggered_rules = deployment_decision.get("triggered_rules") or []
+    triggered_rules_html = ""
+    if triggered_rules:
+        triggered_rules_html = (
+            "<div>triggered_rules: "
+            + ", ".join(f"<code>{escape(str(rule))}</code>" for rule in triggered_rules)
+            + "</div>"
+        )
+
     return f"""<div class="deployment-decision">
     <h3>Deployment Decision</h3>
     <ul>
+    <li>policy_version: {escape(str(deployment_decision.get("policy_version")))}</li>
     <li>decision: {escape(str(deployment_decision.get("decision")))}</li>
     <li>reason: {escape(str(deployment_decision.get("reason")))}</li>
     <li>lab_overall: {escape(str(deployment_decision.get("lab_overall")))}</li>
     <li>guard_status: {escape(str(deployment_decision.get("guard_status")))}</li>
     <li>recommended_action: {escape(str(deployment_decision.get("recommended_action")))}</li>
     </ul>
+    {triggered_rules_html}
     </div>
     """
""" diff --git a/inferedgelab/report/markdown_generator.py b/inferedgelab/report/markdown_generator.py index 309c03c..691700e 100644 --- a/inferedgelab/report/markdown_generator.py +++ b/inferedgelab/report/markdown_generator.py @@ -111,11 +111,17 @@ def _append_guard_analysis(lines: list[str], guard_analysis: Dict[str, Any]) -> def _append_deployment_decision(lines: list[str], deployment_decision: Dict[str, Any]) -> None: lines.append("## Deployment Decision") lines.append("") + lines.append(f"- policy_version: {deployment_decision.get('policy_version')}") lines.append(f"- decision: {deployment_decision.get('decision')}") lines.append(f"- reason: {deployment_decision.get('reason')}") lines.append(f"- lab_overall: {deployment_decision.get('lab_overall')}") lines.append(f"- guard_status: {deployment_decision.get('guard_status')}") lines.append(f"- recommended_action: {deployment_decision.get('recommended_action')}") + triggered_rules = deployment_decision.get("triggered_rules") or [] + if triggered_rules: + lines.append("- triggered_rules:") + for rule in triggered_rules: + lines.append(f" - {rule}") lines.append("") diff --git a/inferedgelab/services/core4_conformance.py b/inferedgelab/services/core4_conformance.py index 08afb8e..29ce811 100644 --- a/inferedgelab/services/core4_conformance.py +++ b/inferedgelab/services/core4_conformance.py @@ -5,6 +5,7 @@ from pathlib import Path from typing import Any +from inferedgelab.services.deployment_decision import POLICY_VERSION from inferedgelab.services.demo_evidence_report import build_demo_evidence_summary SCHEMA_VERSION = "inferedgelab-core4-conformance-v1" @@ -266,6 +267,29 @@ def _check_lab_contract(summary: dict[str, Any], checks: list[dict[str, Any]]) - expected="non-empty reason", observed=decision.get("reason"), ), + _check_item( + name="lab:deployment_policy_version", + passed=decision.get("policy_version") == POLICY_VERSION, + category="lab", + expected=POLICY_VERSION, + observed=decision.get("policy_version"), + ), + _check_item( + name="lab:deployment_triggered_rules", + passed=isinstance(decision.get("triggered_rules"), list) + and len(decision["triggered_rules"]) > 0, + category="lab", + expected="non-empty triggered_rules list", + observed=decision.get("triggered_rules"), + ), + _check_item( + name="lab:deployment_policy_summary", + passed=isinstance(decision.get("policy_summary"), list) + and len(decision["policy_summary"]) > 0, + category="lab", + expected="non-empty policy_summary list", + observed=decision.get("policy_summary"), + ), ] ) diff --git a/inferedgelab/services/demo_evidence_report.py b/inferedgelab/services/demo_evidence_report.py index bcd2b47..6dbe919 100644 --- a/inferedgelab/services/demo_evidence_report.py +++ b/inferedgelab/services/demo_evidence_report.py @@ -167,11 +167,14 @@ def build_demo_evidence_markdown(summary: dict[str, Any] | None = None) -> str: "", "## Deployment Decision", "", + f"- policy_version: `{decision.get('policy_version')}`", f"- decision: `{decision.get('decision')}`", f"- reason: {decision.get('reason')}", f"- guard_status: `{decision.get('guard_status')}`", f"- guard_verdict: `{decision.get('guard_verdict')}`", f"- recommended_action: {decision.get('recommended_action')}", + "- triggered_rules:", + *[f" - `{rule}`" for rule in decision.get("triggered_rules", [])], "", "## YOLOv8 COCO Subset Evaluation", "", @@ -429,6 +432,8 @@ def build_demo_evidence_summary_text(summary: dict[str, Any] | None = None) -> s f"- TensorRT FPS: {_fmt_number(runtime['tensorrt_jetson_fp16_25w']['fps'])}", 
f"- ONNX Runtime FPS: {_fmt_number(runtime['onnxruntime_cpu']['fps'])}", f"- deployment_decision: {decision.get('decision')}", + f"- policy_version: {decision.get('policy_version')}", + f"- triggered_rules: {', '.join(decision.get('triggered_rules', [])) or '-'}", f"- evaluation map50: {_fmt_number(evaluation['map50'])}", f"- in_memory_note: {summary['in_memory_note']}", "", diff --git a/inferedgelab/services/deployment_decision.py b/inferedgelab/services/deployment_decision.py index 35511a4..62f57ed 100644 --- a/inferedgelab/services/deployment_decision.py +++ b/inferedgelab/services/deployment_decision.py @@ -6,6 +6,78 @@ REVIEW_TRADEOFF_RISKS = {"risky_tradeoff", "severe_tradeoff", "not_beneficial"} +POLICY_VERSION = "inferedge-lab-decision-policy-v1" +POLICY_RULES: dict[str, dict[str, str]] = { + "guard_error_block": { + "effect": "blocked", + "description": "AIGuard reported error-level diagnosis evidence.", + }, + "guard_warning_review": { + "effect": "review_required", + "description": "AIGuard reported warning-level diagnosis evidence.", + }, + "guard_skipped_unknown": { + "effect": "unknown", + "description": "AIGuard was skipped, so diagnosis evidence is incomplete.", + }, + "guard_unavailable_unknown": { + "effect": "unknown", + "description": "AIGuard evidence is unavailable for this comparison.", + }, + "guard_ok_lab_favorable_deployable": { + "effect": "deployable", + "description": "Lab comparison is favorable and AIGuard passed.", + }, + "guard_ok_lab_neutral_deployable_note": { + "effect": "deployable_with_note", + "description": "Lab comparison is neutral and AIGuard passed.", + }, + "guard_ok_lab_unfavorable_review": { + "effect": "review_required", + "description": "Lab comparison indicates regression or mismatch despite AIGuard passing.", + }, + "guard_ok_lab_unknown": { + "effect": "unknown", + "description": "Lab comparison judgement is not recognized by the decision policy.", + }, + "guard_status_unrecognized_unknown": { + "effect": "unknown", + "description": "AIGuard status is not recognized by the decision policy.", + }, + "shape_mismatch_review": { + "effect": "review_required", + "description": "Input shape mismatch requires explicit deployment review.", + }, + "system_mismatch_unfavorable_review": { + "effect": "review_required", + "description": "System mismatch combined with unfavorable Lab judgement requires review.", + }, + "system_mismatch_note": { + "effect": "deployable_with_note", + "description": "System mismatch reduces confidence and must be noted in release evidence.", + }, + "tradeoff_risk_review": { + "effect": "review_required", + "description": "Latency/accuracy trade-off risk requires deployment review.", + }, + "worker_uncompared_unknown": { + "effect": "unknown", + "description": "Worker result has not been compared by Lab yet.", + }, +} + + +def policy_summary_for_rules(triggered_rules: list[str]) -> list[dict[str, str]]: + return [ + { + "rule": rule, + "effect": POLICY_RULES.get(rule, {}).get("effect", "unknown"), + "description": POLICY_RULES.get(rule, {}).get( + "description", "Rule is not documented in this policy version." 
+ ), + } + for rule in triggered_rules + ] def _decision_payload( @@ -16,14 +88,18 @@ def _decision_payload( guard_status: Any, guard_verdict_value: Any, recommended_action: str, + triggered_rules: list[str], ) -> dict[str, Any]: return { + "policy_version": POLICY_VERSION, "decision": decision, "reason": reason, "lab_overall": lab_overall, "guard_status": guard_status, "guard_verdict": guard_verdict_value, "recommended_action": recommended_action, + "triggered_rules": triggered_rules, + "policy_summary": policy_summary_for_rules(triggered_rules), } @@ -43,61 +119,74 @@ def build_deployment_decision(judgement: dict, guard_analysis: dict | None = Non guard_status=normalized_guard_status, guard_verdict_value=normalized_guard_verdict, recommended_action="Do not deploy until the Guard anomalies are resolved.", + triggered_rules=["guard_error_block"], ) if normalized_guard_status == "warning": decision = "review_required" reason = "Guard analysis reported warning-level validation risks." recommended_action = "Review Guard anomalies, suspected causes, and accuracy/provenance evidence before deployment." + triggered_rules = ["guard_warning_review"] elif normalized_guard_status == "skipped": decision = "unknown" reason = "Guard analysis was skipped." recommended_action = "Install InferEdgeAIGuard or run validation reasoning before deployment." + triggered_rules = ["guard_skipped_unknown"] elif normalized_guard_status is None: decision = "unknown" reason = "Guard analysis is unavailable." recommended_action = "Run compare with --with-guard before deployment decision." + triggered_rules = ["guard_unavailable_unknown"] elif normalized_guard_status == "ok": if lab_overall in {"improvement", "tradeoff_faster"}: decision = "deployable" reason = "Lab judgement is favorable and Guard analysis passed." recommended_action = "Deployment can proceed with normal rollout monitoring." + triggered_rules = ["guard_ok_lab_favorable_deployable"] elif lab_overall in {"neutral", "tradeoff_neutral"}: decision = "deployable_with_note" reason = "Lab judgement is neutral and Guard analysis passed." recommended_action = "Deployment can proceed, but keep the comparison note in release evidence." + triggered_rules = ["guard_ok_lab_neutral_deployable_note"] elif lab_overall in {"regression", "tradeoff_slower", "mismatch"}: decision = "review_required" reason = "Lab judgement indicates regression or mismatch despite Guard passing." recommended_action = "Review Lab comparison evidence before deployment." + triggered_rules = ["guard_ok_lab_unfavorable_review"] else: decision = "unknown" reason = "Lab judgement is not recognized for deployment decision." recommended_action = "Review the compare judgement before deployment." + triggered_rules = ["guard_ok_lab_unknown"] else: decision = "unknown" reason = "Guard analysis status is not recognized." recommended_action = "Review Guard output before deployment." + triggered_rules = ["guard_status_unrecognized_unknown"] if decision != "blocked" and shape_match is False: decision = "review_required" reason = "Input shape mismatch requires deployment review." recommended_action = "Resolve or explicitly approve the shape mismatch before deployment." + triggered_rules.append("shape_mismatch_review") if decision != "blocked" and system_match is False: if lab_overall in {"regression", "tradeoff_slower", "mismatch"}: decision = "review_required" reason = "System mismatch and unfavorable Lab judgement require deployment review." 
recommended_action = "Review system provenance and Lab regression evidence before deployment." + triggered_rules.append("system_mismatch_unfavorable_review") elif decision == "deployable": decision = "deployable_with_note" reason = "System mismatch reduces deployment confidence." recommended_action = "Deployment can proceed only with the system mismatch noted in release evidence." + triggered_rules.append("system_mismatch_note") if decision != "blocked" and tradeoff_risk in REVIEW_TRADEOFF_RISKS: decision = "review_required" reason = "Trade-off risk requires deployment review." recommended_action = "Review accuracy trade-off and provenance evidence before deployment." + triggered_rules.append("tradeoff_risk_review") return _decision_payload( decision=decision, @@ -106,4 +195,5 @@ def build_deployment_decision(judgement: dict, guard_analysis: dict | None = Non guard_status=normalized_guard_status, guard_verdict_value=normalized_guard_verdict, recommended_action=recommended_action, + triggered_rules=triggered_rules, ) diff --git a/inferedgelab/services/worker_contract.py b/inferedgelab/services/worker_contract.py index bcafcae..c69fe55 100644 --- a/inferedgelab/services/worker_contract.py +++ b/inferedgelab/services/worker_contract.py @@ -5,6 +5,8 @@ from inferedgelab.services.api_job_contract import ApiJobContractError from inferedgelab.services.api_job_contract import build_api_job_response from inferedgelab.services.api_job_contract import validate_api_job_response +from inferedgelab.services.deployment_decision import POLICY_VERSION +from inferedgelab.services.deployment_decision import policy_summary_for_rules from inferedgelab.services.guard_analysis import guard_status, guard_verdict @@ -199,13 +201,17 @@ def _build_completed_job_result(worker_response: dict[str, Any]) -> dict[str, An runtime_result = worker_response["runtime_result"] guard_analysis = worker_response.get("guard_analysis") normalized_guard_status = guard_status(guard_analysis) + triggered_rules = ["worker_uncompared_unknown"] deployment_decision = { + "policy_version": POLICY_VERSION, "decision": "unknown", "reason": "Worker response has not been compared by Lab yet.", "lab_overall": None, "guard_status": normalized_guard_status, "guard_verdict": guard_verdict(guard_analysis), "recommended_action": "Run Lab compare/report before deployment decision.", + "triggered_rules": triggered_rules, + "policy_summary": policy_summary_for_rules(triggered_rules), } result = { "summary": { diff --git a/tests/test_api_worker_workflow.py b/tests/test_api_worker_workflow.py index d095db9..4dbf600 100644 --- a/tests/test_api_worker_workflow.py +++ b/tests/test_api_worker_workflow.py @@ -61,6 +61,13 @@ def test_analyze_worker_completed_workflow_smoke(): mapped_completed = apply_worker_response_to_job(queued_job, worker_response) assert mapped_completed["status"] == "completed" assert mapped_completed["result"]["deployment_decision"]["decision"] == "unknown" + assert ( + mapped_completed["result"]["deployment_decision"]["policy_version"] + == "inferedge-lab-decision-policy-v1" + ) + assert mapped_completed["result"]["deployment_decision"]["triggered_rules"] == [ + "worker_uncompared_unknown" + ] assert mapped_completed["result"]["guard_analysis"] == worker_response["guard_analysis"] stored_completed = complete_dev_endpoint( diff --git a/tests/test_core4_conformance.py b/tests/test_core4_conformance.py index 209b09c..bd14f7b 100644 --- a/tests/test_core4_conformance.py +++ b/tests/test_core4_conformance.py @@ -23,6 +23,8 @@ def 
     assert report["layers"]["handoff"]["status"] == "pass"
     assert any(check["name"] == "handoff:source_model_sha256" for check in report["checks"])
     assert any(check["name"] == "lab:compare_speedup" for check in report["checks"])
+    assert any(check["name"] == "lab:deployment_policy_version" for check in report["checks"])
+    assert any(check["name"] == "lab:deployment_triggered_rules" for check in report["checks"])
     assert any(check["name"] == "aiguard:verdict_coverage" for check in report["checks"])
 
 
diff --git a/tests/test_demo_evidence_report.py b/tests/test_demo_evidence_report.py
index 591aab9..d4f2c31 100644
--- a/tests/test_demo_evidence_report.py
+++ b/tests/test_demo_evidence_report.py
@@ -34,6 +34,11 @@ def test_demo_evidence_summary_reproduces_portfolio_metrics():
     assert comparison["speedup"] == pytest.approx(4.512994, rel=1e-5)
     assert comparison["lab_overall"] == "tradeoff_faster"
     assert summary["deployment_decision"]["decision"] == "review_required"
+    assert (
+        summary["deployment_decision"]["policy_version"]
+        == "inferedge-lab-decision-policy-v1"
+    )
+    assert "guard_warning_review" in summary["deployment_decision"]["triggered_rules"]
     assert evaluation["metric_backend"] == "simplified"
     assert evaluation["ground_truth_boxes"] == 89
     assert evaluation["map50"] == pytest.approx(0.1409784036)
@@ -60,6 +65,8 @@ def test_demo_evidence_markdown_contains_report_sections():
     assert "ONNX Runtime CPU baseline" in markdown
     assert "4.513x faster" in markdown
     assert "YOLOv8 COCO Subset Evaluation" in markdown
+    assert "inferedge-lab-decision-policy-v1" in markdown
+    assert "guard_warning_review" in markdown
     assert "latency_regression" in markdown
     assert "AIGuard Portfolio Cases" in markdown
     assert "Jetson Power Mode Evidence" in markdown
diff --git a/tests/test_deployment_decision.py b/tests/test_deployment_decision.py
index 8551512..6fb405f 100644
--- a/tests/test_deployment_decision.py
+++ b/tests/test_deployment_decision.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from inferedgelab.services.deployment_decision import POLICY_VERSION
 from inferedgelab.services.deployment_decision import build_deployment_decision
 
 
@@ -18,12 +19,21 @@
     }
 
 
+def assert_policy(decision: dict, *rules: str) -> None:
+    assert decision["policy_version"] == POLICY_VERSION
+    for rule in rules:
+        assert rule in decision["triggered_rules"]
+    assert decision["policy_summary"]
+    assert decision["policy_summary"][0]["rule"] == decision["triggered_rules"][0]
+
+
 def test_guard_error_blocks_deployment():
     decision = build_deployment_decision(make_judgement(), {"status": "error"})
 
     assert decision["decision"] == "blocked"
     assert decision["reason"] == "Guard analysis reported an error-level validation issue."
     assert decision["recommended_action"] == "Do not deploy until the Guard anomalies are resolved."
+    assert_policy(decision, "guard_error_block")
 
 
 def test_guard_warning_requires_review():
@@ -31,6 +41,7 @@
 
     assert decision["decision"] == "review_required"
     assert decision["reason"] == "Guard analysis reported warning-level validation risks."
+    assert_policy(decision, "guard_warning_review")
 
 
 def test_guard_skipped_is_unknown():
@@ -38,6 +49,7 @@
 
     assert decision["decision"] == "unknown"
     assert decision["reason"] == "Guard analysis was skipped."
+    assert_policy(decision, "guard_skipped_unknown")
 
 
 def test_guard_ok_with_improvement_is_deployable():
@@ -47,18 +59,21 @@
     assert decision["lab_overall"] == "improvement"
     assert decision["guard_status"] == "ok"
     assert decision["guard_verdict"] == "pass"
+    assert_policy(decision, "guard_ok_lab_favorable_deployable")
 
 
 def test_guard_ok_with_neutral_is_deployable_with_note():
     decision = build_deployment_decision(make_judgement(overall="neutral"), {"status": "ok"})
 
     assert decision["decision"] == "deployable_with_note"
+    assert_policy(decision, "guard_ok_lab_neutral_deployable_note")
 
 
 def test_guard_ok_with_regression_requires_review():
     decision = build_deployment_decision(make_judgement(overall="regression"), {"status": "ok"})
 
     assert decision["decision"] == "review_required"
+    assert_policy(decision, "guard_ok_lab_unfavorable_review")
 
 
 def test_shape_mismatch_requires_review_but_guard_error_stays_blocked():
@@ -67,6 +82,8 @@
 
     assert review_decision["decision"] == "review_required"
     assert blocked_decision["decision"] == "blocked"
+    assert_policy(review_decision, "shape_mismatch_review")
+    assert_policy(blocked_decision, "guard_error_block")
 
 
 def test_risky_tradeoff_requires_review():
@@ -76,6 +93,7 @@
     )
 
     assert decision["decision"] == "review_required"
+    assert_policy(decision, "tradeoff_risk_review")
 
 
 def test_diagnosis_guard_verdict_blocked_blocks_deployment():
@@ -93,6 +111,7 @@
     assert decision["decision"] == "blocked"
     assert decision["guard_status"] == "error"
     assert decision["guard_verdict"] == "blocked"
+    assert_policy(decision, "guard_error_block")
 
 
 def test_diagnosis_guard_verdict_review_requires_lab_review():
@@ -110,3 +129,4 @@
     assert decision["decision"] == "review_required"
     assert decision["guard_status"] == "warning"
     assert decision["guard_verdict"] == "review_required"
+    assert_policy(decision, "guard_warning_review")
diff --git a/tests/test_report_generators.py b/tests/test_report_generators.py
index f9219c9..3b2aa0b 100644
--- a/tests/test_report_generators.py
+++ b/tests/test_report_generators.py
@@ -289,18 +289,22 @@ def test_generate_compare_markdown_includes_deployment_decision_section():
     compare_result = make_compare_result()
     judgement = make_judgement()
     deployment_decision = {
+        "policy_version": "inferedge-lab-decision-policy-v1",
         "decision": "deployable",
         "reason": "Lab judgement is favorable and Guard analysis passed.",
         "lab_overall": "improvement",
         "guard_status": "ok",
         "recommended_action": "Deployment can proceed with normal rollout monitoring.",
+        "triggered_rules": ["guard_ok_lab_favorable_deployable"],
     }
 
     text = generate_compare_markdown(compare_result, judgement, deployment_decision=deployment_decision)
 
     assert "## Deployment Decision" in text
+    assert "- policy_version: inferedge-lab-decision-policy-v1" in text
     assert "- decision: deployable" in text
     assert "- guard_status: ok" in text
+    assert "guard_ok_lab_favorable_deployable" in text
 
 
 def test_generate_compare_markdown_includes_diagnosis_guard_evidence():
@@ -432,18 +436,22 @@ def test_generate_compare_html_includes_deployment_decision_section():
     compare_result = make_compare_result()
     judgement = make_judgement()
     deployment_decision = {
+        "policy_version": "inferedge-lab-decision-policy-v1",
         "decision": "deployable",
         "reason": "Lab judgement is favorable and Guard analysis passed.",
         "lab_overall": "improvement",
         "guard_status": "ok",
         "recommended_action": "Deployment can proceed with normal rollout monitoring.",
+        "triggered_rules": ["guard_ok_lab_favorable_deployable"],
     }
 
     html = generate_compare_html(compare_result, judgement, deployment_decision=deployment_decision)
 
     assert "Deployment Decision" in html
+    assert "inferedge-lab-decision-policy-v1" in html
     assert "deployable" in html
     assert "Deployment can proceed with normal rollout monitoring." in html
+    assert "guard_ok_lab_favorable_deployable" in html
 
 
 def test_generate_compare_html_includes_diagnosis_guard_evidence():
diff --git a/tests/test_worker_contract.py b/tests/test_worker_contract.py
index 23c99b2..252ccf4 100644
--- a/tests/test_worker_contract.py
+++ b/tests/test_worker_contract.py
@@ -193,6 +193,13 @@ def test_completed_worker_response_maps_to_completed_job():
     assert completed["updated_at"] == worker_response["completed_at"]
     assert completed["error"] is None
     assert completed["result"]["deployment_decision"]["decision"] == "unknown"
+    assert (
+        completed["result"]["deployment_decision"]["policy_version"]
+        == "inferedge-lab-decision-policy-v1"
+    )
+    assert completed["result"]["deployment_decision"]["triggered_rules"] == [
+        "worker_uncompared_unknown"
+    ]
     assert completed["result"]["comparison"]["result"]["runtime_result"] == worker_response["runtime_result"]
     assert completed["result"]["provenance"]["forge_metadata"] == worker_response["forge_metadata"]
     assert completed["next_actions"] == ["review_deployment_decision"]
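For reviewers who want to see the trace end to end, a minimal usage sketch of the `policy_summary_for_rules` helper added in this patch, assuming the patch is applied; the rule IDs passed in are taken from `POLICY_RULES`:

```python
from inferedgelab.services.deployment_decision import policy_summary_for_rules

# Render a reviewer-facing trace from rule IDs carried on a decision payload.
# Both rule IDs below are defined in POLICY_RULES in this patch.
for entry in policy_summary_for_rules(["shape_mismatch_review", "tradeoff_risk_review"]):
    print(f"{entry['rule']}: {entry['effect']} - {entry['description']}")
```

Per the v1 policy table, both rules resolve to the `review_required` effect, so the printed trace explains exactly why a fast-but-risky comparison lands in review rather than deploy.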