Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ poetry run inferedgelab export-demo-evidence --output reports/studio_demo_eviden
It validates the committed Studio fixtures, expected README/PPT metrics, portfolio docs, and local Studio assets without starting workers, queues, databases, or a production SaaS service.
`core4-conformance-check` is the cross-repo contract guardrail.
It validates the bundled Forge manifest/metadata fixture, Runtime result JSON, Lab compare/deployment decision surface, and AIGuard `guard_analysis` evidence without mutating existing schemas.
The Lab decision surface now also exposes `policy_version`, `triggered_rules`, and `policy_summary` so reviewers can see which local policy rules produced deploy/review/block/unknown outcomes.

![InferEdge Local Studio demo evidence](assets/images/local-studio-demo-evidence.png)

Expand Down
8 changes: 8 additions & 0 deletions docs/pipeline_contract.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,14 @@ Optional AIGuard `guard_analysis.status` can influence the Lab deployment decisi

The final deployment decision is always owned by InferEdgeLab. AIGuard supplies optional diagnosis evidence; it does not overwrite Lab policy.

Deployment decisions include a backward-compatible policy trace:

- `policy_version`: currently `inferedge-lab-decision-policy-v1`.
- `triggered_rules`: compact rule IDs explaining which Lab/Guard/contract conditions affected the decision.
- `policy_summary`: reviewer-facing rule effects and descriptions.

The v1 policy keeps existing decision behavior stable while making review/block reasoning explicit. Examples include Guard error evidence mapping to `blocked`, Guard warning evidence mapping to `review_required`, shape mismatch mapping to review, and latency/accuracy trade-off risk mapping to review.

## SaaS Boundary

This contract matters for the SaaS expansion path because the UI and API should return stable evidence bundles even when the pipeline is split across workers or services.
Expand Down
6 changes: 6 additions & 0 deletions inferedgelab/commands/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,15 @@ def _render_deployment_decision(deployment_decision: dict | None) -> None:
return

rprint("[bold]Deployment Decision[/bold]")
rprint(f"- policy_version: {deployment_decision.get('policy_version')}")
rprint(f"- decision: {deployment_decision.get('decision')}")
rprint(f"- reason: {deployment_decision.get('reason')}")
rprint(f"- recommended_action: {deployment_decision.get('recommended_action')}")
triggered_rules = deployment_decision.get("triggered_rules") or []
if triggered_rules:
rprint("- triggered_rules:")
for rule in triggered_rules:
rprint(f" - {rule}")


def compare_cmd(
Expand Down
6 changes: 6 additions & 0 deletions inferedgelab/commands/compare_latest.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,15 @@ def _render_deployment_decision(deployment_decision: dict | None) -> None:
return

rprint("[bold]Deployment Decision[/bold]")
rprint(f"- policy_version: {deployment_decision.get('policy_version')}")
rprint(f"- decision: {deployment_decision.get('decision')}")
rprint(f"- reason: {deployment_decision.get('reason')}")
rprint(f"- recommended_action: {deployment_decision.get('recommended_action')}")
triggered_rules = deployment_decision.get("triggered_rules") or []
if triggered_rules:
rprint("- triggered_rules:")
for rule in triggered_rules:
rprint(f" - {rule}")


def _render_compare_bundle(bundle: dict, markdown_out: str, html_out: str) -> None:
Expand Down
11 changes: 11 additions & 0 deletions inferedgelab/report/html_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,14 +274,25 @@ def _deployment_decision_to_html(deployment_decision: Dict[str, Any] | None) ->
if deployment_decision is None:
return ""

triggered_rules = deployment_decision.get("triggered_rules") or []
triggered_rules_html = ""
if triggered_rules:
triggered_rules_html = (
"<p><strong>triggered_rules</strong>: "
+ ", ".join(f"<code>{escape(str(rule))}</code>" for rule in triggered_rules)
+ "</p>"
)

return f"""
<h2>Deployment Decision</h2>
<div class="meta">
<p><strong>policy_version</strong>: <code>{escape(str(deployment_decision.get("policy_version")))}</code></p>
<p><strong>decision</strong>: <code>{escape(str(deployment_decision.get("decision")))}</code></p>
<p><strong>reason</strong>: {escape(str(deployment_decision.get("reason")))}</p>
<p><strong>lab_overall</strong>: <code>{escape(str(deployment_decision.get("lab_overall")))}</code></p>
<p><strong>guard_status</strong>: <code>{escape(str(deployment_decision.get("guard_status")))}</code></p>
<p><strong>recommended_action</strong>: {escape(str(deployment_decision.get("recommended_action")))}</p>
{triggered_rules_html}
</div>
"""

Expand Down
6 changes: 6 additions & 0 deletions inferedgelab/report/markdown_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,17 @@ def _append_guard_analysis(lines: list[str], guard_analysis: Dict[str, Any]) ->
def _append_deployment_decision(lines: list[str], deployment_decision: Dict[str, Any]) -> None:
lines.append("## Deployment Decision")
lines.append("")
lines.append(f"- policy_version: {deployment_decision.get('policy_version')}")
lines.append(f"- decision: {deployment_decision.get('decision')}")
lines.append(f"- reason: {deployment_decision.get('reason')}")
lines.append(f"- lab_overall: {deployment_decision.get('lab_overall')}")
lines.append(f"- guard_status: {deployment_decision.get('guard_status')}")
lines.append(f"- recommended_action: {deployment_decision.get('recommended_action')}")
triggered_rules = deployment_decision.get("triggered_rules") or []
if triggered_rules:
lines.append("- triggered_rules:")
for rule in triggered_rules:
lines.append(f" - {rule}")
lines.append("")


Expand Down
24 changes: 24 additions & 0 deletions inferedgelab/services/core4_conformance.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pathlib import Path
from typing import Any

from inferedgelab.services.deployment_decision import POLICY_VERSION
from inferedgelab.services.demo_evidence_report import build_demo_evidence_summary

SCHEMA_VERSION = "inferedgelab-core4-conformance-v1"
Expand Down Expand Up @@ -266,6 +267,29 @@ def _check_lab_contract(summary: dict[str, Any], checks: list[dict[str, Any]]) -
expected="non-empty reason",
observed=decision.get("reason"),
),
_check_item(
name="lab:deployment_policy_version",
passed=decision.get("policy_version") == POLICY_VERSION,
category="lab",
expected=POLICY_VERSION,
observed=decision.get("policy_version"),
),
_check_item(
name="lab:deployment_triggered_rules",
passed=isinstance(decision.get("triggered_rules"), list)
and len(decision["triggered_rules"]) > 0,
category="lab",
expected="non-empty triggered_rules list",
observed=decision.get("triggered_rules"),
),
_check_item(
name="lab:deployment_policy_summary",
passed=isinstance(decision.get("policy_summary"), list)
and len(decision["policy_summary"]) > 0,
category="lab",
expected="non-empty policy_summary list",
observed=decision.get("policy_summary"),
),
]
)

Expand Down
5 changes: 5 additions & 0 deletions inferedgelab/services/demo_evidence_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,11 +167,14 @@ def build_demo_evidence_markdown(summary: dict[str, Any] | None = None) -> str:
"",
"## Deployment Decision",
"",
f"- policy_version: `{decision.get('policy_version')}`",
f"- decision: `{decision.get('decision')}`",
f"- reason: {decision.get('reason')}",
f"- guard_status: `{decision.get('guard_status')}`",
f"- guard_verdict: `{decision.get('guard_verdict')}`",
f"- recommended_action: {decision.get('recommended_action')}",
"- triggered_rules:",
*[f" - `{rule}`" for rule in decision.get("triggered_rules", [])],
"",
"## YOLOv8 COCO Subset Evaluation",
"",
Expand Down Expand Up @@ -429,6 +432,8 @@ def build_demo_evidence_summary_text(summary: dict[str, Any] | None = None) -> s
f"- TensorRT FPS: {_fmt_number(runtime['tensorrt_jetson_fp16_25w']['fps'])}",
f"- ONNX Runtime FPS: {_fmt_number(runtime['onnxruntime_cpu']['fps'])}",
f"- deployment_decision: {decision.get('decision')}",
f"- policy_version: {decision.get('policy_version')}",
f"- triggered_rules: {', '.join(decision.get('triggered_rules', [])) or '-'}",
f"- evaluation map50: {_fmt_number(evaluation['map50'])}",
f"- in_memory_note: {summary['in_memory_note']}",
"",
Expand Down
90 changes: 90 additions & 0 deletions inferedgelab/services/deployment_decision.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,78 @@


# Trade-off risk labels that force a "review_required" decision downstream.
REVIEW_TRADEOFF_RISKS = {"risky_tradeoff", "severe_tradeoff", "not_beneficial"}

# Version tag stamped into every decision payload so readers can detect
# which rule set produced a given decision.
POLICY_VERSION = "inferedge-lab-decision-policy-v1"

# Registry of every rule ID the v1 decision policy can emit, mapping each
# to its decision effect and a reviewer-facing description.
POLICY_RULES: dict[str, dict[str, str]] = {
    "guard_error_block": {
        "effect": "blocked",
        "description": "AIGuard reported error-level diagnosis evidence.",
    },
    "guard_warning_review": {
        "effect": "review_required",
        "description": "AIGuard reported warning-level diagnosis evidence.",
    },
    "guard_skipped_unknown": {
        "effect": "unknown",
        "description": "AIGuard was skipped, so diagnosis evidence is incomplete.",
    },
    "guard_unavailable_unknown": {
        "effect": "unknown",
        "description": "AIGuard evidence is unavailable for this comparison.",
    },
    "guard_ok_lab_favorable_deployable": {
        "effect": "deployable",
        "description": "Lab comparison is favorable and AIGuard passed.",
    },
    "guard_ok_lab_neutral_deployable_note": {
        "effect": "deployable_with_note",
        "description": "Lab comparison is neutral and AIGuard passed.",
    },
    "guard_ok_lab_unfavorable_review": {
        "effect": "review_required",
        "description": "Lab comparison indicates regression or mismatch despite AIGuard passing.",
    },
    "guard_ok_lab_unknown": {
        "effect": "unknown",
        "description": "Lab comparison judgement is not recognized by the decision policy.",
    },
    "guard_status_unrecognized_unknown": {
        "effect": "unknown",
        "description": "AIGuard status is not recognized by the decision policy.",
    },
    "shape_mismatch_review": {
        "effect": "review_required",
        "description": "Input shape mismatch requires explicit deployment review.",
    },
    "system_mismatch_unfavorable_review": {
        "effect": "review_required",
        "description": "System mismatch combined with unfavorable Lab judgement requires review.",
    },
    "system_mismatch_note": {
        "effect": "deployable_with_note",
        "description": "System mismatch reduces confidence and must be noted in release evidence.",
    },
    "tradeoff_risk_review": {
        "effect": "review_required",
        "description": "Latency/accuracy trade-off risk requires deployment review.",
    },
    "worker_uncompared_unknown": {
        "effect": "unknown",
        "description": "Worker result has not been compared by Lab yet.",
    },
}


def policy_summary_for_rules(triggered_rules: list[str]) -> list[dict[str, str]]:
    """Expand compact rule IDs into reviewer-facing summary entries.

    Each entry carries the rule ID, its decision effect, and a human-readable
    description taken from ``POLICY_RULES``. Rule IDs not present in this
    policy version degrade gracefully to an "unknown" effect with an
    explanatory placeholder instead of raising, preserving order and
    one-entry-per-rule output.
    """
    summary: list[dict[str, str]] = []
    for rule in triggered_rules:
        # Single registry lookup per rule (the original looked it up twice).
        meta = POLICY_RULES.get(rule, {})
        summary.append(
            {
                "rule": rule,
                "effect": meta.get("effect", "unknown"),
                "description": meta.get(
                    "description", "Rule is not documented in this policy version."
                ),
            }
        )
    return summary


def _decision_payload(
Expand All @@ -16,14 +88,18 @@ def _decision_payload(
guard_status: Any,
guard_verdict_value: Any,
recommended_action: str,
triggered_rules: list[str],
) -> dict[str, Any]:
return {
"policy_version": POLICY_VERSION,
"decision": decision,
"reason": reason,
"lab_overall": lab_overall,
"guard_status": guard_status,
"guard_verdict": guard_verdict_value,
"recommended_action": recommended_action,
"triggered_rules": triggered_rules,
"policy_summary": policy_summary_for_rules(triggered_rules),
}


Expand All @@ -43,61 +119,74 @@ def build_deployment_decision(judgement: dict, guard_analysis: dict | None = Non
guard_status=normalized_guard_status,
guard_verdict_value=normalized_guard_verdict,
recommended_action="Do not deploy until the Guard anomalies are resolved.",
triggered_rules=["guard_error_block"],
)

if normalized_guard_status == "warning":
decision = "review_required"
reason = "Guard analysis reported warning-level validation risks."
recommended_action = "Review Guard anomalies, suspected causes, and accuracy/provenance evidence before deployment."
triggered_rules = ["guard_warning_review"]
elif normalized_guard_status == "skipped":
decision = "unknown"
reason = "Guard analysis was skipped."
recommended_action = "Install InferEdgeAIGuard or run validation reasoning before deployment."
triggered_rules = ["guard_skipped_unknown"]
elif normalized_guard_status is None:
decision = "unknown"
reason = "Guard analysis is unavailable."
recommended_action = "Run compare with --with-guard before deployment decision."
triggered_rules = ["guard_unavailable_unknown"]
elif normalized_guard_status == "ok":
if lab_overall in {"improvement", "tradeoff_faster"}:
decision = "deployable"
reason = "Lab judgement is favorable and Guard analysis passed."
recommended_action = "Deployment can proceed with normal rollout monitoring."
triggered_rules = ["guard_ok_lab_favorable_deployable"]
elif lab_overall in {"neutral", "tradeoff_neutral"}:
decision = "deployable_with_note"
reason = "Lab judgement is neutral and Guard analysis passed."
recommended_action = "Deployment can proceed, but keep the comparison note in release evidence."
triggered_rules = ["guard_ok_lab_neutral_deployable_note"]
elif lab_overall in {"regression", "tradeoff_slower", "mismatch"}:
decision = "review_required"
reason = "Lab judgement indicates regression or mismatch despite Guard passing."
recommended_action = "Review Lab comparison evidence before deployment."
triggered_rules = ["guard_ok_lab_unfavorable_review"]
else:
decision = "unknown"
reason = "Lab judgement is not recognized for deployment decision."
recommended_action = "Review the compare judgement before deployment."
triggered_rules = ["guard_ok_lab_unknown"]
else:
decision = "unknown"
reason = "Guard analysis status is not recognized."
recommended_action = "Review Guard output before deployment."
triggered_rules = ["guard_status_unrecognized_unknown"]

if decision != "blocked" and shape_match is False:
decision = "review_required"
reason = "Input shape mismatch requires deployment review."
recommended_action = "Resolve or explicitly approve the shape mismatch before deployment."
triggered_rules.append("shape_mismatch_review")

if decision != "blocked" and system_match is False:
if lab_overall in {"regression", "tradeoff_slower", "mismatch"}:
decision = "review_required"
reason = "System mismatch and unfavorable Lab judgement require deployment review."
recommended_action = "Review system provenance and Lab regression evidence before deployment."
triggered_rules.append("system_mismatch_unfavorable_review")
elif decision == "deployable":
decision = "deployable_with_note"
reason = "System mismatch reduces deployment confidence."
recommended_action = "Deployment can proceed only with the system mismatch noted in release evidence."
triggered_rules.append("system_mismatch_note")

if decision != "blocked" and tradeoff_risk in REVIEW_TRADEOFF_RISKS:
decision = "review_required"
reason = "Trade-off risk requires deployment review."
recommended_action = "Review accuracy trade-off and provenance evidence before deployment."
triggered_rules.append("tradeoff_risk_review")

return _decision_payload(
decision=decision,
Expand All @@ -106,4 +195,5 @@ def build_deployment_decision(judgement: dict, guard_analysis: dict | None = Non
guard_status=normalized_guard_status,
guard_verdict_value=normalized_guard_verdict,
recommended_action=recommended_action,
triggered_rules=triggered_rules,
)
6 changes: 6 additions & 0 deletions inferedgelab/services/worker_contract.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from inferedgelab.services.api_job_contract import ApiJobContractError
from inferedgelab.services.api_job_contract import build_api_job_response
from inferedgelab.services.api_job_contract import validate_api_job_response
from inferedgelab.services.deployment_decision import POLICY_VERSION
from inferedgelab.services.deployment_decision import policy_summary_for_rules
from inferedgelab.services.guard_analysis import guard_status, guard_verdict


Expand Down Expand Up @@ -199,13 +201,17 @@ def _build_completed_job_result(worker_response: dict[str, Any]) -> dict[str, An
runtime_result = worker_response["runtime_result"]
guard_analysis = worker_response.get("guard_analysis")
normalized_guard_status = guard_status(guard_analysis)
triggered_rules = ["worker_uncompared_unknown"]
deployment_decision = {
"policy_version": POLICY_VERSION,
"decision": "unknown",
"reason": "Worker response has not been compared by Lab yet.",
"lab_overall": None,
"guard_status": normalized_guard_status,
"guard_verdict": guard_verdict(guard_analysis),
"recommended_action": "Run Lab compare/report before deployment decision.",
"triggered_rules": triggered_rules,
"policy_summary": policy_summary_for_rules(triggered_rules),
}
result = {
"summary": {
Expand Down
7 changes: 7 additions & 0 deletions tests/test_api_worker_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,13 @@ def test_analyze_worker_completed_workflow_smoke():
mapped_completed = apply_worker_response_to_job(queued_job, worker_response)
assert mapped_completed["status"] == "completed"
assert mapped_completed["result"]["deployment_decision"]["decision"] == "unknown"
assert (
mapped_completed["result"]["deployment_decision"]["policy_version"]
== "inferedge-lab-decision-policy-v1"
)
assert mapped_completed["result"]["deployment_decision"]["triggered_rules"] == [
"worker_uncompared_unknown"
]
assert mapped_completed["result"]["guard_analysis"] == worker_response["guard_analysis"]

stored_completed = complete_dev_endpoint(
Expand Down
2 changes: 2 additions & 0 deletions tests/test_core4_conformance.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ def test_core4_conformance_report_passes_for_committed_fixtures():
assert report["layers"]["handoff"]["status"] == "pass"
assert any(check["name"] == "handoff:source_model_sha256" for check in report["checks"])
assert any(check["name"] == "lab:compare_speedup" for check in report["checks"])
assert any(check["name"] == "lab:deployment_policy_version" for check in report["checks"])
assert any(check["name"] == "lab:deployment_triggered_rules" for check in report["checks"])
assert any(check["name"] == "aiguard:verdict_coverage" for check in report["checks"])


Expand Down
Loading
Loading