diff --git a/.github/workflows/verification-gate.yml b/.github/workflows/verification-gate.yml new file mode 100644 index 0000000..df6590a --- /dev/null +++ b/.github/workflows/verification-gate.yml @@ -0,0 +1,92 @@ +name: LS-N verification gate + +# Verifies that every approved loss-scenario in +# `safety/stpa/loss-scenarios.yaml` has a passing regression test by +# naming convention (`LS-A-11` -> `ls_a_11_*`). Posts a single sticky PR +# comment summarising passed / failed / missing counts. Fails the job +# only when an existing test fails; missing tests are reported as a +# warning (advisory) so older approved scenarios with ad-hoc test names +# can be migrated incrementally rather than blocking every PR. +# +# Adapted from spar's rivet-driven verification gate +# (pulseengine/spar@ba329f3d). meld has no rivet-style executable +# artifact, but `status: approved` LS entries pair with regression tests +# by the established `ls_<x>_<nn>_*` naming convention; this gate +# makes that pairing a verifiable contract. +# +# Inputs are all integer/metadata fields (PR number, head_ref); no +# untrusted free-form text from PR titles/bodies/comments is read in +# `run:` blocks, so the standard injection vectors do not apply. 
+ +on: + pull_request: + branches: [main] + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} + cancel-in-progress: true + +permissions: + contents: read + pull-requests: write + +jobs: + verify: + name: LS-N verification gate + runs-on: [self-hosted, linux, x64, rust-cpu] + timeout-minutes: 30 + env: + CARGO_TERM_COLOR: always + CARGO_INCREMENTAL: 0 + steps: + - uses: actions/checkout@v4 + + - uses: dtolnay/rust-toolchain@stable + + - uses: Swatinem/rust-cache@v2 + + - name: Install PyYAML + # Self-hosted runners ship Debian/Ubuntu Python with PEP 668 + # protection; `--break-system-packages` is the documented opt-out + # for CI environments where the runner's Python install is + # disposable per workflow run. + run: pip install --user --break-system-packages pyyaml + + - name: Run LS-N verification + id: verify + continue-on-error: true + run: | + python3 tools/run_ls_verification.py \ + --results-json verification-results.json + + - name: Upload results artifact + if: always() + uses: actions/upload-artifact@v4 + with: + name: verification-results + path: verification-results.json + if-no-files-found: warn + + - name: Post sticky PR comment + if: github.event_name == 'pull_request' && always() + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.pull_request.number }} + run: | + python3 tools/post_verification_comment.py "$PR_NUMBER" + + - name: Fail job if any approved LS-N test failed + # Exit code 1 from run_ls_verification.py = a regression test + # for an approved LS entry failed. Exit 2 = missing tests only; + # treated as advisory. Exit 0 = all approved entries verified. + if: steps.verify.outcome == 'failure' + run: | + # Re-check: outcome == failure can mean exit 1 (real fail) or + # exit 2 (missing only). Inspect the JSON to decide. 
+ failed=$(python3 -c "import json; print(json.load(open('verification-results.json'))['failed_count'])") + if [ "$failed" -gt 0 ]; then + echo "::error::$failed approved LS-N entries have failing regression tests; see PR comment" + exit 1 + fi + echo "::warning::Some approved LS-N entries are missing regression tests (advisory only)" diff --git a/.gitignore b/.gitignore index e0fb83e..eb230bb 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,9 @@ credentials.json # Test output test-output/ +# Local LS-N verification gate output +verification-results.json + # Claude local files .claude/ diff --git a/AGENTS.md b/AGENTS.md index b970f51..292246b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -703,6 +703,40 @@ Block the release if any `confirmed` finding lacks an `approved LS-N` in `safety/stpa/loss-scenarios.yaml` with a shipped fix or an explicit risk-acceptance note. +### LS-N verification gate + +CI workflow `.github/workflows/verification-gate.yml` enforces the +test-naming contract on every PR: each `status: approved` entry in +`safety/stpa/loss-scenarios.yaml` must have at least one `#[test] fn +ls___*` in `meld-core` (e.g. `LS-A-11` → `ls_a_11_*`). + +The gate runs `tools/run_ls_verification.py`, which iterates approved +LS IDs and invokes `cargo test --lib --no-fail-fast ` per +entry, then posts a single sticky PR comment with passed / failed / +missing counts via `tools/post_verification_comment.py`. + +Same script runs locally: + +```bash +python3 tools/run_ls_verification.py --results-json /tmp/ls.json +``` + +Buckets and gate behaviour: + +- **Passed** — ≥1 matching test, all green. Approved entry is verified. +- **Failed** — ≥1 matching test failed. **Hard-fails the gate** (block merge). +- **Missing** — zero tests match the `ls___*` prefix. + Advisory only; surfaces as a warning so older approved scenarios + with ad-hoc test names (e.g. 
PR #114's + `test_canonical_abi_size_fixed_size_list_saturates_on_overflow` for + LS-P-4) can be migrated incrementally rather than blocking every PR. + +Adapted from spar's rivet-driven verification gate +(pulseengine/spar@ba329f3d), with meld's STPA loss-scenario artifacts +substituted for rivet's executable artifacts. Same sticky-comment +pattern (a hidden HTML-comment marker, upsert via +GitHub REST API). + ### Release Process #### Pre-Release Checklist (MANDATORY) diff --git a/CHANGELOG.md b/CHANGELOG.md index aa75804..a56ea54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,27 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Added + +- **LS-N verification gate** + (`.github/workflows/verification-gate.yml`, + `tools/run_ls_verification.py`, `tools/post_verification_comment.py`). + PR-time gate that enforces the test-naming contract: each + `status: approved` entry in `safety/stpa/loss-scenarios.yaml` must + have at least one `#[test] fn ls_<x>_<nn>_*` in `meld-core` + (e.g. `LS-A-11` → `ls_a_11_*`). Runs the matching tests via cargo, + buckets results as passed / failed / missing, and upserts a single + sticky PR comment (identified by a hidden HTML-comment marker). + Failed tests hard-fail the gate; missing tests are advisory so the + 10 older approved entries with ad-hoc test names (e.g. PR #114's + `test_canonical_abi_size_fixed_size_list_saturates_on_overflow` + for LS-P-4) can migrate incrementally rather than blocking every + PR. Adapted from spar's rivet-driven verification gate + (pulseengine/spar@ba329f3d); meld substitutes its STPA loss- + scenario artifacts for rivet's executable artifacts, resolving + test linkage via naming convention. The same script runs locally + via `python3 tools/run_ls_verification.py`. 
+ ## [0.8.1] — 2026-05-16 ### Fixed diff --git a/meld-core/src/adapter/fact.rs b/meld-core/src/adapter/fact.rs index 30dbd25..7e028e8 100644 --- a/meld-core/src/adapter/fact.rs +++ b/meld-core/src/adapter/fact.rs @@ -5630,4 +5630,14 @@ mod tests { would see stale bytes", ); } + + /// LS-N verification gate convention alias. Pins LS-A-10 + /// (async-lift retptr writeback skips CABI alignment padding) + /// via the discoverable `ls_a_10_*` name. Same body as the + /// pre-existing `cabi_alignment_stackful_retptr_writes_i64_at_offset_8` + /// regression test. + #[test] + fn ls_a_10_cabi_align_retptr_writeback() { + cabi_alignment_stackful_retptr_writes_i64_at_offset_8(); + } } diff --git a/meld-core/src/parser.rs b/meld-core/src/parser.rs index e475faf..7ab4aa1 100644 --- a/meld-core/src/parser.rs +++ b/meld-core/src/parser.rs @@ -3461,6 +3461,13 @@ mod tests { } } + /// LS-N verification gate convention alias for the truncated- + /// module-section regression above. Same body, canonical name. + #[test] + fn ls_p_5_parser_rejects_truncated_module_section() { + test_parser_rejects_truncated_module_section_issue_118(); + } + #[test] fn test_convert_canonical_options_default() { // Empty options list should produce defaults @@ -4309,6 +4316,16 @@ mod tests { assert_eq!(flat_bytes, u32::MAX, "flat_byte_size must saturate"); } + /// LS-N verification gate convention alias for the saturation + /// regression test above. Delegates to the original test body so + /// the canonical `ls_p_4_*` name is discoverable via + /// `tools/run_ls_verification.py` without renaming the historical + /// test that pins issue #112 / v0.4 follow-up coverage. + #[test] + fn ls_p_4_canonical_abi_size_saturates_on_overflow() { + test_canonical_abi_size_fixed_size_list_saturates_on_overflow(); + } + /// align_up must not panic when given a saturated u32::MAX size and /// a non-trivial alignment — the previous `(size + align - 1)` form /// would overflow. 
diff --git a/meld-core/src/resolver.rs b/meld-core/src/resolver.rs index 51271c1..ce724c1 100644 --- a/meld-core/src/resolver.rs +++ b/meld-core/src/resolver.rs @@ -4832,6 +4832,17 @@ mod tests { ); } + /// LS-N verification gate convention alias for the + /// adapter-sites canonical-sort regression above. Pins the + /// LS-CP-3 (HashMap iteration leaks into adapter_sites order) + /// fix via the discoverable `ls_cp_3_*` name. The + /// `caller_encoding_fallback` half of LS-CP-3 still needs a + /// dedicated regression test — tracked as a follow-up. + #[test] + fn ls_cp_3_sort_adapter_sites_is_canonical() { + test_issue112_item4_sort_adapter_sites_is_canonical(); + } + /// Item 5 unit-level PoC: when two `ModuleResolution`s share the /// same `import_name` but have different `from_import_module`s, the /// promoted adapter sites must preserve the `from_import_module` in @@ -4976,6 +4987,14 @@ mod tests { (LS-R-10 / UCA-R-3 regression)" ); } + + /// LS-N verification gate convention alias. Pins LS-R-10 + /// (intra-component adapter promotion drops from_import_module + /// disambiguator) via the discoverable `ls_r_10_*` name. + #[test] + fn ls_r_10_intra_adapter_preserves_from_import_module() { + test_issue112_item5_intra_adapter_preserves_from_import_module(); + } } // ---------------------------------------------------------------------- diff --git a/tools/post_verification_comment.py b/tools/post_verification_comment.py new file mode 100755 index 0000000..6319c9f --- /dev/null +++ b/tools/post_verification_comment.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 +"""Post (or update) a sticky PR comment summarising LS-N verification results. + +Reads the JSON written by `tools/run_ls_verification.py` and calls the +GitHub REST API directly to upsert a single marker-tagged comment on the +PR. Re-running on the same PR replaces the prior body rather than +appending another comment. Pure stdlib (urllib) — no `gh` CLI dependency. 
"""Post (or update) a sticky PR comment summarising LS-N verification results.

Reads the JSON written by `tools/run_ls_verification.py` and calls the
GitHub REST API directly to upsert a single marker-tagged comment on the
PR. Re-running on the same PR replaces the prior body rather than
appending another comment. Pure stdlib (urllib) — no `gh` CLI dependency.

Usage:
    tools/post_verification_comment.py <pr-number> [--results-json PATH]
                                       [--repo OWNER/NAME]

Required env:
    GH_TOKEN (or GITHUB_TOKEN) with `pull-requests: write`.

Optional env:
    GH_REPO, then GITHUB_REPOSITORY, select the default for `--repo`.
"""

from __future__ import annotations

import argparse
import json
import os
import sys
import urllib.error
import urllib.request
from pathlib import Path

# Hidden HTML comment that tags "our" comment. It must be non-empty: an
# empty marker is a substring of every comment body, so the lookup would
# match (and the upsert would overwrite) the first comment on the PR.
# NOTE(review): the marker must stay byte-identical across runs; confirm
# it matches any comments already posted by earlier revisions.
MARKER = "<!-- meld-ls-verification-gate -->"
API = "https://api.github.com"
DEFAULT_REPO = "pulseengine/meld"
REQUEST_TIMEOUT = 30.0


def default_repo(env: dict[str, str] | None = None) -> str:
    """Return the OWNER/NAME used when `--repo` is not given.

    Precedence: `GH_REPO`, then `GITHUB_REPOSITORY` (set by GitHub
    Actions, so forks comment on their own PRs), then the upstream repo.
    `env` defaults to `os.environ`; it is a parameter so the lookup can
    be exercised without touching the process environment.
    """
    source = os.environ if env is None else env
    return source.get("GH_REPO") or source.get("GITHUB_REPOSITORY") or DEFAULT_REPO


def github_request(
    method: str,
    path: str,
    token: str,
    body: dict | None = None,
    timeout: float = REQUEST_TIMEOUT,
) -> tuple[int, bytes]:
    """Issue one GitHub REST call and return `(status, raw_response_body)`.

    HTTP error responses are returned as `(code, body)` rather than
    raised. Transport-level failures (DNS, refused connection, timeout)
    are returned as status `0` with the error text as the body, so every
    caller's `status not in (...)` check also covers them.
    """
    payload = None if body is None else json.dumps(body).encode("utf-8")
    headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {token}",
        "X-GitHub-Api-Version": "2022-11-28",
        "User-Agent": "meld-ls-verification-gate",
    }
    if payload is not None:
        # Only describe a body when one is actually sent.
        headers["Content-Type"] = "application/json"
    req = urllib.request.Request(
        f"{API}{path}", data=payload, method=method, headers=headers
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return resp.status, resp.read()
    except urllib.error.HTTPError as e:
        # Must precede OSError: HTTPError is a subclass of it.
        return e.code, e.read()
    except OSError as e:
        # URLError and socket timeouts both derive from OSError.
        return 0, str(e).encode("utf-8", "replace")


def render_body(results: dict) -> str:
    """Render the Markdown comment body from the verification summary.

    `results` is the JSON object written by `run_ls_verification.py`;
    the keys read are `passed_count`, `failed_count`, `missing_count`,
    `total`, `failed` and `missing`. Raises `KeyError` if one is absent.
    """
    passed = results["passed_count"]
    failed = results["failed_count"]
    missing = results["missing_count"]
    total = results["total"]
    failed_ids = results["failed"]
    missing_ids = results["missing"]

    if failed == 0 and missing == 0:
        status = f"✅ **{passed}/{total}** approved LS entries verified"
    elif failed == 0:
        status = (
            f"⚠️ **{passed}/{total}** verified — **{missing}** missing regression tests"
        )
    else:
        status = (
            f"❌ **{passed}/{total}** verified — **{failed}** failed, "
            f"**{missing}** missing regression tests"
        )

    failed_section = (
        "\n".join(f"- `{i}`" for i in failed_ids) if failed_ids else "_(none)_"
    )
    missing_section = (
        "\n".join(f"- `{i}`" for i in missing_ids) if missing_ids else "_(none)_"
    )

    # The blank line after each <summary> is required for GitHub to
    # render the enclosed Markdown list inside the collapsible section.
    return f"""{MARKER}
## LS-N verification gate

{status}

| | count |
|---|---:|
| Passed (≥1 test, all green) | {passed} |
| Failed (≥1 test failure) | {failed} |
| Missing (no `ls_*_NN_*` test found) | {missing} |

Approved `loss-scenarios.yaml` entries are expected to have a
regression test named `ls_<x>_<nn>_*` (e.g. `LS-A-11` →
`ls_a_11_*`). The gate runs each prefix via `cargo test --lib
--no-fail-fast` and aggregates pass/fail/missing.

<details>
<summary>Failed LS entries</summary>

{failed_section}

</details>

<details>
<summary>Missing regression tests</summary>

{missing_section}

</details>

Updated automatically by `tools/post_verification_comment.py`.
Source of truth: `safety/stpa/loss-scenarios.yaml`."""


def find_marker_comment(repo: str, pr: int, token: str) -> int | None:
    """Page through PR comments looking for the marker.

    Returns the id of the first comment whose body contains `MARKER`, or
    `None` when every page was read and none matched.

    Raises:
        RuntimeError: a page could not be fetched. This is kept distinct
            from "not found" so a transient API failure cannot make the
            caller post a duplicate sticky comment.
    """
    page = 1
    while True:
        status, payload = github_request(
            "GET",
            f"/repos/{repo}/issues/{pr}/comments?per_page=100&page={page}",
            token,
        )
        if status != 200:
            raise RuntimeError(f"GET comments failed: {status} {payload[:200]!r}")
        comments = json.loads(payload)
        for comment in comments:
            if MARKER in (comment.get("body") or ""):
                return comment["id"]
        if len(comments) < 100:
            # A short (or empty) page is the last page.
            return None
        page += 1


def upsert_comment(repo: str, pr: int, body: str, token: str) -> None:
    """Replace the existing marker comment on the PR, or create one.

    Exits the process with status 2 when the lookup or the write fails.
    """
    try:
        existing = find_marker_comment(repo, pr, token)
    except RuntimeError as exc:
        print(f"{exc}; not posting to avoid a duplicate comment", file=sys.stderr)
        sys.exit(2)

    if existing is not None:
        print(f"updating comment {existing}", file=sys.stderr)
        status, resp = github_request(
            "PATCH",
            f"/repos/{repo}/issues/comments/{existing}",
            token,
            {"body": body},
        )
    else:
        print("creating new comment", file=sys.stderr)
        status, resp = github_request(
            "POST",
            f"/repos/{repo}/issues/{pr}/comments",
            token,
            {"body": body},
        )
    if status not in (200, 201):
        print(f"comment upsert failed: {status} {resp[:300]!r}", file=sys.stderr)
        sys.exit(2)


def main() -> int:
    """CLI entry point. Returns 0 on success or nothing to post, 2 on error."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("pr", type=int, help="pull-request number")
    parser.add_argument(
        "--results-json",
        default="verification-results.json",
        type=Path,
        help="path to the JSON summary (default: %(default)s)",
    )
    parser.add_argument(
        "--repo",
        default=default_repo(),
        help="OWNER/NAME (default: %(default)s)",
    )
    args = parser.parse_args()

    token = os.environ.get("GH_TOKEN") or os.environ.get("GITHUB_TOKEN")
    if not token:
        print("GH_TOKEN or GITHUB_TOKEN required", file=sys.stderr)
        return 2

    if not args.results_json.is_file():
        # The verification step may have died before writing a summary;
        # that is reported by the workflow itself, not by this script.
        print(f"no {args.results_json} found; nothing to post", file=sys.stderr)
        return 0

    try:
        results = json.loads(args.results_json.read_text(encoding="utf-8"))
        body = render_body(results)
    except (json.JSONDecodeError, KeyError) as exc:
        print(f"malformed {args.results_json}: {exc!r}", file=sys.stderr)
        return 2

    upsert_comment(args.repo, args.pr, body, token)
    return 0


if __name__ == "__main__":
    sys.exit(main())
"""Verify every approved LS-N entry in `safety/stpa/loss-scenarios.yaml`
has a passing regression test.

Each approved loss scenario must have at least one `#[test] fn
ls_<x>_<nn>_*` in `meld-core` (e.g. `LS-A-11` → `ls_a_11_*`).
This script:

1. Reads `safety/stpa/loss-scenarios.yaml` and filters entries where
   `status: approved`.
2. For each ID, derives the test-name prefix (`LS-A-11` → `ls_a_11_`)
   and runs `cargo test --lib --no-fail-fast <prefix>`.
3. Buckets the result as `passed` (≥1 test ran, all green), `failed`
   (any test failed, or cargo itself failed, e.g. a build error), or
   `missing` (no test matching the prefix actually ran).
4. Writes a structured JSON summary alongside human-readable stdout.

Usage:
    tools/run_ls_verification.py [--results-json PATH]
                                 [--scenarios-yaml PATH]
                                 [--package NAME]

Defaults:
    --results-json     verification-results.json
    --scenarios-yaml   safety/stpa/loss-scenarios.yaml
    --package          meld-core

Exit code:
    0  if every approved LS-N has a passing regression test
    1  if any LS-N has a failing test
    2  if any LS-N is missing a regression test (advisory until tests
       are added; treated as warning, not gate failure). Also returned,
       without writing the JSON summary, when the scenarios file or
       PyYAML is unavailable.
"""

from __future__ import annotations

import argparse
import json
import re
import subprocess
import sys
from dataclasses import asdict, dataclass, field
from pathlib import Path


@dataclass
class Result:
    """Aggregated gate outcome; serialised verbatim as the JSON summary."""

    total: int = 0
    passed_count: int = 0
    failed_count: int = 0
    missing_count: int = 0
    passed: list[str] = field(default_factory=list)
    failed: list[str] = field(default_factory=list)
    missing: list[str] = field(default_factory=list)


def id_to_test_prefix(ls_id: str) -> str:
    """`LS-A-11` -> `ls_a_11_`.

    The trailing underscore keeps `ls_a_1_` from also selecting
    `ls_a_11_*` when the string is passed to cargo as a test filter.
    """
    return ls_id.lower().replace("-", "_") + "_"


def load_approved_ids(yaml_path: Path) -> list[str]:
    """Return the `id` of every scenario with `status: approved`.

    PyYAML is imported here rather than at module level so the module
    stays importable without it; if it is missing the process exits with
    status 2, as before.

    Raises:
        ValueError: the document is not a mapping (e.g. an empty file).
            Raised rather than returning `[]` so a truncated scenarios
            file cannot make the gate pass vacuously.
        KeyError: an approved entry has no `id`.
    """
    try:
        import yaml
    except ImportError:
        print("PyYAML required (pip install pyyaml)", file=sys.stderr)
        sys.exit(2)

    data = yaml.safe_load(yaml_path.read_text(encoding="utf-8"))
    if not isinstance(data, dict):
        raise ValueError(
            f"{yaml_path}: expected a mapping with a 'loss-scenarios' key, "
            f"got {type(data).__name__}"
        )
    scenarios = data.get("loss-scenarios") or []
    return [s["id"] for s in scenarios if s.get("status") == "approved"]


# libtest prints one such line per test binary, e.g.
# "test result: ok. 2 passed; 0 failed; 1 ignored; 0 measured; ...".
_TEST_LINE = re.compile(
    r"^test result: (?P<status>ok|FAILED)\. "
    r"(?P<passed>\d+) passed; (?P<failed>\d+) failed; "
    r"(?P<ignored>\d+) ignored;"
)


def parse_test_summary(output: str) -> tuple[int, int, int]:
    """Sum `(passed, failed, ignored)` over every `test result:` line."""
    passed = failed = ignored = 0
    for line in output.splitlines():
        m = _TEST_LINE.match(line)
        if m:
            passed += int(m["passed"])
            failed += int(m["failed"])
            ignored += int(m["ignored"])
    return passed, failed, ignored


def interpret_cargo_run(returncode: int, stdout: str) -> tuple[int, int, int]:
    """Turn one cargo invocation into `(matched, passed, failed)`.

    `matched` = passed + failed + ignored. If cargo exited non-zero but
    no failing test was reported (a build error, or a harness crash
    before the summary line), one synthetic failure is recorded so the
    entry is not bucketed as merely "missing", which is advisory.
    """
    passed, failed, ignored = parse_test_summary(stdout)
    if returncode != 0 and failed == 0:
        failed = 1
    return passed + failed + ignored, passed, failed


def classify(matched: int, passed: int, failed: int) -> str:
    """Bucket one entry as `"failed"`, `"missing"` or `"passed"`.

    An entry whose only matching tests are `#[ignore]`d has matched > 0
    but nothing ran, so it is `"missing"`, not `"passed"`.
    """
    if failed > 0:
        return "failed"
    if passed == 0:
        return "missing"
    return "passed"


def run_prefix(package: str, prefix: str) -> tuple[int, int, int]:
    """Return (matched, passed, failed) for `cargo test --lib <prefix>`.

    When cargo finds no matching tests it reports `0 passed; 0 failed;
    0 ignored` and still exits 0, giving `(0, 0, 0)`. On any failure the
    captured cargo output is echoed to stderr so the log shows why.
    """
    cmd = [
        "cargo",
        "test",
        "-p",
        package,
        "--lib",
        "--no-fail-fast",
        "--",
        "--test-threads=1",
        prefix,
    ]
    proc = subprocess.run(cmd, capture_output=True, text=True, check=False)
    matched, passed, failed = interpret_cargo_run(proc.returncode, proc.stdout)
    if failed:
        sys.stderr.write(f"--- cargo output for `{prefix}*` ---\n")
        sys.stderr.write(proc.stdout)
        sys.stderr.write(proc.stderr)
    return matched, passed, failed


def main(argv: list[str] | None = None) -> int:
    """CLI entry point; see the module docstring for the exit codes.

    `argv` defaults to `sys.argv[1:]`; passing it explicitly lets the
    gate be driven from other Python code.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--results-json",
        default="verification-results.json",
        type=Path,
        help="path for the JSON summary (default: %(default)s)",
    )
    parser.add_argument(
        "--scenarios-yaml",
        default="safety/stpa/loss-scenarios.yaml",
        type=Path,
        help="path to loss-scenarios.yaml (default: %(default)s)",
    )
    parser.add_argument(
        "--package",
        default="meld-core",
        help="cargo package to run tests against (default: %(default)s)",
    )
    args = parser.parse_args(argv)

    result = Result()

    print("== meld LS-N verification gate ==")
    print(f"scenarios: {args.scenarios_yaml}")
    print(f"package:   {args.package}")
    print()

    if not args.scenarios_yaml.is_file():
        print(f"{args.scenarios_yaml} not found", file=sys.stderr)
        return 2

    ids = load_approved_ids(args.scenarios_yaml)
    if not ids:
        print("No approved loss scenarios found.", file=sys.stderr)
        args.results_json.write_text(
            json.dumps(asdict(result), indent=2), encoding="utf-8"
        )
        return 0

    print(f"approved LS entries: {len(ids)}")
    print()
    result.total = len(ids)

    for ls_id in ids:
        prefix = id_to_test_prefix(ls_id)
        matched, passed, failed = run_prefix(args.package, prefix)
        bucket = classify(matched, passed, failed)
        if bucket == "failed":
            print(f"[FAIL] {ls_id}  ({passed} pass, {failed} fail)")
            result.failed.append(ls_id)
        elif bucket == "missing":
            reason = (
                f"no `{prefix}*` test found"
                if matched == 0
                else f"only ignored `{prefix}*` tests"
            )
            print(f"[MISS] {ls_id}  ({reason})")
            result.missing.append(ls_id)
        else:
            print(f"[ OK ] {ls_id}  ({passed} pass)")
            result.passed.append(ls_id)

    result.passed_count = len(result.passed)
    result.failed_count = len(result.failed)
    result.missing_count = len(result.missing)

    args.results_json.write_text(
        json.dumps(asdict(result), indent=2), encoding="utf-8"
    )

    print()
    print("== summary ==")
    print(f"passed:  {result.passed_count}")
    print(f"failed:  {result.failed_count}")
    print(f"missing: {result.missing_count}")
    if result.failed:
        print("failed IDs:")
        for fid in result.failed:
            print(f"  - {fid}")
    if result.missing:
        print("missing IDs:")
        for mid in result.missing:
            print(f"  - {mid}")

    if result.failed_count > 0:
        return 1
    if result.missing_count > 0:
        return 2
    return 0


if __name__ == "__main__":
    sys.exit(main())