diff --git a/.github/scripts/merge_coverage.py b/.github/scripts/merge_coverage.py new file mode 100644 index 000000000..513928d80 --- /dev/null +++ b/.github/scripts/merge_coverage.py @@ -0,0 +1,327 @@ +#!/usr/bin/env python3 +"""Merge per-platform llvm-cov JSON exports into a per-line set-union report. + +Usage: + merge_coverage.py --output-json OUT.json --output-md OUT.md \\ + LABEL=PATH [LABEL=PATH ...] + +Each PATH is a coverage.json from `llvm-cov export -format=json`. LABEL is +a short platform identifier (e.g. ``linux``, ``macos``, ``selfhost-shim``). + +Outputs: + OUT.json per-file (executable, covered) line sets, plus per-platform map. + OUT.md markdown summary suitable for a PR comment. + +The per-line set-union design and its invariant (covered(f) ⊆ executable(f)) +are documented inline. +""" +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path + + +# --- Schema constants (llvm-cov JSON segment format) ------------------------- +# Segment = [line, col, count, has_count, is_region_entry, is_gap_region] +_SEG_LINE = 0 +_SEG_COUNT = 2 +_SEG_HAS_COUNT = 3 +_SEG_IS_GAP = 5 +_SEG_MIN_LEN = 6 + + +def normalise_path(path: str) -> str: + """Normalise a filename so the same source file from different CI runners + or build trees compares equal. + + Rule: strip everything before (and including) the last ``/src/snmalloc/`` + occurrence, after replacing backslashes with forward slashes. Paths + without ``/src/snmalloc/`` are kept verbatim (they are out-of-tree). + """ + p = path.replace("\\", "/") + needle = "/src/snmalloc/" + idx = p.rfind(needle) + if idx >= 0: + # Result begins with ``src/snmalloc/...`` (no leading slash). + return p[idx + 1:] + return p + + +def parse_platform(doc: dict) -> dict[str, dict]: + """Convert a single llvm-cov JSON doc into + ``{normalised_path: {"executable": set[int], "covered": set[int], + "regions_executable": int, "regions_covered": int}}``. 
+ + A line ``ℓ`` enters ``executable`` iff at least one segment on ``ℓ`` + has ``has_count == true`` AND ``is_gap_region == false``. It enters + ``covered`` iff additionally at least one such segment has ``count > 0``. + By construction ``covered ⊆ executable``. + """ + files: dict[str, dict] = {} + for entry in doc.get("data", []): + for f in entry.get("files", []): + fn = f.get("filename") + if not isinstance(fn, str): + continue + key = normalise_path(fn) + executable = files.setdefault(key, { + "executable": set(), + "covered": set(), + "regions_executable": 0, + "regions_covered": 0, + }) + for seg in f.get("segments", []): + if not isinstance(seg, list) or len(seg) < _SEG_MIN_LEN: + continue + if not seg[_SEG_HAS_COUNT]: + continue + if seg[_SEG_IS_GAP]: + continue + line = seg[_SEG_LINE] + if not isinstance(line, int): + continue + executable["executable"].add(line) + if isinstance(seg[_SEG_COUNT], (int, float)) and seg[_SEG_COUNT] > 0: + executable["covered"].add(line) + # Region totals: read from per-file summary (advisory only). 
+ summary = f.get("summary", {}) + regions = summary.get("regions", {}) + r_count = regions.get("count") + r_covered = regions.get("covered") + if isinstance(r_count, int): + executable["regions_executable"] += r_count + if isinstance(r_covered, int): + executable["regions_covered"] += r_covered + return files + + +def merge(platforms: dict[str, dict[str, dict]]) -> dict: + """Merge per-platform parsed maps into the canonical merged structure.""" + all_files: set[str] = set() + for pmap in platforms.values(): + all_files.update(pmap.keys()) + + merged_files: dict[str, dict] = {} + for fn in sorted(all_files): + executable: set[int] = set() + covered: set[int] = set() + for pmap in platforms.values(): + entry = pmap.get(fn) + if entry is None: + continue + executable |= entry["executable"] + covered |= entry["covered"] + # Defensive: assert invariant covered ⊆ executable + assert covered <= executable, f"invariant violation for {fn}" + merged_files[fn] = { + "executable": sorted(executable), + "covered": sorted(covered), + } + + total_exec = sum(len(v["executable"]) for v in merged_files.values()) + total_covered = sum(len(v["covered"]) for v in merged_files.values()) + + plat_out: dict[str, dict] = {} + for label, pmap in platforms.items(): + files_view = { + fn: { + "executable": len(entry["executable"]), + "covered": len(entry["covered"]), + "regions": { + "executable": entry["regions_executable"], + "covered": entry["regions_covered"], + }, + } + for fn, entry in pmap.items() + } + p_total_exec = sum(v["executable"] for v in files_view.values()) + p_total_covered = sum(v["covered"] for v in files_view.values()) + p_total_r_exec = sum(entry["regions_executable"] for entry in pmap.values()) + p_total_r_covered = sum(entry["regions_covered"] for entry in pmap.values()) + plat_out[label] = { + "files": files_view, + "totals": { + "executable": p_total_exec, + "covered": p_total_covered, + "regions": { + "executable": p_total_r_exec, + "covered": p_total_r_covered, + 
}, + }, + } + + return { + "files": merged_files, + "totals": {"executable": total_exec, "covered": total_covered}, + "platforms": plat_out, + } + + +# --- Markdown rendering ------------------------------------------------------ + +def md_escape(s: str) -> str: + """Escape a filename for inclusion in a markdown table cell.""" + return s.replace("|", r"\|").replace("\r", " ").replace("\n", " ") + + +def _pct(covered: int, executable: int) -> str: + if executable == 0: + return "n/a" + return f"{100.0 * covered / executable:.2f}%" + + +def _top_dir(path: str) -> str: + """Group key for the per-directory table. + + For paths under ``src/snmalloc/``, group by the immediate sub-directory + (e.g. ``src/snmalloc/pal/...`` → ``src/snmalloc/pal``). Other paths are + grouped under ``other``. + """ + if path.startswith("src/snmalloc/"): + rest = path[len("src/snmalloc/"):] + head, _, _ = rest.partition("/") + return f"src/snmalloc/{head}" if head else "src/snmalloc" + return "other" + + +def _in_scope(path: str) -> bool: + """Filter a normalised path to ``src/snmalloc/**`` (excluding tests and + concept headers). Same scoping as ``.copilot/coverage_diff.py``.""" + if not path.startswith("src/snmalloc/"): + return False + if path.endswith("_concept.h"): + return False + return True + + +def render_markdown(merged: dict) -> str: + out: list[str] = [] + # Marker used by .github/workflows/coverage-comment.yml's + # find-or-create logic (see the dual-marker policy: comment must + # be authored by github-actions[bot] AND its body must contain + # this marker). If you change this string you MUST update both + # occurrences in coverage-comment.yml in lockstep, or comment + # dedup silently breaks (every run posts a new comment). + out.append("") + out.append("## Coverage report (cross-platform merged)") + out.append("") + # Headline is in-scope (``src/snmalloc/**``) only — that is the project + # code being measured. 
The JSON artifact retains the full unfiltered + # data for downstream consumers (e.g. ``coverage_diff.py``). + scoped_exec = 0 + scoped_cov = 0 + for fn, v in merged["files"].items(): + if not _in_scope(fn): + continue + scoped_exec += len(v["executable"]) + scoped_cov += len(v["covered"]) + out.append( + f"**Lines covered (`src/snmalloc/**`): {scoped_cov} / {scoped_exec} " + f"({_pct(scoped_cov, scoped_exec)})**" + ) + out.append("") + out.append( + "_Merged line coverage is the per-line union across all platforms. " + "Region coverage is reported per-platform only; no cross-platform " + "region total is computed._" + ) + out.append("") + + # Per-directory breakdown (in-scope only). + dir_totals: dict[str, dict[str, int]] = {} + for fn, v in merged["files"].items(): + if not _in_scope(fn): + continue + d = _top_dir(fn) + bucket = dir_totals.setdefault(d, {"executable": 0, "covered": 0}) + bucket["executable"] += len(v["executable"]) + bucket["covered"] += len(v["covered"]) + + out.append("### Per-directory breakdown") + out.append("") + out.append("| Directory | Lines covered | Lines executable | % |") + out.append("| --- | ---: | ---: | ---: |") + rows = sorted( + dir_totals.items(), + key=lambda kv: ( + (kv[1]["covered"] / kv[1]["executable"]) if kv[1]["executable"] else 1.0, + kv[0], + ), + ) + for d, t in rows: + out.append( + f"| {md_escape(d)} | {t['covered']} | {t['executable']} | " + f"{_pct(t['covered'], t['executable'])} |" + ) + out.append("") + + # Per-platform contributions (advisory). + out.append("
<details><summary>Per-platform contributions (advisory)</summary>")
+    out.append("")
+    out.append("| Platform | Lines covered | Lines executable | Lines % | Regions covered | Regions executable | Regions % |")
+    out.append("| --- | ---: | ---: | ---: | ---: | ---: | ---: |")
+    for label in sorted(merged["platforms"].keys()):
+        pt = merged["platforms"][label]["totals"]
+        rt = pt["regions"]
+        out.append(
+            f"| {md_escape(label)} | {pt['covered']} | {pt['executable']} | "
+            f"{_pct(pt['covered'], pt['executable'])} | "
+            f"{rt['covered']} | {rt['executable']} | "
+            f"{_pct(rt['covered'], rt['executable'])} |"
+        )
+    out.append("")
+    out.append("
</details>")
+    out.append("")
+    return "\n".join(out)
+
+
+# --- CLI ---------------------------------------------------------------------
+
+def parse_inputs(spec_list: list[str]) -> dict[str, Path]:
+    inputs: dict[str, Path] = {}
+    for spec in spec_list:
+        if "=" not in spec:
+            raise SystemExit(f"error: input must be LABEL=PATH, got {spec!r}")
+        label, _, path = spec.partition("=")
+        if not label or not path:
+            raise SystemExit(f"error: empty label or path in {spec!r}")
+        if label in inputs:
+            raise SystemExit(f"error: duplicate label {label!r}")
+        inputs[label] = Path(path)
+    return inputs
+
+
+def load_doc(path: Path) -> dict:
+    try:
+        with path.open() as f:
+            doc = json.load(f)
+    except (OSError, json.JSONDecodeError) as exc:
+        raise SystemExit(f"error: cannot load {path}: {exc}")
+    if not isinstance(doc, dict) or "data" not in doc:
+        raise SystemExit(f"error: {path} missing top-level 'data' key")
+    return doc
+
+
+def main(argv: list[str] | None = None) -> int:
+    ap = argparse.ArgumentParser(description=__doc__)
+    ap.add_argument("--output-json", required=True, type=Path)
+    ap.add_argument("--output-md", required=True, type=Path)
+    ap.add_argument("inputs", nargs="+", help="LABEL=PATH pairs")
+    args = ap.parse_args(argv)
+
+    spec = parse_inputs(args.inputs)
+    platforms: dict[str, dict[str, dict]] = {}
+    for label, path in spec.items():
+        platforms[label] = parse_platform(load_doc(path))
+
+    merged = merge(platforms)
+
+    args.output_json.write_text(json.dumps(merged, indent=2, sort_keys=True))
+    args.output_md.write_text(render_markdown(merged))
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.github/scripts/test_merge_coverage.py b/.github/scripts/test_merge_coverage.py
new file mode 100644
index 000000000..9355c8d4c
--- /dev/null
+++ b/.github/scripts/test_merge_coverage.py
@@ -0,0 +1,338 @@
+"""Pytest suite for ``merge_coverage.py``. 
+ +Each case +constructs synthetic ``llvm-cov export``-shaped JSON, runs the merger, +and asserts an explicit property — including the global invariant +``covered(f) ⊆ executable(f)`` (case 14) on every merged output. +""" +from __future__ import annotations + +import json +import sys +from pathlib import Path + +import pytest + +# Allow ``import merge_coverage`` from the same directory. +sys.path.insert(0, str(Path(__file__).resolve().parent)) +import merge_coverage as mc # noqa: E402 + + +# --------------------------------------------------------------------------- +# Helpers to synthesise minimal llvm-cov-shaped JSON. +# --------------------------------------------------------------------------- + +def _seg(line: int, count: int, has_count: bool = True, gap: bool = False) -> list: + """Build a segment list ``[line, col, count, has_count, region_entry, gap]``.""" + return [line, 1, count, has_count, True, gap] + + +def _file(filename: str, segments: list[list], regions: tuple[int, int] = (0, 0)) -> dict: + r_count, r_covered = regions + return { + "filename": filename, + "segments": segments, + "summary": { + "regions": {"count": r_count, "covered": r_covered}, + "lines": {"count": 0, "covered": 0, "percent": 0.0}, + }, + } + + +def _doc(files: list[dict]) -> dict: + return { + "version": "2.0.1", + "type": "llvm.coverage.json.export", + "data": [{"files": files, "totals": {}}], + } + + +def _merge_dicts(platforms: dict[str, dict]) -> dict: + parsed = {label: mc.parse_platform(doc) for label, doc in platforms.items()} + return mc.merge(parsed) + + +def _assert_invariant(merged: dict) -> None: + """The global per-file invariant ``covered ⊆ executable`` (case 14).""" + for fn, v in merged["files"].items(): + cov = set(v["covered"]) + exe = set(v["executable"]) + assert cov <= exe, f"invariant violated for {fn}: covered={cov} executable={exe}" + + +# --------------------------------------------------------------------------- +# Case 1: disjoint files +# 
--------------------------------------------------------------------------- + +def test_disjoint_files(): + linux = _doc([_file("/work/src/snmalloc/a.h", [_seg(10, 1), _seg(11, 2)])]) + macos = _doc([_file("/work/src/snmalloc/b.h", [_seg(20, 1), _seg(21, 2)])]) + merged = _merge_dicts({"linux": linux, "macos": macos}) + assert set(merged["files"].keys()) == {"src/snmalloc/a.h", "src/snmalloc/b.h"} + assert merged["totals"] == {"executable": 4, "covered": 4} + _assert_invariant(merged) + + +# --------------------------------------------------------------------------- +# Case 2: overlapping files, disjoint covered lines +# --------------------------------------------------------------------------- + +def test_overlapping_files_disjoint_covered(): + linux = _doc([_file("/a/src/snmalloc/f.h", [_seg(1, 1), _seg(2, 0), _seg(3, 1)])]) + macos = _doc([_file("/b/src/snmalloc/f.h", [_seg(1, 0), _seg(2, 1), _seg(3, 0)])]) + merged = _merge_dicts({"linux": linux, "macos": macos}) + f = merged["files"]["src/snmalloc/f.h"] + assert f["executable"] == [1, 2, 3] + assert f["covered"] == [1, 2, 3] + _assert_invariant(merged) + + +# --------------------------------------------------------------------------- +# Case 3: identical coverage +# --------------------------------------------------------------------------- + +def test_identical_coverage(): + segs = [_seg(5, 1), _seg(6, 1), _seg(7, 0)] + a = _doc([_file("/x/src/snmalloc/f.h", segs)]) + b = _doc([_file("/x/src/snmalloc/f.h", segs)]) + merged = _merge_dicts({"a": a, "b": b}) + f = merged["files"]["src/snmalloc/f.h"] + assert f["executable"] == [5, 6, 7] + assert f["covered"] == [5, 6] + _assert_invariant(merged) + + +# --------------------------------------------------------------------------- +# Case 4: complementary lines +# --------------------------------------------------------------------------- + +def test_complementary_lines(): + odd = _doc([_file("/x/src/snmalloc/f.h", [_seg(1, 1), _seg(3, 1), _seg(5, 1)])]) + even = 
_doc([_file("/x/src/snmalloc/f.h", [_seg(2, 1), _seg(4, 1), _seg(6, 1)])]) + merged = _merge_dicts({"linux": odd, "macos": even}) + f = merged["files"]["src/snmalloc/f.h"] + assert f["executable"] == [1, 2, 3, 4, 5, 6] + assert f["covered"] == [1, 2, 3, 4, 5, 6] + _assert_invariant(merged) + + +# --------------------------------------------------------------------------- +# Case 5: gap-region only line — must NOT enter executable +# --------------------------------------------------------------------------- + +def test_gap_region_only_excluded(): + doc = _doc([_file("/x/src/snmalloc/f.h", [ + _seg(10, 0, has_count=True, gap=True), + _seg(11, 5, has_count=True, gap=False), + ])]) + merged = _merge_dicts({"linux": doc}) + f = merged["files"]["src/snmalloc/f.h"] + assert f["executable"] == [11] + assert f["covered"] == [11] + assert 10 not in f["executable"] + _assert_invariant(merged) + + +# --------------------------------------------------------------------------- +# Case 6: gap-region mixed line — non-gap segment qualifies it +# --------------------------------------------------------------------------- + +def test_gap_region_mixed_line_included(): + doc = _doc([_file("/x/src/snmalloc/f.h", [ + _seg(42, 0, has_count=True, gap=True), + _seg(42, 7, has_count=True, gap=False), + ])]) + merged = _merge_dicts({"linux": doc}) + f = merged["files"]["src/snmalloc/f.h"] + assert f["executable"] == [42] + assert f["covered"] == [42] + _assert_invariant(merged) + + +# --------------------------------------------------------------------------- +# Case 7: ifdef-gated different executable sets +# --------------------------------------------------------------------------- + +def test_ifdef_gated_executable_sets(): + linux = _doc([_file("/x/src/snmalloc/pal_ds.h", + [_seg(i, 1) for i in range(1, 11)])]) + macos = _doc([_file("/x/src/snmalloc/pal_ds.h", + [_seg(i, 0) for i in range(20, 31)])]) + merged = _merge_dicts({"linux": linux, "macos": macos}) + f = 
merged["files"]["src/snmalloc/pal_ds.h"] + assert f["executable"] == list(range(1, 11)) + list(range(20, 31)) + assert f["covered"] == list(range(1, 11)) + _assert_invariant(merged) + + +# --------------------------------------------------------------------------- +# Case 8: empty data on one platform +# --------------------------------------------------------------------------- + +def test_empty_platform_data(): + linux = _doc([_file("/x/src/snmalloc/f.h", [_seg(1, 1)])]) + empty = {"version": "2.0.1", "type": "llvm.coverage.json.export", + "data": [{"files": [], "totals": {}}]} + merged = _merge_dicts({"linux": linux, "macos": empty}) + assert "src/snmalloc/f.h" in merged["files"] + assert merged["platforms"]["macos"]["totals"]["executable"] == 0 + assert merged["platforms"]["macos"]["totals"]["covered"] == 0 + _assert_invariant(merged) + + +def test_all_platforms_empty(): + empty1 = {"version": "2.0.1", "type": "llvm.coverage.json.export", + "data": [{"files": [], "totals": {}}]} + empty2 = dict(empty1) + merged = _merge_dicts({"a": empty1, "b": empty2}) + assert merged["files"] == {} + assert merged["totals"] == {"executable": 0, "covered": 0} + md = mc.render_markdown(merged) + assert "0 / 0" in md or "n/a" in md + _assert_invariant(merged) + + +# --------------------------------------------------------------------------- +# Case 9: path normalization — same file via different absolute prefixes +# --------------------------------------------------------------------------- + +def test_path_normalization_same_file(): + linux_path = "/home/runner/work/snmalloc/snmalloc/src/snmalloc/foo.h" + selfhost_path = "/build/relwithdebinfo/src/snmalloc/foo.h" + linux = _doc([_file(linux_path, [_seg(1, 1), _seg(2, 0)])]) + selfhost = _doc([_file(selfhost_path, [_seg(2, 1), _seg(3, 1)])]) + merged = _merge_dicts({"linux": linux, "selfhost": selfhost}) + assert list(merged["files"].keys()) == ["src/snmalloc/foo.h"] + f = merged["files"]["src/snmalloc/foo.h"] + # Linux: 
executable={1,2}, covered={1}; selfhost: executable={2,3}, + # covered={2,3}. Union: executable={1,2,3}, covered={1,2,3}. + assert f["executable"] == [1, 2, 3] + assert f["covered"] == [1, 2, 3] + _assert_invariant(merged) + + +# --------------------------------------------------------------------------- +# Case 10: path normalization — outside the snmalloc tree, kept verbatim +# --------------------------------------------------------------------------- + +def test_path_normalization_outside_tree(): + doc = _doc([_file("/usr/include/stdlib.h", [_seg(50, 1)])]) + merged = _merge_dicts({"linux": doc}) + assert "/usr/include/stdlib.h" in merged["files"] + _assert_invariant(merged) + + +# --------------------------------------------------------------------------- +# Case 11: Windows backslashes +# --------------------------------------------------------------------------- + +def test_path_normalization_windows_backslashes(): + win_path = r"C:\runner\work\snmalloc\snmalloc\src\snmalloc\pal\pal_windows.h" + doc = _doc([_file(win_path, [_seg(7, 1)])]) + merged = _merge_dicts({"windows": doc}) + assert "src/snmalloc/pal/pal_windows.h" in merged["files"] + _assert_invariant(merged) + + +# --------------------------------------------------------------------------- +# Case 12: filename markdown escape +# --------------------------------------------------------------------------- + +def test_markdown_escape_pipe_and_newline(): + weird1 = "/x/src/snmalloc/weird|name.h" + weird2 = "/x/src/snmalloc/weird\nname.h" + docs = { + "linux": _doc([ + _file(weird1, [_seg(1, 1)]), + _file(weird2, [_seg(1, 1)]), + ]), + } + merged = _merge_dicts(docs) + md = mc.render_markdown(merged) + # The pipe-containing path renders with the embedded '|' escaped. + pipe_lines = [ln for ln in md.splitlines() + if "weird" in ln and "|name.h" not in ln.replace(r"\|", "")] + # Strict: exactly one rendered row contains the escaped pipe. 
+ assert any(r"weird\|name.h" in ln for ln in md.splitlines()), md + # Newline inside filename must not break table structure: rendered as + # a single row with the newline replaced by a space. + assert "weird name.h" in md + # No row contains a literal unescaped LF inside the filename. + for ln in md.splitlines(): + assert "weird\nname.h" not in ln + + +# --------------------------------------------------------------------------- +# Case 13: schema mismatch +# --------------------------------------------------------------------------- + +def test_schema_mismatch_exits_nonzero(tmp_path: Path): + bad = tmp_path / "bad.json" + bad.write_text(json.dumps({"not_data": 42})) + out_json = tmp_path / "merged.json" + out_md = tmp_path / "merged.md" + with pytest.raises(SystemExit) as excinfo: + mc.main([ + "--output-json", str(out_json), + "--output-md", str(out_md), + f"linux={bad}", + ]) + assert excinfo.value.code != 0 + + +# --------------------------------------------------------------------------- +# Case 14: invariant property over a randomly mixed merge +# --------------------------------------------------------------------------- + +def test_invariant_holds_under_arbitrary_mix(tmp_path: Path): + docs = { + "linux": _doc([ + _file("/a/src/snmalloc/x.h", + [_seg(i, i % 2) for i in range(1, 30)]), + _file("/a/src/snmalloc/y.h", + [_seg(i, 1) for i in range(1, 5)]), + ]), + "macos": _doc([ + _file("/b/src/snmalloc/x.h", + [_seg(i, (i + 1) % 2) for i in range(15, 40)]), + _file("/b/src/snmalloc/z.h", + [_seg(i, 0) for i in range(1, 8)]), + ]), + "selfhost": _doc([ + _file("/c/src/snmalloc/y.h", + [_seg(i, 2) for i in range(3, 8)]), + ]), + } + merged = _merge_dicts(docs) + _assert_invariant(merged) + # And the totals must equal the count of executable lines summed. 
+ total_exec = sum(len(v["executable"]) for v in merged["files"].values()) + total_cov = sum(len(v["covered"]) for v in merged["files"].values()) + assert merged["totals"]["executable"] == total_exec + assert merged["totals"]["covered"] == total_cov + + +# --------------------------------------------------------------------------- +# CLI smoke: end-to-end via main() +# --------------------------------------------------------------------------- + +def test_cli_end_to_end(tmp_path: Path): + in1 = tmp_path / "linux.json" + in2 = tmp_path / "macos.json" + in1.write_text(json.dumps(_doc([_file("/a/src/snmalloc/foo.h", [_seg(1, 1), _seg(2, 0)])]))) + in2.write_text(json.dumps(_doc([_file("/b/src/snmalloc/foo.h", [_seg(2, 1), _seg(3, 1)])]))) + out_json = tmp_path / "merged.json" + out_md = tmp_path / "merged.md" + rc = mc.main([ + "--output-json", str(out_json), + "--output-md", str(out_md), + f"linux={in1}", + f"macos={in2}", + ]) + assert rc == 0 + merged = json.loads(out_json.read_text()) + assert merged["files"]["src/snmalloc/foo.h"]["executable"] == [1, 2, 3] + assert merged["files"]["src/snmalloc/foo.h"]["covered"] == [1, 2, 3] + md = out_md.read_text() + assert "" in md + assert "3 / 3" in md diff --git a/.github/workflows/coverage-comment.yml b/.github/workflows/coverage-comment.yml new file mode 100644 index 000000000..0e527610f --- /dev/null +++ b/.github/workflows/coverage-comment.yml @@ -0,0 +1,230 @@ +name: Coverage comment + +# Posts the merged coverage report from `coverage.yml` onto the PR +# (or updates the tracking issue body for nightly / push runs). +# +# This workflow is intentionally split from `coverage.yml` so that +# the build job runs with the default read-only `pull_request` token +# (no privilege when running on an untrusted fork PR), and only this +# narrow workflow holds the write token. There is no checkout, no +# arbitrary code execution from the PR, and no use of any artefact +# beyond the validated JSON. 
+# +# Trust model: +# - The build (`coverage.yml`) runs untrusted PR code under +# `read-all` permissions; an attacker who compromises the build +# cannot post anywhere. +# - This workflow runs *trusted* code from the default branch +# (workflow_run resolves the workflow file from the default +# branch, not from the PR). The only PR-controlled input is the +# contents of `coverage-merged.json`, which is parsed with +# strict size + structural limits below. + +on: + workflow_run: + workflows: [ "Coverage" ] + types: [ completed ] + +# Minimum required to post / edit comments and edit the tracking +# issue. No `contents: read` — we never check out the repo. +permissions: + pull-requests: write + issues: write + +jobs: + comment: + name: Post coverage report + runs-on: ubuntu-24.04 + # Only act on successful Coverage runs that came from a PR or + # from a default-branch schedule/push. Forks do not need this + # filter — `workflow_run` already restricts to runs of the + # `Coverage` workflow as defined on the default branch. + if: >- + github.event.workflow_run.conclusion == 'success' && + (github.event.workflow_run.event == 'pull_request' || + ((github.event.workflow_run.event == 'schedule' || + github.event.workflow_run.event == 'push') && + github.event.workflow_run.head_branch == + github.event.repository.default_branch)) + steps: + - name: Download merged coverage artifact + uses: actions/download-artifact@v4 + with: + # Must match the upload name in coverage.yml's merge job. + name: coverage-merged + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + path: artifact + # Provisional caps; revisit after first week + # of nightlies. download-artifact doesn't enforce a per-file + # cap, so we re-validate explicitly in the next step. + + - name: Validate artifact + id: validate + run: | + set -euo pipefail + # Per-file cap: 50 MB. Total cap: 500 MB. 
+ MAX_FILE=$((50 * 1024 * 1024)) + MAX_TOTAL=$((500 * 1024 * 1024)) + total=0 + for f in artifact/*; do + [ -f "$f" ] || continue + sz=$(stat -c%s "$f") + if [ "$sz" -gt "$MAX_FILE" ]; then + echo "::error::artifact file $f too large ($sz > $MAX_FILE)" + exit 1 + fi + total=$((total + sz)) + done + if [ "$total" -gt "$MAX_TOTAL" ]; then + echo "::error::artifact total $total > cap $MAX_TOTAL" + exit 1 + fi + + # Structural validation: must parse as JSON with the + # exact top-level shape merge_coverage.py emits. + python3 - <<'PY' + import json, sys + with open("artifact/coverage-merged.json") as f: + m = json.load(f) + for k in ("files", "totals", "platforms"): + if k not in m: + print(f"::error::missing top-level key '{k}'", file=sys.stderr) + sys.exit(1) + if not isinstance(m["files"], dict): + print("::error::'files' is not an object", file=sys.stderr); sys.exit(1) + if not isinstance(m["totals"], dict): + print("::error::'totals' is not an object", file=sys.stderr); sys.exit(1) + for k in ("executable", "covered"): + if k not in m["totals"] or not isinstance(m["totals"][k], int): + print(f"::error::totals.{k} missing or not int", file=sys.stderr) + sys.exit(1) + if m["totals"]["covered"] > m["totals"]["executable"]: + print("::error::covered > executable", file=sys.stderr); sys.exit(1) + print(f"validated: {len(m['files'])} files, " + f"{m['totals']['covered']}/{m['totals']['executable']} lines, " + f"{len(m['platforms'])} platforms") + PY + + # Ensure the markdown body carries the bot marker — the + # comment search relies on this, and a missing marker + # would orphan future comments. + if ! grep -qF '' artifact/coverage-merged.md; then + echo "::error::coverage-merged.md missing '' marker" + exit 1 + fi + + # Stash markdown size & first-line for follow-up steps. 
+ echo "md_bytes=$(stat -c%s artifact/coverage-merged.md)" >> "$GITHUB_OUTPUT" + + - name: Resolve PR number + id: pr + run: | + set -euo pipefail + # workflow_run.pull_requests[] is empty for fork PRs and + # for default-branch schedule/push runs. Empty == no PR + # to comment on; fall through to the tracking-issue path. + pr=$(jq -r '.workflow_run.pull_requests[0].number // empty' \ + <<<'${{ toJson(github.event) }}') + echo "pr=$pr" >> "$GITHUB_OUTPUT" + if [ -n "$pr" ]; then + echo "Will comment on PR #$pr" + else + echo "No PR in workflow_run payload; will update tracking issue" + fi + + # ------------------------------------------------------------ + # PR path: find-or-create the bot comment, dual-marker check. + # ------------------------------------------------------------ + - name: Comment on PR + if: steps.pr.outputs.pr != '' + uses: actions/github-script@v7 + env: + PR_NUMBER: ${{ steps.pr.outputs.pr }} + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const fs = require('fs'); + const body = fs.readFileSync('artifact/coverage-merged.md', 'utf8'); + const MARKER = ''; + if (!body.includes(MARKER)) { + core.setFailed('marker missing from body'); + return; + } + const pr = Number(process.env.PR_NUMBER); + + // Dual-marker policy: the comment we edit must be both + // authored by github-actions[bot] AND contain the bot + // marker in its body. Either alone is insufficient. + // - login alone: collides with any other bot comment. + // - marker alone: someone could quote the marker in + // a regular comment and have us silently overwrite + // their comment. + const comments = await github.paginate( + github.rest.issues.listComments, + { owner: context.repo.owner, repo: context.repo.repo, + issue_number: pr, per_page: 100 }); + const existing = comments.find(c => + c.user && c.user.login === 'github-actions[bot]' && + typeof c.body === 'string' && c.body.includes(MARKER)); + + // 3-attempt backoff for transient 409/403 (e.g. 
another + // bot writing concurrently, secondary rate limits). + async function withRetry(op) { + const delays = [0, 2000, 8000]; + let lastErr; + for (const d of delays) { + if (d) await new Promise(r => setTimeout(r, d)); + try { return await op(); } + catch (e) { + if (e.status !== 409 && e.status !== 403) throw e; + lastErr = e; + } + } + throw lastErr; + } + + if (existing) { + core.info(`Updating comment ${existing.id}`); + await withRetry(() => github.rest.issues.updateComment({ + owner: context.repo.owner, repo: context.repo.repo, + comment_id: existing.id, body })); + } else { + core.info(`Creating new comment on PR #${pr}`); + await withRetry(() => github.rest.issues.createComment({ + owner: context.repo.owner, repo: context.repo.repo, + issue_number: pr, body })); + } + + # ------------------------------------------------------------ + # Tracking-issue path: update the body of the issue named by + # the COVERAGE_TRACKING_ISSUE repo variable. Never the + # comments — body keeps history short and avoids notification + # spam on every nightly. + # ------------------------------------------------------------ + - name: Update tracking issue + if: steps.pr.outputs.pr == '' && vars.COVERAGE_TRACKING_ISSUE != '' + uses: actions/github-script@v7 + env: + ISSUE_NUMBER: ${{ vars.COVERAGE_TRACKING_ISSUE }} + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const fs = require('fs'); + const body = fs.readFileSync('artifact/coverage-merged.md', 'utf8'); + // Marker must match the constant in merge_coverage.py. 
+ const MARKER = ''; + if (!body.includes(MARKER)) { + core.setFailed('marker missing from body'); + return; + } + const issue = Number(process.env.ISSUE_NUMBER); + await github.rest.issues.update({ + owner: context.repo.owner, repo: context.repo.repo, + issue_number: issue, body }); + core.info(`Updated tracking issue #${issue}`); + + - name: No-op summary + if: steps.pr.outputs.pr == '' && vars.COVERAGE_TRACKING_ISSUE == '' + run: | + echo "::warning::no PR and COVERAGE_TRACKING_ISSUE unset; nothing posted" diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml new file mode 100644 index 000000000..31db572de --- /dev/null +++ b/.github/workflows/coverage.yml @@ -0,0 +1,255 @@ +name: Coverage + +# Runs clang source-based coverage on every PR (advisory only — +# never gates) and on a nightly schedule. The output is a per-line +# union of all platforms in the matrix, exported as the +# `coverage-merged` artifact (containing both .json and .md), and +# consumed by `coverage-comment.yml` (a separate workflow with the +# write token) to post the report on PRs / update the tracking +# issue. +# +# This workflow mirrors the regular ctest invocation across the CI +# matrix, so the coverage it reports is exactly what every-PR CI +# exercises. + +on: + pull_request: + branches: [ main ] + schedule: + # Nightly at 04:00 UTC; cheapest free-runner slot. + - cron: '0 4 * * *' + workflow_dispatch: + +# Default token; the build does not push, comment, or modify any +# resource. The follow-on `coverage-comment.yml` is the only writer. +permissions: read-all + +concurrency: + group: coverage-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + +jobs: + # ============================================================================ + # Linux + macOS host builds via reusable-cmake-build.yml. 
+ # ============================================================================
+ build:
+ name: coverage / ${{ matrix.os }}${{ matrix.label-suffix }}
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - os: macos-14
+ label-suffix: ''
+ label: macos-14
+ # macOS runners have AppleClang preinstalled but `llvm-cov`
+ # / `llvm-profdata` are NOT on PATH (they live behind
+ # `xcrun`). The coverage target's find_program(LLVM_COV ...)
+ # only looks for unversioned/-19/-15 names, so we install
+ # llvm@19 via brew and pass the explicit binary paths.
+ #
+ # SDKROOT is exported in the dependencies step so brew
+ # clang treats the Apple SDK as a system header path,
+ # which suppresses -Wundef on Apple's __STDC_VERSION__
+ # check inside the SDK headers.
+ dependencies: |
+ brew install --quiet ninja llvm@19
+ echo "SDKROOT=$(xcrun --show-sdk-path)" >> "$GITHUB_ENV"
+ extra-cmake-flags: >-
+ -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm@19/bin/clang++
+ -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm@19/bin/clang
+ -DLLVM_COV=/opt/homebrew/opt/llvm@19/bin/llvm-cov
+ -DLLVM_PROFDATA=/opt/homebrew/opt/llvm@19/bin/llvm-profdata
+ self-host: false
+ # ------------------------------------------------------------------
+ # Linux representative leg. Mirrors main.yml's self-host job
+ # (SNMALLOC_MEMCPY_BOUNDS=ON + SNMALLOC_CHECK_LOADS=ON), so
+ # the bounds-checked memcpy and load-check paths are
+ # exercised; these are not reached by any other coverage
+ # leg. The self-host step iterates over every shim variant
+ # built (libsnmallocshim.so, libsnmallocshim-checks.so,
+ # libsnmallocshim-checks-memcpy-only.so) and the export
+ # step combines profraws from all of them. 
+ # ------------------------------------------------------------------ + - os: ubuntu-24.04 + label-suffix: ' / self-host shim-checks' + label: linux-self-host-shim-checks + dependencies: 'sudo apt install -y ninja-build clang-19 llvm-19' + extra-cmake-flags: '-DCMAKE_CXX_COMPILER=clang++-19 -DCMAKE_C_COMPILER=clang-19 -DSNMALLOC_MEMCPY_BOUNDS=ON -DSNMALLOC_CHECK_LOADS=ON' + self-host: true + uses: ./.github/workflows/reusable-cmake-build.yml + with: + os: ${{ matrix.os }} + # build-type is overridden to Debug by the reusable workflow + # whenever coverage is true, but the input is required. + build-type: Debug + cmake-config: '-G Ninja' + extra-cmake-flags: ${{ matrix.extra-cmake-flags }} + dependencies: ${{ matrix.dependencies }} + self-host: ${{ matrix.self-host }} + coverage: true + coverage-artifact-name: ${{ matrix.label }} + + # ============================================================================ + # FreeBSD / NetBSD via reusable-vm-build.yml. + # + # llvm-profdata / llvm-cov must be installed in the VM by the + # `dependencies:` script. The reusable workflow forces + # copyback: true when coverage is true so coverage.json + # makes it back to the host runner. + # ============================================================================ + build-vm: + name: coverage / ${{ matrix.label }} + strategy: + fail-fast: false + matrix: + include: + - label: freebsd-14 + vm-type: freebsd + vm-version: '14.1' + # FreeBSD's llvm19 port installs versioned binaries + # (clang19, clang++19, llvm-cov19, llvm-profdata19) + # directly under /usr/local/bin/ — not under a + # /usr/local/llvm19/bin/ subtree. Pass absolute paths so + # find_program is preset and doesn't depend on PATH. + dependencies: 'pkg install -y cmake ninja llvm19' + cmake-flags: >- + -DCMAKE_CXX_COMPILER=/usr/local/bin/clang++19 + -DCMAKE_C_COMPILER=/usr/local/bin/clang19 + -DLLVM_COV=/usr/local/bin/llvm-cov19 + -DLLVM_PROFDATA=/usr/local/bin/llvm-profdata19 + # NetBSD intentionally omitted. 
pkgsrc's compiler-rt-19 + # package ships a libclang_rt.profile-x86_64.a in which + # __llvm_profile_raw_version is declared hidden but not + # defined, so any -fprofile-instr-generate shared library + # (e.g. libsnmallocshim.so) fails to link with: + # R_X86_64_PC32 against undefined hidden symbol + # `__llvm_profile_raw_version` can not be used when + # making a shared object + # Revisit when pkgsrc ships a fixed compiler-rt, or wire + # up an in-VM compiler-rt build from llvm-project source. + uses: ./.github/workflows/reusable-vm-build.yml + with: + vm-type: ${{ matrix.vm-type }} + vm-version: ${{ matrix.vm-version }} + build-type: Debug + dependencies: ${{ matrix.dependencies }} + cmake-flags: ${{ matrix.cmake-flags }} + coverage: true + coverage-artifact-name: ${{ matrix.label }} + + # ============================================================================ + # Windows clang-cl coverage. Exercises the Windows PAL surface, + # which no other leg in the matrix touches. + # + # GitHub windows-2022 runners ship LLVM (clang-cl, llvm-profdata, + # llvm-cov) under C:\Program Files\LLVM\bin, with that directory + # already on PATH and ninja preinstalled. We rely on PATH lookup + # rather than passing -DLLVM_COV / -DLLVM_PROFDATA absolute paths, + # because the install dir contains a space ("Program Files") which + # YAML folded scalars + the reusable workflow's shell-expansion of + # ${{ inputs.extra-cmake-flags }} cannot preserve. Quoting at any + # single layer is undone by the next, leaving CMake to receive + # -DLLVM_PROFDATA=C:/Program and a phantom positional argument. 
+ # ============================================================================ + windows: + name: coverage / windows-2022 clang-cl + uses: ./.github/workflows/reusable-cmake-build.yml + with: + os: windows-2022 + build-type: Debug + cmake-config: '-G Ninja' + extra-cmake-flags: '-DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl' + dependencies: '' + coverage: true + coverage-artifact-name: windows-2022 + + # ============================================================================ + # Merge per-line union across every leg that produced + # coverage.json. + # ============================================================================ + merge: + name: merge coverage + needs: [ build, build-vm, windows ] + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + + - name: Download coverage artifacts + uses: actions/download-artifact@v4 + with: + path: coverage-artifacts + pattern: coverage-* + merge-multiple: false + + - name: Inventory artifacts + run: | + set -euo pipefail + echo "Downloaded artifacts:" + find coverage-artifacts -mindepth 1 -maxdepth 1 -type d -printf ' %f\n' + echo + echo "JSON files:" + find coverage-artifacts -name '*.json' -printf ' %p (%s bytes)\n' + + - name: Build merge inputs + id: inputs + run: | + set -euo pipefail + # Each artifact directory is named coverage-