diff --git a/.github/scripts/merge_coverage.py b/.github/scripts/merge_coverage.py
new file mode 100644
index 000000000..513928d80
--- /dev/null
+++ b/.github/scripts/merge_coverage.py
@@ -0,0 +1,327 @@
+#!/usr/bin/env python3
+"""Merge per-platform llvm-cov JSON exports into a per-line set-union report.
+
+Usage:
+ merge_coverage.py --output-json OUT.json --output-md OUT.md \\
+ LABEL=PATH [LABEL=PATH ...]
+
+Each PATH is a coverage.json from `llvm-cov export -format=json`. LABEL is
+a short platform identifier (e.g. ``linux``, ``macos``, ``selfhost-shim``).
+
+Outputs:
+ OUT.json per-file (executable, covered) line sets, plus per-platform map.
+ OUT.md markdown summary suitable for a PR comment.
+
+The per-line set-union design and its invariant (covered(f) ⊆ executable(f))
+are documented inline.
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+
+# --- Schema constants (llvm-cov JSON segment format) -------------------------
+# Segment = [line, col, count, has_count, is_region_entry, is_gap_region]
+_SEG_LINE = 0
+_SEG_COUNT = 2
+_SEG_HAS_COUNT = 3
+_SEG_IS_GAP = 5
+_SEG_MIN_LEN = 6
+
+
def normalise_path(path: str) -> str:
    """Canonicalise *path* so the same source file seen from different
    CI runners or build trees compares equal.

    Backslashes are folded to forward slashes first; then everything up
    to and including the last ``/src/snmalloc/`` occurrence is stripped,
    leaving a tree-relative ``src/snmalloc/...`` path. Paths containing
    no such component are returned verbatim (out-of-tree files).
    """
    unified = path.replace("\\", "/")
    marker = "/src/snmalloc/"
    cut = unified.rfind(marker)
    # ``cut + 1`` keeps the leading ``src/`` (drops only the slash).
    return unified if cut < 0 else unified[cut + 1:]
+
+
def parse_platform(doc: dict) -> dict[str, dict]:
    """Reduce one llvm-cov JSON export into per-file line sets.

    Returns ``{normalised_path: {"executable": set[int],
    "covered": set[int], "regions_executable": int,
    "regions_covered": int}}``.

    A line is *executable* when at least one of its segments has
    ``has_count`` true and is not a gap region; it is additionally
    *covered* when such a segment carries a positive count. Hence
    ``covered ⊆ executable`` by construction.
    """
    files: dict[str, dict] = {}
    for export in doc.get("data", []):
        for record in export.get("files", []):
            raw_name = record.get("filename")
            if not isinstance(raw_name, str):
                # Malformed entry: skip rather than crash.
                continue
            rec = files.setdefault(normalise_path(raw_name), {
                "executable": set(),
                "covered": set(),
                "regions_executable": 0,
                "regions_covered": 0,
            })
            for seg in record.get("segments", []):
                # Segment shape (see _SEG_* constants):
                # [line, col, count, has_count, is_region_entry, is_gap].
                if not isinstance(seg, list) or len(seg) < _SEG_MIN_LEN:
                    continue
                if not seg[_SEG_HAS_COUNT] or seg[_SEG_IS_GAP]:
                    continue
                line = seg[_SEG_LINE]
                if not isinstance(line, int):
                    continue
                rec["executable"].add(line)
                count = seg[_SEG_COUNT]
                if isinstance(count, (int, float)) and count > 0:
                    rec["covered"].add(line)
            # Region totals are advisory only; take them from the
            # per-file summary when present and well-typed.
            region_summary = record.get("summary", {}).get("regions", {})
            if isinstance(region_summary.get("count"), int):
                rec["regions_executable"] += region_summary["count"]
            if isinstance(region_summary.get("covered"), int):
                rec["regions_covered"] += region_summary["covered"]
    return files
+
+
+def merge(platforms: dict[str, dict[str, dict]]) -> dict:
+ """Merge per-platform parsed maps into the canonical merged structure."""
+ all_files: set[str] = set()
+ for pmap in platforms.values():
+ all_files.update(pmap.keys())
+
+ merged_files: dict[str, dict] = {}
+ for fn in sorted(all_files):
+ executable: set[int] = set()
+ covered: set[int] = set()
+ for pmap in platforms.values():
+ entry = pmap.get(fn)
+ if entry is None:
+ continue
+ executable |= entry["executable"]
+ covered |= entry["covered"]
+ # Defensive: assert invariant covered ⊆ executable
+ assert covered <= executable, f"invariant violation for {fn}"
+ merged_files[fn] = {
+ "executable": sorted(executable),
+ "covered": sorted(covered),
+ }
+
+ total_exec = sum(len(v["executable"]) for v in merged_files.values())
+ total_covered = sum(len(v["covered"]) for v in merged_files.values())
+
+ plat_out: dict[str, dict] = {}
+ for label, pmap in platforms.items():
+ files_view = {
+ fn: {
+ "executable": len(entry["executable"]),
+ "covered": len(entry["covered"]),
+ "regions": {
+ "executable": entry["regions_executable"],
+ "covered": entry["regions_covered"],
+ },
+ }
+ for fn, entry in pmap.items()
+ }
+ p_total_exec = sum(v["executable"] for v in files_view.values())
+ p_total_covered = sum(v["covered"] for v in files_view.values())
+ p_total_r_exec = sum(entry["regions_executable"] for entry in pmap.values())
+ p_total_r_covered = sum(entry["regions_covered"] for entry in pmap.values())
+ plat_out[label] = {
+ "files": files_view,
+ "totals": {
+ "executable": p_total_exec,
+ "covered": p_total_covered,
+ "regions": {
+ "executable": p_total_r_exec,
+ "covered": p_total_r_covered,
+ },
+ },
+ }
+
+ return {
+ "files": merged_files,
+ "totals": {"executable": total_exec, "covered": total_covered},
+ "platforms": plat_out,
+ }
+
+
+# --- Markdown rendering ------------------------------------------------------
+
def md_escape(s: str) -> str:
    """Make *s* safe in a markdown table cell: escape ``|`` and flatten
    CR / LF to spaces so one filename cannot span or split table rows."""
    return s.translate(str.maketrans({"|": r"\|", "\r": " ", "\n": " "}))
+
+
+def _pct(covered: int, executable: int) -> str:
+ if executable == 0:
+ return "n/a"
+ return f"{100.0 * covered / executable:.2f}%"
+
+
+def _top_dir(path: str) -> str:
+ """Group key for the per-directory table.
+
+ For paths under ``src/snmalloc/``, group by the immediate sub-directory
+ (e.g. ``src/snmalloc/pal/...`` → ``src/snmalloc/pal``). Other paths are
+ grouped under ``other``.
+ """
+ if path.startswith("src/snmalloc/"):
+ rest = path[len("src/snmalloc/"):]
+ head, _, _ = rest.partition("/")
+ return f"src/snmalloc/{head}" if head else "src/snmalloc"
+ return "other"
+
+
+def _in_scope(path: str) -> bool:
+ """Filter a normalised path to ``src/snmalloc/**`` (excluding tests and
+ concept headers). Same scoping as ``.copilot/coverage_diff.py``."""
+ if not path.startswith("src/snmalloc/"):
+ return False
+ if path.endswith("_concept.h"):
+ return False
+ return True
+
+
def render_markdown(merged: dict) -> str:
    """Render the merged structure as the markdown body posted on PRs.

    The headline and per-directory table are scoped to
    ``src/snmalloc/**``; the per-platform table is advisory.
    """
    out: list[str] = []
    # Marker used by .github/workflows/coverage-comment.yml's
    # find-or-create logic (see the dual-marker policy: comment must
    # be authored by github-actions[bot] AND its body must contain
    # this marker). If you change this string you MUST update both
    # occurrences in coverage-comment.yml in lockstep, or comment
    # dedup silently breaks (every run posts a new comment).
    # NOTE(review): the original marker string appears to have been
    # stripped in transit (the code appended "" here, which makes the
    # workflow's marker check vacuous). Restored as an HTML comment —
    # confirm the exact value against coverage-comment.yml.
    out.append("<!-- coverage-report -->")
    out.append("## Coverage report (cross-platform merged)")
    out.append("")
    # Headline is in-scope (``src/snmalloc/**``) only — that is the project
    # code being measured. The JSON artifact retains the full unfiltered
    # data for downstream consumers (e.g. ``coverage_diff.py``).
    scoped_exec = 0
    scoped_cov = 0
    for fn, v in merged["files"].items():
        if not _in_scope(fn):
            continue
        scoped_exec += len(v["executable"])
        scoped_cov += len(v["covered"])
    out.append(
        f"**Lines covered (`src/snmalloc/**`): {scoped_cov} / {scoped_exec} "
        f"({_pct(scoped_cov, scoped_exec)})**"
    )
    out.append("")
    out.append(
        "_Merged line coverage is the per-line union across all platforms. "
        "Region coverage is reported per-platform only; no cross-platform "
        "region total is computed._"
    )
    out.append("")

    # Per-directory breakdown (in-scope only).
    dir_totals: dict[str, dict[str, int]] = {}
    for fn, v in merged["files"].items():
        if not _in_scope(fn):
            continue
        d = _top_dir(fn)
        bucket = dir_totals.setdefault(d, {"executable": 0, "covered": 0})
        bucket["executable"] += len(v["executable"])
        bucket["covered"] += len(v["covered"])

    out.append("### Per-directory breakdown")
    out.append("")
    out.append("| Directory | Lines covered | Lines executable | % |")
    out.append("| --- | ---: | ---: | ---: |")
    # Ascending coverage ratio (empty dirs sort as fully covered),
    # tie-broken by name — least-covered directories lead the table.
    rows = sorted(
        dir_totals.items(),
        key=lambda kv: (
            (kv[1]["covered"] / kv[1]["executable"]) if kv[1]["executable"] else 1.0,
            kv[0],
        ),
    )
    for d, t in rows:
        out.append(
            f"| {md_escape(d)} | {t['covered']} | {t['executable']} | "
            f"{_pct(t['covered'], t['executable'])} |"
        )
    out.append("")

    # Per-platform contributions (advisory).
    # BUG FIX: this append was a string literal broken across two source
    # lines (a syntax error); restored as a "###" heading to parallel
    # "### Per-directory breakdown" above.
    out.append("### Per-platform contributions (advisory)")
    out.append("")
    out.append("| Platform | Lines covered | Lines executable | Lines % | Regions covered | Regions executable | Regions % |")
    out.append("| --- | ---: | ---: | ---: | ---: | ---: | ---: |")
    for label in sorted(merged["platforms"].keys()):
        pt = merged["platforms"][label]["totals"]
        rt = pt["regions"]
        out.append(
            f"| {md_escape(label)} | {pt['covered']} | {pt['executable']} | "
            f"{_pct(pt['covered'], pt['executable'])} | "
            f"{rt['covered']} | {rt['executable']} | "
            f"{_pct(rt['covered'], rt['executable'])} |"
        )
    out.append("")
    # NOTE(review): this lone-space line looks like stripped trailing
    # content (possibly a footer) — confirm against the original source.
    out.append(" ")
    out.append("")
    return "\n".join(out)
+
+
+# --- CLI ---------------------------------------------------------------------
+
def parse_inputs(spec_list: list[str]) -> dict[str, Path]:
    """Parse ``LABEL=PATH`` CLI specs into a label → Path map.

    Exits via SystemExit on a spec without ``=``, an empty label or
    path, or a duplicated label.
    """
    result: dict[str, Path] = {}
    for raw in spec_list:
        label, sep, path = raw.partition("=")
        if not sep:
            raise SystemExit(f"error: input must be LABEL=PATH, got {raw!r}")
        if not label or not path:
            raise SystemExit(f"error: empty label or path in {raw!r}")
        if label in result:
            raise SystemExit(f"error: duplicate label {label!r}")
        result[label] = Path(path)
    return result
+
+
def load_doc(path: Path) -> dict:
    """Load and minimally validate one llvm-cov JSON export.

    Exits via SystemExit with a readable message when the file is
    unreadable, is not valid JSON, or lacks the top-level ``data`` key.
    """
    try:
        # llvm-cov emits UTF-8; be explicit so Windows runners do not
        # fall back to the locale encoding.
        with path.open(encoding="utf-8") as f:
            doc = json.load(f)
    except (OSError, json.JSONDecodeError) as exc:
        # Chain the cause so debugging tracebacks show the root error.
        raise SystemExit(f"error: cannot load {path}: {exc}") from exc
    if not isinstance(doc, dict) or "data" not in doc:
        raise SystemExit(f"error: {path} missing top-level 'data' key")
    return doc
+
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: parse LABEL=PATH inputs, merge, write outputs.

    Returns 0 on success; usage and load errors raise SystemExit.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--output-json", required=True, type=Path)
    ap.add_argument("--output-md", required=True, type=Path)
    ap.add_argument("inputs", nargs="+", help="LABEL=PATH pairs")
    args = ap.parse_args(argv)

    spec = parse_inputs(args.inputs)
    platforms: dict[str, dict[str, dict]] = {
        label: parse_platform(load_doc(path)) for label, path in spec.items()
    }

    merged = merge(platforms)

    # Write UTF-8 explicitly: Path.write_text otherwise uses the locale
    # encoding, which is not UTF-8 on Windows runners and would corrupt
    # non-ASCII filenames in the report.
    args.output_json.write_text(
        json.dumps(merged, indent=2, sort_keys=True), encoding="utf-8")
    args.output_md.write_text(render_markdown(merged), encoding="utf-8")
    return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/.github/scripts/test_merge_coverage.py b/.github/scripts/test_merge_coverage.py
new file mode 100644
index 000000000..9355c8d4c
--- /dev/null
+++ b/.github/scripts/test_merge_coverage.py
@@ -0,0 +1,338 @@
+"""Pytest suite for ``merge_coverage.py``.
+
+Each case
+constructs synthetic ``llvm-cov export``-shaped JSON, runs the merger,
+and asserts an explicit property — including the global invariant
+``covered(f) ⊆ executable(f)`` (case 14) on every merged output.
+"""
+from __future__ import annotations
+
+import json
+import sys
+from pathlib import Path
+
+import pytest
+
+# Allow ``import merge_coverage`` from the same directory.
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+import merge_coverage as mc # noqa: E402
+
+
+# ---------------------------------------------------------------------------
+# Helpers to synthesise minimal llvm-cov-shaped JSON.
+# ---------------------------------------------------------------------------
+
+def _seg(line: int, count: int, has_count: bool = True, gap: bool = False) -> list:
+ """Build a segment list ``[line, col, count, has_count, region_entry, gap]``."""
+ return [line, 1, count, has_count, True, gap]
+
+
+def _file(filename: str, segments: list[list], regions: tuple[int, int] = (0, 0)) -> dict:
+ r_count, r_covered = regions
+ return {
+ "filename": filename,
+ "segments": segments,
+ "summary": {
+ "regions": {"count": r_count, "covered": r_covered},
+ "lines": {"count": 0, "covered": 0, "percent": 0.0},
+ },
+ }
+
+
+def _doc(files: list[dict]) -> dict:
+ return {
+ "version": "2.0.1",
+ "type": "llvm.coverage.json.export",
+ "data": [{"files": files, "totals": {}}],
+ }
+
+
def _merge_dicts(platforms: dict[str, dict]) -> dict:
    """Parse each platform doc with ``mc.parse_platform``, then merge."""
    return mc.merge(
        {name: mc.parse_platform(doc) for name, doc in platforms.items()})
+
+
+def _assert_invariant(merged: dict) -> None:
+ """The global per-file invariant ``covered ⊆ executable`` (case 14)."""
+ for fn, v in merged["files"].items():
+ cov = set(v["covered"])
+ exe = set(v["executable"])
+ assert cov <= exe, f"invariant violated for {fn}: covered={cov} executable={exe}"
+
+
+# ---------------------------------------------------------------------------
+# Case 1: disjoint files
+# ---------------------------------------------------------------------------
+
def test_disjoint_files():
    """Platforms covering different files: both appear, totals add."""
    docs = {
        "linux": _doc([_file("/work/src/snmalloc/a.h", [_seg(10, 1), _seg(11, 2)])]),
        "macos": _doc([_file("/work/src/snmalloc/b.h", [_seg(20, 1), _seg(21, 2)])]),
    }
    merged = _merge_dicts(docs)
    assert set(merged["files"]) == {"src/snmalloc/a.h", "src/snmalloc/b.h"}
    assert merged["totals"] == {"executable": 4, "covered": 4}
    _assert_invariant(merged)
+
+
+# ---------------------------------------------------------------------------
+# Case 2: overlapping files, disjoint covered lines
+# ---------------------------------------------------------------------------
+
def test_overlapping_files_disjoint_covered():
    """Same file, complementary hits: the union covers every line."""
    left = _doc([_file("/a/src/snmalloc/f.h", [_seg(1, 1), _seg(2, 0), _seg(3, 1)])])
    right = _doc([_file("/b/src/snmalloc/f.h", [_seg(1, 0), _seg(2, 1), _seg(3, 0)])])
    merged = _merge_dicts({"linux": left, "macos": right})
    entry = merged["files"]["src/snmalloc/f.h"]
    assert entry["executable"] == [1, 2, 3]
    assert entry["covered"] == [1, 2, 3]
    _assert_invariant(merged)
+
+
+# ---------------------------------------------------------------------------
+# Case 3: identical coverage
+# ---------------------------------------------------------------------------
+
def test_identical_coverage():
    """Identical inputs on two platforms merge to themselves."""
    segments = [_seg(5, 1), _seg(6, 1), _seg(7, 0)]
    merged = _merge_dicts({
        "a": _doc([_file("/x/src/snmalloc/f.h", segments)]),
        "b": _doc([_file("/x/src/snmalloc/f.h", segments)]),
    })
    entry = merged["files"]["src/snmalloc/f.h"]
    assert entry["executable"] == [5, 6, 7]
    assert entry["covered"] == [5, 6]
    _assert_invariant(merged)
+
+
+# ---------------------------------------------------------------------------
+# Case 4: complementary lines
+# ---------------------------------------------------------------------------
+
def test_complementary_lines():
    """Odd lines hit on one platform, even on the other: full union."""
    merged = _merge_dicts({
        "linux": _doc([_file("/x/src/snmalloc/f.h",
                             [_seg(n, 1) for n in (1, 3, 5)])]),
        "macos": _doc([_file("/x/src/snmalloc/f.h",
                             [_seg(n, 1) for n in (2, 4, 6)])]),
    })
    entry = merged["files"]["src/snmalloc/f.h"]
    assert entry["executable"] == [1, 2, 3, 4, 5, 6]
    assert entry["covered"] == [1, 2, 3, 4, 5, 6]
    _assert_invariant(merged)
+
+
+# ---------------------------------------------------------------------------
+# Case 5: gap-region only line — must NOT enter executable
+# ---------------------------------------------------------------------------
+
def test_gap_region_only_excluded():
    """A line whose only segment is a gap region is not executable."""
    merged = _merge_dicts({"linux": _doc([_file("/x/src/snmalloc/f.h", [
        _seg(10, 0, has_count=True, gap=True),
        _seg(11, 5, has_count=True, gap=False),
    ])])})
    entry = merged["files"]["src/snmalloc/f.h"]
    assert entry["executable"] == [11]
    assert entry["covered"] == [11]
    assert 10 not in entry["executable"]
    _assert_invariant(merged)
+
+
+# ---------------------------------------------------------------------------
+# Case 6: gap-region mixed line — non-gap segment qualifies it
+# ---------------------------------------------------------------------------
+
def test_gap_region_mixed_line_included():
    """A gap segment does not veto a line that also has a real segment."""
    segs = [_seg(42, 0, has_count=True, gap=True),
            _seg(42, 7, has_count=True, gap=False)]
    merged = _merge_dicts({"linux": _doc([_file("/x/src/snmalloc/f.h", segs)])})
    entry = merged["files"]["src/snmalloc/f.h"]
    assert entry["executable"] == [42]
    assert entry["covered"] == [42]
    _assert_invariant(merged)
+
+
+# ---------------------------------------------------------------------------
+# Case 7: ifdef-gated different executable sets
+# ---------------------------------------------------------------------------
+
def test_ifdef_gated_executable_sets():
    """Platforms may disagree on which lines are executable at all."""
    hit = _doc([_file("/x/src/snmalloc/pal_ds.h",
                      [_seg(n, 1) for n in range(1, 11)])])
    miss = _doc([_file("/x/src/snmalloc/pal_ds.h",
                       [_seg(n, 0) for n in range(20, 31)])])
    merged = _merge_dicts({"linux": hit, "macos": miss})
    entry = merged["files"]["src/snmalloc/pal_ds.h"]
    assert entry["executable"] == list(range(1, 11)) + list(range(20, 31))
    assert entry["covered"] == list(range(1, 11))
    _assert_invariant(merged)
+
+
+# ---------------------------------------------------------------------------
+# Case 8: empty data on one platform
+# ---------------------------------------------------------------------------
+
def test_empty_platform_data():
    """A platform with no files contributes nothing but still appears."""
    blank = {"version": "2.0.1", "type": "llvm.coverage.json.export",
             "data": [{"files": [], "totals": {}}]}
    merged = _merge_dicts({
        "linux": _doc([_file("/x/src/snmalloc/f.h", [_seg(1, 1)])]),
        "macos": blank,
    })
    assert "src/snmalloc/f.h" in merged["files"]
    assert merged["platforms"]["macos"]["totals"]["executable"] == 0
    assert merged["platforms"]["macos"]["totals"]["covered"] == 0
    _assert_invariant(merged)
+
+
def test_all_platforms_empty():
    """No files anywhere: empty merge, markdown still renders."""
    blank = {"version": "2.0.1", "type": "llvm.coverage.json.export",
             "data": [{"files": [], "totals": {}}]}
    merged = _merge_dicts({"a": blank, "b": dict(blank)})
    assert merged["files"] == {}
    assert merged["totals"] == {"executable": 0, "covered": 0}
    report = mc.render_markdown(merged)
    assert "0 / 0" in report or "n/a" in report
    _assert_invariant(merged)
+
+
+# ---------------------------------------------------------------------------
+# Case 9: path normalization — same file via different absolute prefixes
+# ---------------------------------------------------------------------------
+
def test_path_normalization_same_file():
    """Different absolute prefixes collapse onto one tree-relative key."""
    merged = _merge_dicts({
        "linux": _doc([_file(
            "/home/runner/work/snmalloc/snmalloc/src/snmalloc/foo.h",
            [_seg(1, 1), _seg(2, 0)])]),
        "selfhost": _doc([_file(
            "/build/relwithdebinfo/src/snmalloc/foo.h",
            [_seg(2, 1), _seg(3, 1)])]),
    })
    assert list(merged["files"]) == ["src/snmalloc/foo.h"]
    entry = merged["files"]["src/snmalloc/foo.h"]
    # linux: exec={1,2}/cov={1}; selfhost: exec={2,3}/cov={2,3};
    # union: exec={1,2,3}, cov={1,2,3}.
    assert entry["executable"] == [1, 2, 3]
    assert entry["covered"] == [1, 2, 3]
    _assert_invariant(merged)
+
+
+# ---------------------------------------------------------------------------
+# Case 10: path normalization — outside the snmalloc tree, kept verbatim
+# ---------------------------------------------------------------------------
+
def test_path_normalization_outside_tree():
    """Paths without /src/snmalloc/ are kept verbatim."""
    merged = _merge_dicts(
        {"linux": _doc([_file("/usr/include/stdlib.h", [_seg(50, 1)])])})
    assert "/usr/include/stdlib.h" in merged["files"]
    _assert_invariant(merged)
+
+
+# ---------------------------------------------------------------------------
+# Case 11: Windows backslashes
+# ---------------------------------------------------------------------------
+
def test_path_normalization_windows_backslashes():
    """Backslash paths normalise the same as forward-slash ones."""
    merged = _merge_dicts({"windows": _doc([_file(
        r"C:\runner\work\snmalloc\snmalloc\src\snmalloc\pal\pal_windows.h",
        [_seg(7, 1)])])})
    assert "src/snmalloc/pal/pal_windows.h" in merged["files"]
    _assert_invariant(merged)
+
+
+# ---------------------------------------------------------------------------
+# Case 12: filename markdown escape
+# ---------------------------------------------------------------------------
+
def test_markdown_escape_pipe_and_newline():
    """Filenames containing '|' or a newline must not break the tables.

    (Fixed: removed a dead ``pipe_lines`` local that was computed but
    never asserted, and a per-line substring check for a string
    containing ``\\n`` — trivially false on any single line, so the
    assertion could never fire.)
    """
    weird_pipe = "/x/src/snmalloc/weird|name.h"
    weird_lf = "/x/src/snmalloc/weird\nname.h"
    merged = _merge_dicts({
        "linux": _doc([
            _file(weird_pipe, [_seg(1, 1)]),
            _file(weird_lf, [_seg(1, 1)]),
        ]),
    })
    md = mc.render_markdown(merged)
    # The embedded '|' renders escaped so it cannot open a table cell.
    assert any(r"weird\|name.h" in ln for ln in md.splitlines()), md
    # The newline is flattened to a space: a single unbroken table row.
    assert "weird name.h" in md
    assert "weird\nname.h" not in md
+
+
+# ---------------------------------------------------------------------------
+# Case 13: schema mismatch
+# ---------------------------------------------------------------------------
+
def test_schema_mismatch_exits_nonzero(tmp_path: Path):
    """A document without 'data' aborts main() with a nonzero exit."""
    bogus = tmp_path / "bad.json"
    bogus.write_text(json.dumps({"not_data": 42}))
    with pytest.raises(SystemExit) as excinfo:
        mc.main([
            "--output-json", str(tmp_path / "merged.json"),
            "--output-md", str(tmp_path / "merged.md"),
            f"linux={bogus}",
        ])
    assert excinfo.value.code != 0
+
+
+# ---------------------------------------------------------------------------
+# Case 14: invariant property over a randomly mixed merge
+# ---------------------------------------------------------------------------
+
def test_invariant_holds_under_arbitrary_mix():
    """Invariant and totals consistency over a messy 3-platform merge.

    (Fixed: dropped the ``tmp_path`` fixture argument — it was never
    used, and requesting it makes pytest create a directory for
    nothing.)
    """
    docs = {
        "linux": _doc([
            _file("/a/src/snmalloc/x.h",
                  [_seg(i, i % 2) for i in range(1, 30)]),
            _file("/a/src/snmalloc/y.h",
                  [_seg(i, 1) for i in range(1, 5)]),
        ]),
        "macos": _doc([
            _file("/b/src/snmalloc/x.h",
                  [_seg(i, (i + 1) % 2) for i in range(15, 40)]),
            _file("/b/src/snmalloc/z.h",
                  [_seg(i, 0) for i in range(1, 8)]),
        ]),
        "selfhost": _doc([
            _file("/c/src/snmalloc/y.h",
                  [_seg(i, 2) for i in range(3, 8)]),
        ]),
    }
    merged = _merge_dicts(docs)
    _assert_invariant(merged)
    # Totals must equal the sums over the per-file merged sets.
    assert merged["totals"]["executable"] == sum(
        len(v["executable"]) for v in merged["files"].values())
    assert merged["totals"]["covered"] == sum(
        len(v["covered"]) for v in merged["files"].values())
+
+
+# ---------------------------------------------------------------------------
+# CLI smoke: end-to-end via main()
+# ---------------------------------------------------------------------------
+
def test_cli_end_to_end(tmp_path: Path):
    """main() writes both artifacts; JSON unions and md headline check out.

    (Fixed: the final assertion was ``assert "" in md`` — vacuously true
    for any string; the marker string it meant to check was evidently
    lost. Assert the report heading instead, which render_markdown
    always emits.)
    """
    in1 = tmp_path / "linux.json"
    in2 = tmp_path / "macos.json"
    in1.write_text(json.dumps(_doc([_file("/a/src/snmalloc/foo.h",
                                          [_seg(1, 1), _seg(2, 0)])])))
    in2.write_text(json.dumps(_doc([_file("/b/src/snmalloc/foo.h",
                                          [_seg(2, 1), _seg(3, 1)])])))
    out_json = tmp_path / "merged.json"
    out_md = tmp_path / "merged.md"
    assert mc.main([
        "--output-json", str(out_json),
        "--output-md", str(out_md),
        f"linux={in1}",
        f"macos={in2}",
    ]) == 0
    merged = json.loads(out_json.read_text())
    assert merged["files"]["src/snmalloc/foo.h"]["executable"] == [1, 2, 3]
    assert merged["files"]["src/snmalloc/foo.h"]["covered"] == [1, 2, 3]
    md = out_md.read_text()
    assert "## Coverage report" in md
    assert "3 / 3" in md
diff --git a/.github/workflows/coverage-comment.yml b/.github/workflows/coverage-comment.yml
new file mode 100644
index 000000000..0e527610f
--- /dev/null
+++ b/.github/workflows/coverage-comment.yml
@@ -0,0 +1,230 @@
+name: Coverage comment
+
+# Posts the merged coverage report from `coverage.yml` onto the PR
+# (or updates the tracking issue body for nightly / push runs).
+#
+# This workflow is intentionally split from `coverage.yml` so that
+# the build job runs with the default read-only `pull_request` token
+# (no privilege when running on an untrusted fork PR), and only this
+# narrow workflow holds the write token. There is no checkout, no
+# arbitrary code execution from the PR, and no use of any artefact
+# beyond the validated JSON.
+#
+# Trust model:
+# - The build (`coverage.yml`) runs untrusted PR code under
+# `read-all` permissions; an attacker who compromises the build
+# cannot post anywhere.
+# - This workflow runs *trusted* code from the default branch
+# (workflow_run resolves the workflow file from the default
+# branch, not from the PR). The only PR-controlled input is the
+# contents of `coverage-merged.json`, which is parsed with
+# strict size + structural limits below.
+
+on:
+ workflow_run:
+ workflows: [ "Coverage" ]
+ types: [ completed ]
+
+# Minimum required to post / edit comments and edit the tracking
+# issue. No `contents: read` — we never check out the repo.
+permissions:
+ pull-requests: write
+ issues: write
+
+jobs:
+ comment:
+ name: Post coverage report
+ runs-on: ubuntu-24.04
+ # Only act on successful Coverage runs that came from a PR or
+ # from a default-branch schedule/push. Forks do not need this
+ # filter — `workflow_run` already restricts to runs of the
+ # `Coverage` workflow as defined on the default branch.
+ if: >-
+ github.event.workflow_run.conclusion == 'success' &&
+ (github.event.workflow_run.event == 'pull_request' ||
+ ((github.event.workflow_run.event == 'schedule' ||
+ github.event.workflow_run.event == 'push') &&
+ github.event.workflow_run.head_branch ==
+ github.event.repository.default_branch))
+ steps:
+ - name: Download merged coverage artifact
+ uses: actions/download-artifact@v4
+ with:
+ # Must match the upload name in coverage.yml's merge job.
+ name: coverage-merged
+ run-id: ${{ github.event.workflow_run.id }}
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ path: artifact
+ # Provisional caps; revisit after first week
+ # of nightlies. download-artifact doesn't enforce a per-file
+ # cap, so we re-validate explicitly in the next step.
+
+ - name: Validate artifact
+ id: validate
+ run: |
+ set -euo pipefail
+ # Per-file cap: 50 MB. Total cap: 500 MB.
+ MAX_FILE=$((50 * 1024 * 1024))
+ MAX_TOTAL=$((500 * 1024 * 1024))
+ total=0
+ for f in artifact/*; do
+ [ -f "$f" ] || continue
+ sz=$(stat -c%s "$f")
+ if [ "$sz" -gt "$MAX_FILE" ]; then
+ echo "::error::artifact file $f too large ($sz > $MAX_FILE)"
+ exit 1
+ fi
+ total=$((total + sz))
+ done
+ if [ "$total" -gt "$MAX_TOTAL" ]; then
+ echo "::error::artifact total $total > cap $MAX_TOTAL"
+ exit 1
+ fi
+
+ # Structural validation: must parse as JSON with the
+ # exact top-level shape merge_coverage.py emits.
+ python3 - <<'PY'
+ import json, sys
+ with open("artifact/coverage-merged.json") as f:
+ m = json.load(f)
+ for k in ("files", "totals", "platforms"):
+ if k not in m:
+ print(f"::error::missing top-level key '{k}'", file=sys.stderr)
+ sys.exit(1)
+ if not isinstance(m["files"], dict):
+ print("::error::'files' is not an object", file=sys.stderr); sys.exit(1)
+ if not isinstance(m["totals"], dict):
+ print("::error::'totals' is not an object", file=sys.stderr); sys.exit(1)
+ for k in ("executable", "covered"):
+ if k not in m["totals"] or not isinstance(m["totals"][k], int):
+ print(f"::error::totals.{k} missing or not int", file=sys.stderr)
+ sys.exit(1)
+ if m["totals"]["covered"] > m["totals"]["executable"]:
+ print("::error::covered > executable", file=sys.stderr); sys.exit(1)
+ print(f"validated: {len(m['files'])} files, "
+ f"{m['totals']['covered']}/{m['totals']['executable']} lines, "
+ f"{len(m['platforms'])} platforms")
+ PY
+
+ # Ensure the markdown body carries the bot marker — the
+ # comment search relies on this, and a missing marker
+ # would orphan future comments.
+ if ! grep -qF '' artifact/coverage-merged.md; then
+ echo "::error::coverage-merged.md missing '' marker"
+ exit 1
+ fi
+
+ # Stash markdown size & first-line for follow-up steps.
+ echo "md_bytes=$(stat -c%s artifact/coverage-merged.md)" >> "$GITHUB_OUTPUT"
+
+ - name: Resolve PR number
+ id: pr
+ run: |
+ set -euo pipefail
+ # workflow_run.pull_requests[] is empty for fork PRs and
+ # for default-branch schedule/push runs. Empty == no PR
+ # to comment on; fall through to the tracking-issue path.
+ pr=$(jq -r '.workflow_run.pull_requests[0].number // empty' \
+ <<<'${{ toJson(github.event) }}')
+ echo "pr=$pr" >> "$GITHUB_OUTPUT"
+ if [ -n "$pr" ]; then
+ echo "Will comment on PR #$pr"
+ else
+ echo "No PR in workflow_run payload; will update tracking issue"
+ fi
+
+ # ------------------------------------------------------------
+ # PR path: find-or-create the bot comment, dual-marker check.
+ # ------------------------------------------------------------
+ - name: Comment on PR
+ if: steps.pr.outputs.pr != ''
+ uses: actions/github-script@v7
+ env:
+ PR_NUMBER: ${{ steps.pr.outputs.pr }}
+ with:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ script: |
+ const fs = require('fs');
+ const body = fs.readFileSync('artifact/coverage-merged.md', 'utf8');
+ const MARKER = '';
+ if (!body.includes(MARKER)) {
+ core.setFailed('marker missing from body');
+ return;
+ }
+ const pr = Number(process.env.PR_NUMBER);
+
+ // Dual-marker policy: the comment we edit must be both
+ // authored by github-actions[bot] AND contain the bot
+ // marker in its body. Either alone is insufficient.
+ // - login alone: collides with any other bot comment.
+ // - marker alone: someone could quote the marker in
+ // a regular comment and have us silently overwrite
+ // their comment.
+ const comments = await github.paginate(
+ github.rest.issues.listComments,
+ { owner: context.repo.owner, repo: context.repo.repo,
+ issue_number: pr, per_page: 100 });
+ const existing = comments.find(c =>
+ c.user && c.user.login === 'github-actions[bot]' &&
+ typeof c.body === 'string' && c.body.includes(MARKER));
+
+ // 3-attempt backoff for transient 409/403 (e.g. another
+ // bot writing concurrently, secondary rate limits).
+ async function withRetry(op) {
+ const delays = [0, 2000, 8000];
+ let lastErr;
+ for (const d of delays) {
+ if (d) await new Promise(r => setTimeout(r, d));
+ try { return await op(); }
+ catch (e) {
+ if (e.status !== 409 && e.status !== 403) throw e;
+ lastErr = e;
+ }
+ }
+ throw lastErr;
+ }
+
+ if (existing) {
+ core.info(`Updating comment ${existing.id}`);
+ await withRetry(() => github.rest.issues.updateComment({
+ owner: context.repo.owner, repo: context.repo.repo,
+ comment_id: existing.id, body }));
+ } else {
+ core.info(`Creating new comment on PR #${pr}`);
+ await withRetry(() => github.rest.issues.createComment({
+ owner: context.repo.owner, repo: context.repo.repo,
+ issue_number: pr, body }));
+ }
+
+ # ------------------------------------------------------------
+ # Tracking-issue path: update the body of the issue named by
+ # the COVERAGE_TRACKING_ISSUE repo variable. Never the
+ # comments — body keeps history short and avoids notification
+ # spam on every nightly.
+ # ------------------------------------------------------------
+ - name: Update tracking issue
+ if: steps.pr.outputs.pr == '' && vars.COVERAGE_TRACKING_ISSUE != ''
+ uses: actions/github-script@v7
+ env:
+ ISSUE_NUMBER: ${{ vars.COVERAGE_TRACKING_ISSUE }}
+ with:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ script: |
+ const fs = require('fs');
+ const body = fs.readFileSync('artifact/coverage-merged.md', 'utf8');
+ // Marker must match the constant in merge_coverage.py.
+ const MARKER = '';
+ if (!body.includes(MARKER)) {
+ core.setFailed('marker missing from body');
+ return;
+ }
+ const issue = Number(process.env.ISSUE_NUMBER);
+ await github.rest.issues.update({
+ owner: context.repo.owner, repo: context.repo.repo,
+ issue_number: issue, body });
+ core.info(`Updated tracking issue #${issue}`);
+
+ - name: No-op summary
+ if: steps.pr.outputs.pr == '' && vars.COVERAGE_TRACKING_ISSUE == ''
+ run: |
+ echo "::warning::no PR and COVERAGE_TRACKING_ISSUE unset; nothing posted"
diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
new file mode 100644
index 000000000..31db572de
--- /dev/null
+++ b/.github/workflows/coverage.yml
@@ -0,0 +1,255 @@
+name: Coverage
+
+# Runs clang source-based coverage on every PR (advisory only —
+# never gates) and on a nightly schedule. The output is a per-line
+# union of all platforms in the matrix, exported as the
+# `coverage-merged` artifact (containing both .json and .md), and
+# consumed by `coverage-comment.yml` (a separate workflow with the
+# write token) to post the report on PRs / update the tracking
+# issue.
+#
+# This workflow mirrors the regular ctest invocation across the CI
+# matrix, so the coverage it reports is exactly what every-PR CI
+# exercises.
+
+on:
+ pull_request:
+ branches: [ main ]
+ schedule:
+ # Nightly at 04:00 UTC; cheapest free-runner slot.
+ - cron: '0 4 * * *'
+ workflow_dispatch:
+
+# Default token; the build does not push, comment, or modify any
+# resource. The follow-on `coverage-comment.yml` is the only writer.
+permissions: read-all
+
+concurrency:
+ group: coverage-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+jobs:
+  # ============================================================================
+  # Linux + macOS host builds via reusable-cmake-build.yml.
+  # ============================================================================
+  build:
+    name: coverage / ${{ matrix.os }}${{ matrix.label-suffix }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - os: macos-14
+            label-suffix: ''
+            label: macos-14
+            # macOS runners have AppleClang preinstalled but `llvm-cov`
+            # / `llvm-profdata` are NOT on PATH (they live behind
+            # `xcrun`). The coverage target's find_program(LLVM_COV ...)
+            # only looks for unversioned/-19/-15 names, so we install
+            # llvm@19 via brew and pass the explicit binary paths.
+            #
+            # SDKROOT is exported in the dependencies step so brew
+            # clang treats the Apple SDK as a system header path,
+            # which suppresses -Wundef on Apple's __STDC_VERSION__
+            # check inside the Apple SDK headers.
+            dependencies: |
+              brew install --quiet ninja llvm@19
+              echo "SDKROOT=$(xcrun --show-sdk-path)" >> "$GITHUB_ENV"
+            extra-cmake-flags: >-
+              -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm@19/bin/clang++
+              -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm@19/bin/clang
+              -DLLVM_COV=/opt/homebrew/opt/llvm@19/bin/llvm-cov
+              -DLLVM_PROFDATA=/opt/homebrew/opt/llvm@19/bin/llvm-profdata
+            self-host: false
+          # ------------------------------------------------------------------
+          # Linux representative leg. Mirrors main.yml's self-host job
+          # (SNMALLOC_MEMCPY_BOUNDS=ON + SNMALLOC_CHECK_LOADS=ON), so
+          # the bounds-checked memcpy and load-check paths are
+          # exercised; these are not reached by any other coverage
+          # leg. The self-host step iterates over every shim variant
+          # built (libsnmallocshim.so, libsnmallocshim-checks.so,
+          # libsnmallocshim-checks-memcpy-only.so) and the export
+          # step combines profraws from all of them.
+          # ------------------------------------------------------------------
+          - os: ubuntu-24.04
+            label-suffix: ' / self-host shim-checks'
+            label: linux-self-host-shim-checks
+            dependencies: 'sudo apt install -y ninja-build clang-19 llvm-19'
+            extra-cmake-flags: '-DCMAKE_CXX_COMPILER=clang++-19 -DCMAKE_C_COMPILER=clang-19 -DSNMALLOC_MEMCPY_BOUNDS=ON -DSNMALLOC_CHECK_LOADS=ON'
+            self-host: true
+    uses: ./.github/workflows/reusable-cmake-build.yml
+    with:
+      os: ${{ matrix.os }}
+      # build-type is overridden to Debug by the reusable workflow
+      # whenever coverage is true, but the input is required.
+      build-type: Debug
+      cmake-config: '-G Ninja'
+      extra-cmake-flags: ${{ matrix.extra-cmake-flags }}
+      dependencies: ${{ matrix.dependencies }}
+      self-host: ${{ matrix.self-host }}
+      coverage: true
+      coverage-artifact-name: ${{ matrix.label }}
+
+  # ============================================================================
+  # FreeBSD / NetBSD via reusable-vm-build.yml.
+  #
+  # llvm-profdata / llvm-cov must be installed in the VM by the
+  # `dependencies:` script. The reusable workflow forces
+  # copyback: true when coverage is true so coverage.json
+  # makes it back to the host runner.
+  # ============================================================================
+  build-vm:
+    name: coverage / ${{ matrix.label }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - label: freebsd-14
+            vm-type: freebsd
+            vm-version: '14.1'
+            # FreeBSD's llvm19 port installs versioned binaries
+            # (clang19, clang++19, llvm-cov19, llvm-profdata19)
+            # directly under /usr/local/bin/ — not under a
+            # /usr/local/llvm19/bin/ subtree. Pass absolute paths so
+            # each find_program cache entry is pre-set and does not
+            # depend on PATH inside the VM.
+            dependencies: 'pkg install -y cmake ninja llvm19'
+            cmake-flags: >-
+              -DCMAKE_CXX_COMPILER=/usr/local/bin/clang++19
+              -DCMAKE_C_COMPILER=/usr/local/bin/clang19
+              -DLLVM_COV=/usr/local/bin/llvm-cov19
+              -DLLVM_PROFDATA=/usr/local/bin/llvm-profdata19
+          # NetBSD intentionally omitted. pkgsrc's compiler-rt-19
+          # package ships a libclang_rt.profile-x86_64.a in which
+          # __llvm_profile_raw_version is declared hidden but not
+          # defined, so any -fprofile-instr-generate shared library
+          # (e.g. libsnmallocshim.so) fails to link with:
+          #   R_X86_64_PC32 against undefined hidden symbol
+          #   `__llvm_profile_raw_version` can not be used when
+          #   making a shared object
+          # Revisit when pkgsrc ships a fixed compiler-rt, or wire
+          # up an in-VM compiler-rt build from llvm-project source.
+    uses: ./.github/workflows/reusable-vm-build.yml
+    with:
+      vm-type: ${{ matrix.vm-type }}
+      vm-version: ${{ matrix.vm-version }}
+      build-type: Debug
+      dependencies: ${{ matrix.dependencies }}
+      cmake-flags: ${{ matrix.cmake-flags }}
+      coverage: true
+      coverage-artifact-name: ${{ matrix.label }}
+
+  # ============================================================================
+  # Windows clang-cl coverage. Exercises the Windows PAL surface,
+  # which no other leg in the matrix touches.
+  #
+  # GitHub windows-2022 runners ship LLVM (clang-cl, llvm-profdata,
+  # llvm-cov) under C:\Program Files\LLVM\bin, with that directory
+  # already on PATH and ninja preinstalled. We rely on find_program's
+  # PATH lookup rather than passing absolute -DLLVM_COV /
+  # -DLLVM_PROFDATA paths, because the install dir contains a space
+  # ("Program Files") which YAML folded scalars + the reusable
+  # workflow's shell-expansion of ${{ inputs.extra-cmake-flags }}
+  # cannot preserve. Quoting at any single layer is undone by the
+  # next, leaving CMake to receive -DLLVM_PROFDATA=C:/Program and a
+  # phantom positional argument.
+  # ============================================================================
+  windows:
+    name: coverage / windows-2022 clang-cl
+    uses: ./.github/workflows/reusable-cmake-build.yml
+    with:
+      os: windows-2022
+      build-type: Debug
+      cmake-config: '-G Ninja'
+      extra-cmake-flags: '-DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl'
+      dependencies: ''
+      coverage: true
+      coverage-artifact-name: windows-2022
+
+ # ============================================================================
+ # Merge per-line union across every leg that produced
+ # coverage.json.
+ # ============================================================================
+ merge:
+ name: merge coverage
+ needs: [ build, build-vm, windows ]
+ runs-on: ubuntu-24.04
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Download coverage artifacts
+ uses: actions/download-artifact@v4
+ with:
+ path: coverage-artifacts
+ pattern: coverage-*
+ merge-multiple: false
+
+ - name: Inventory artifacts
+ run: |
+ set -euo pipefail
+ echo "Downloaded artifacts:"
+ find coverage-artifacts -mindepth 1 -maxdepth 1 -type d -printf ' %f\n'
+ echo
+ echo "JSON files:"
+ find coverage-artifacts -name '*.json' -printf ' %p (%s bytes)\n'
+
+ - name: Build merge inputs
+ id: inputs
+ run: |
+ set -euo pipefail
+ # Each artifact directory is named coverage-