diff --git a/.github/file-filter.yml b/.github/file-filter.yml index c0e7477cf2..0cc4698db8 100644 --- a/.github/file-filter.yml +++ b/.github/file-filter.yml @@ -37,3 +37,6 @@ checkall: &checkall - *tests - *scripts - *yml + +cases_py: + - 'toolchain/mfc/test/cases.py' diff --git a/.github/scripts/retry-build.sh b/.github/scripts/retry-build.sh index b82a2e5d8d..d32b78f920 100755 --- a/.github/scripts/retry-build.sh +++ b/.github/scripts/retry-build.sh @@ -8,7 +8,8 @@ # Try normal cleanup; if it fails, escalate to cache nuke. _retry_clean() { local clean_cmd="$1" - if eval "$clean_cmd" 2>/dev/null; then + # shellcheck disable=SC2086 # word splitting is intentional here + if $clean_cmd 2>/dev/null; then return 0 fi echo " Normal cleanup failed." diff --git a/.github/scripts/run-tests-with-retry.sh b/.github/scripts/run-tests-with-retry.sh index 18f1d05d0b..a625b4ae9d 100755 --- a/.github/scripts/run-tests-with-retry.sh +++ b/.github/scripts/run-tests-with-retry.sh @@ -8,7 +8,7 @@ PASSTHROUGH="" for arg in "$@"; do case "$arg" in - --test-all) PASSTHROUGH="$PASSTHROUGH --test-all" ;; + --test-all|--single|--debug|--gcov|--only-changes) PASSTHROUGH="$PASSTHROUGH $arg" ;; esac done diff --git a/.github/scripts/run_monitored_slurm_job.sh b/.github/scripts/run_monitored_slurm_job.sh new file mode 100644 index 0000000000..905520c45e --- /dev/null +++ b/.github/scripts/run_monitored_slurm_job.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# Run monitor_slurm_job.sh and recover if the monitor is killed (e.g. SIGKILL +# from the runner OS) before the SLURM job completes. When the monitor exits +# non-zero, sacct is used to verify the job's actual final state; if the SLURM +# job succeeded we exit 0 so the CI step is not falsely marked as failed. 
+# +# Usage: run_monitored_slurm_job.sh <job_id> <output_file> + +set -euo pipefail + +if [ $# -ne 2 ]; then + echo "Usage: $0 <job_id> <output_file>" + exit 1 +fi + +job_id="$1" +output_file="$2" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +monitor_exit=0 +bash "$SCRIPT_DIR/monitor_slurm_job.sh" "$job_id" "$output_file" || monitor_exit=$? + +if [ "$monitor_exit" -ne 0 ]; then + echo "Monitor exited with code $monitor_exit; re-checking SLURM job $job_id final state..." + # Give the SLURM epilog time to finalize if the job just finished + sleep 30 + final_state=$(sacct -j "$job_id" -n -X -P -o State 2>/dev/null | head -n1 | cut -d'|' -f1 | tr -d ' ' || echo "UNKNOWN") + final_exit=$(sacct -j "$job_id" --format=ExitCode --noheader --parsable2 2>/dev/null | head -n1 | tr -d ' ' || echo "") + echo "Final SLURM state=$final_state exit=$final_exit" + if [ "$final_state" = "COMPLETED" ] && [ "$final_exit" = "0:0" ]; then + echo "SLURM job $job_id completed successfully despite monitor failure — continuing." + else + echo "ERROR: SLURM job $job_id did not complete successfully (state=$final_state exit=$final_exit)" + exit 1 + fi +fi diff --git a/.github/scripts/submit_and_monitor_bench.sh b/.github/scripts/submit_and_monitor_bench.sh index c081c8692a..e0a6eb7384 100755 --- a/.github/scripts/submit_and_monitor_bench.sh +++ b/.github/scripts/submit_and_monitor_bench.sh @@ -14,12 +14,18 @@ device="$2" interface="$3" cluster="$4" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + echo "[$dir] Submitting benchmark for $device-$interface on $cluster..." cd "$dir" -# Submit and monitor job (submit.sh auto-detects bench mode from script name) -bash .github/workflows/$cluster/submit.sh \ - .github/workflows/$cluster/bench.sh "$device" "$interface" +# Always use the PR's submit.sh so both master and PR builds benefit from the +# run_monitored_slurm_job.sh SIGKILL recovery wrapper. 
The bench script is +# still resolved relative to the current directory (master/ or pr/) so the +# correct branch code is benchmarked. SLURM_SUBMIT_DIR ensures the job runs +# in the right directory regardless of which submit.sh is invoked. +PR_SUBMIT="${SCRIPT_DIR}/../workflows/${cluster}/submit.sh" +bash "$PR_SUBMIT" .github/workflows/$cluster/bench.sh "$device" "$interface" # Verify the YAML output file was created job_slug="bench-$device-$interface" diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index b45fc45e40..5cf9681e33 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -88,7 +88,7 @@ jobs: runs-on: group: ${{ matrix.group }} labels: ${{ matrix.labels }} - timeout-minutes: 480 + timeout-minutes: 240 steps: - name: Clone - PR uses: actions/checkout@v4 diff --git a/.github/workflows/frontier/bench.sh b/.github/workflows/frontier/bench.sh index b60f8541a2..b896feb17c 100644 --- a/.github/workflows/frontier/bench.sh +++ b/.github/workflows/frontier/bench.sh @@ -2,8 +2,11 @@ source .github/scripts/bench-preamble.sh +# Cap parallel jobs at 64 to avoid overwhelming MPI daemons on large nodes. +n_jobs=$(( $(nproc) > 64 ? 64 : $(nproc) )) + if [ "$job_device" = "gpu" ]; then ./mfc.sh bench --mem 4 -j $n_ranks -o "$job_slug.yaml" -- -c $job_cluster $device_opts -n $n_ranks else - ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c $job_cluster $device_opts -n $n_ranks + ./mfc.sh bench --mem 1 -j $n_jobs -o "$job_slug.yaml" -- -c $job_cluster $device_opts -n $n_ranks fi diff --git a/.github/workflows/frontier/build.sh b/.github/workflows/frontier/build.sh index 88446ad2a0..d21b1ddac4 100644 --- a/.github/workflows/frontier/build.sh +++ b/.github/workflows/frontier/build.sh @@ -20,10 +20,7 @@ build_opts="$gpu_opts" . 
./mfc.sh load -c $compiler_flag -m $([ "$job_device" = "gpu" ] && echo "g" || echo "c") -# Only set up build cache for test suite, not benchmarks -if [ "$run_bench" != "bench" ]; then - source .github/scripts/setup-build-cache.sh "$cluster_name" "$job_device" "$job_interface" -fi +rm -rf build source .github/scripts/retry-build.sh if [ "$run_bench" == "bench" ]; then diff --git a/.github/workflows/frontier/submit.sh b/.github/workflows/frontier/submit.sh index 16d4f0d73c..37157cf934 100644 --- a/.github/workflows/frontier/submit.sh +++ b/.github/workflows/frontier/submit.sh @@ -44,17 +44,10 @@ else fi # Select SBATCH params based on job type -if [ "$job_type" = "bench" ]; then - sbatch_account="#SBATCH -A ENG160" - sbatch_time="#SBATCH -t 05:59:00" - sbatch_partition="#SBATCH -p extended" - sbatch_extra="" -else - sbatch_account="#SBATCH -A CFD154" - sbatch_time="#SBATCH -t 01:59:00" - sbatch_partition="#SBATCH -p batch" - sbatch_extra="#SBATCH --qos=normal" -fi +sbatch_account="#SBATCH -A CFD154" +sbatch_time="#SBATCH -t 01:59:00" +sbatch_partition="#SBATCH -p batch" +sbatch_extra="#SBATCH --qos=normal" shard_suffix="" if [ -n "$4" ]; then @@ -85,6 +78,7 @@ job_device="$2" job_interface="$3" job_shard="$4" job_cluster="$cluster_name" +export GITHUB_EVENT_NAME="$GITHUB_EVENT_NAME" . 
./mfc.sh load -c $compiler_flag -m $([ "$2" = "gpu" ] && echo "g" || echo "c") @@ -102,5 +96,4 @@ fi echo "Submitted batch job $job_id" -# Use resilient monitoring instead of sbatch -W -bash "$SCRIPT_DIR/../../scripts/monitor_slurm_job.sh" "$job_id" "$output_file" +bash "$SCRIPT_DIR/../../scripts/run_monitored_slurm_job.sh" "$job_id" "$output_file" diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh index 78797ab8ec..1cfcff6fec 100644 --- a/.github/workflows/frontier/test.sh +++ b/.github/workflows/frontier/test.sh @@ -9,12 +9,18 @@ if [ -n "$job_shard" ]; then shard_opts="--shard $job_shard" fi +# Only prune tests on PRs; master pushes must run the full suite. +prune_flag="" +if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then + prune_flag="--only-changes" +fi + if [ "$job_device" = "gpu" ]; then rdma_opts="" if [ "$job_cluster" = "frontier" ]; then rdma_opts="--rdma-mpi" fi - ./mfc.sh test -v -a $rdma_opts --max-attempts 3 -j $ngpus $device_opts $shard_opts -- -c $job_cluster + ./mfc.sh test -v -a $rdma_opts --max-attempts 3 $prune_flag -j $ngpus $device_opts $shard_opts -- -c $job_cluster else - ./mfc.sh test -v -a --max-attempts 3 -j 32 --no-gpu $shard_opts -- -c $job_cluster + ./mfc.sh test -v -a --max-attempts 3 $prune_flag -j 32 --no-gpu $shard_opts -- -c $job_cluster fi diff --git a/.github/workflows/phoenix/bench.sh b/.github/workflows/phoenix/bench.sh index 0eafc485d1..218cf68a5f 100644 --- a/.github/workflows/phoenix/bench.sh +++ b/.github/workflows/phoenix/bench.sh @@ -2,7 +2,11 @@ source .github/scripts/bench-preamble.sh -tmpbuild=/storage/scratch1/6/sbryngelson3/mytmp_build +# Cap parallel jobs at 64 to avoid overwhelming MPI daemons on large nodes +# (GNR nodes have 192 cores but nproc is too aggressive for build/bench). +n_jobs=$(( $(nproc) > 64 ? 
64 : $(nproc) )) + +tmpbuild=/storage/project/r-sbryngelson3-0/sbryngelson3/mytmp_build currentdir=$tmpbuild/run-$(( RANDOM % 900 )) mkdir -p $tmpbuild mkdir -p $currentdir @@ -15,10 +19,12 @@ else bench_opts="--mem 1" fi +rm -rf build + source .github/scripts/retry-build.sh -RETRY_CLEAN_CMD="./mfc.sh clean" retry_build ./mfc.sh build -j $(nproc) $build_opts || exit 1 +RETRY_CLEAN_CMD="./mfc.sh clean" retry_build ./mfc.sh build -j $n_jobs $build_opts || exit 1 -./mfc.sh bench $bench_opts -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks +./mfc.sh bench $bench_opts -j $n_jobs -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks sleep 10 rm -rf "$currentdir" || true diff --git a/.github/workflows/phoenix/rebuild-cache.sh b/.github/workflows/phoenix/rebuild-cache.sh new file mode 100644 index 0000000000..4ef2a09522 --- /dev/null +++ b/.github/workflows/phoenix/rebuild-cache.sh @@ -0,0 +1,23 @@ +#!/bin/bash +set -e + +# Number of parallel jobs: use SLURM allocation or default to 24. +# Cap at 64 to avoid overwhelming OpenMPI daemons and OS process limits with concurrent launches. +NJOBS="${SLURM_CPUS_ON_NODE:-24}" +if [ "$NJOBS" -gt 64 ]; then NJOBS=64; fi + +# Clean stale build artifacts: the self-hosted runner may have a cached +# GPU build (e.g. --gpu mp) whose CMake flags are incompatible with gcov. +./mfc.sh clean + +# Source retry_build() for NFS stale file handle resilience (3 attempts). +source .github/scripts/retry-build.sh + +# Build MFC with gcov coverage instrumentation (CPU-only, gfortran). +retry_build ./mfc.sh build --gcov -j 8 + +# Run all tests in parallel, collecting per-test coverage data. +# Each test gets an isolated GCOV_PREFIX directory so .gcda files +# don't collide. Coverage is collected per-test after all tests finish. +# --gcov is required so the internal build step preserves instrumentation. 
+./mfc.sh test --build-coverage-cache --gcov -j "$NJOBS" diff --git a/.github/workflows/phoenix/submit.sh b/.github/workflows/phoenix/submit.sh index 5b7162fef7..61ea80635d 100755 --- a/.github/workflows/phoenix/submit.sh +++ b/.github/workflows/phoenix/submit.sh @@ -24,22 +24,22 @@ case "$script_basename" in esac sbatch_cpu_opts="\ -#SBATCH -p cpu-small # partition -#SBATCH --ntasks-per-node=24 # Number of cores per node required -#SBATCH --mem-per-cpu=2G # Memory per core\ +#SBATCH -p cpu-gnr # partition (full Granite Rapids node) +#SBATCH --exclusive # exclusive access to all cores +#SBATCH -C graniterapids # constrain to GNR architecture\ " if [ "$job_type" = "bench" ]; then sbatch_gpu_opts="\ #SBATCH -CL40S -#SBATCH --ntasks-per-node=4 # Number of cores per node required +#SBATCH --ntasks-per-node=4 # Number of MPI tasks per node required #SBATCH -G2\ " sbatch_time="#SBATCH -t 04:00:00" else sbatch_gpu_opts="\ #SBATCH -p gpu-v100,gpu-a100,gpu-h100,gpu-l40s -#SBATCH --ntasks-per-node=4 # Number of cores per node required +#SBATCH --ntasks-per-node=4 # Number of MPI tasks per node required #SBATCH -G2\ " sbatch_time="#SBATCH -t 03:00:00" @@ -77,6 +77,7 @@ echo "Running in $(pwd):" job_slug="$job_slug" job_device="$2" job_interface="$3" +export GITHUB_EVENT_NAME="$GITHUB_EVENT_NAME" . 
./mfc.sh load -c p -m $2 @@ -94,6 +95,5 @@ fi echo "Submitted batch job $job_id" -# Use resilient monitoring instead of sbatch -W SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -bash "$SCRIPT_DIR/../../scripts/monitor_slurm_job.sh" "$job_id" "$output_file" +bash "$SCRIPT_DIR/../../scripts/run_monitored_slurm_job.sh" "$job_id" "$output_file" diff --git a/.github/workflows/phoenix/test.sh b/.github/workflows/phoenix/test.sh index 6816bd9a25..5c2d57d27f 100644 --- a/.github/workflows/phoenix/test.sh +++ b/.github/workflows/phoenix/test.sh @@ -3,8 +3,7 @@ source .github/scripts/gpu-opts.sh build_opts="$gpu_opts" -# Set up persistent build cache -source .github/scripts/setup-build-cache.sh phoenix "$job_device" "$job_interface" +rm -rf build # Build with retry; smoke-test cached binaries to catch architecture mismatches # (SIGILL from binaries compiled on a different compute node). @@ -12,7 +11,9 @@ source .github/scripts/retry-build.sh RETRY_VALIDATE_CMD='syscheck_bin=$(find build/install -name syscheck -type f 2>/dev/null | head -1); [ -z "$syscheck_bin" ] || "$syscheck_bin" > /dev/null 2>&1' \ retry_build ./mfc.sh test -v --dry-run -j 8 $build_opts || exit 1 -n_test_threads=8 +# Use up to 64 parallel test threads on CPU (GNR nodes have 192 cores). +# Cap at 64 to avoid overwhelming OpenMPI daemons and OS process limits with concurrent launches. +n_test_threads=$(( SLURM_CPUS_ON_NODE > 64 ? 64 : ${SLURM_CPUS_ON_NODE:-8} )) if [ "$job_device" = "gpu" ]; then source .github/scripts/detect-gpus.sh @@ -20,4 +21,10 @@ if [ "$job_device" = "gpu" ]; then n_test_threads=$((ngpus * 2)) fi -./mfc.sh test -v --max-attempts 3 -a -j $n_test_threads $device_opts -- -c phoenix +# Only prune tests on PRs; master pushes must run the full suite. 
+prune_flag="" +if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then + prune_flag="--only-changes" +fi + +./mfc.sh test -v --max-attempts 3 $prune_flag -a -j $n_test_threads $device_opts -- -c phoenix diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5dd072072d..9810184a60 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -56,8 +56,10 @@ jobs: file-changes: name: Detect File Changes runs-on: 'ubuntu-latest' - outputs: + outputs: checkall: ${{ steps.changes.outputs.checkall }} + cases_py: ${{ steps.changes.outputs.cases_py }} + dep_changed: ${{ steps.dep-check.outputs.dep_changed }} steps: - name: Clone uses: actions/checkout@v4 @@ -65,13 +67,103 @@ jobs: - name: Detect Changes uses: dorny/paths-filter@v3 id: changes - with: + with: filters: ".github/file-filter.yml" + - name: Check for Fortran dependency changes + id: dep-check + env: + GH_TOKEN: ${{ github.token }} + run: | + # Detect added/removed use/include statements that change the + # Fortran dependency graph, which would make the coverage cache stale. + PR_NUMBER="${{ github.event.pull_request.number }}" + BEFORE="${{ github.event.before }}" + AFTER="${{ github.event.after }}" + if [ "${{ github.event_name }}" = "pull_request" ]; then + # Default to dep_changed=true if gh pr diff fails (safe fallback). + DIFF=$(gh pr diff "$PR_NUMBER" 2>/dev/null) || { + echo "gh pr diff failed — defaulting to dep_changed=true for safety." + echo "dep_changed=true" >> "$GITHUB_OUTPUT" + exit 0 + } + elif [ "${{ github.event_name }}" = "push" ]; then + DIFF=$(git diff "$BEFORE".."$AFTER" 2>/dev/null || echo "") + else + DIFF="" + fi + if echo "$DIFF" | \ + grep -qP '^[+-]\s*(use[\s,]+\w|#:include\s|include\s+['"'"'"])'; then + echo "dep_changed=true" >> "$GITHUB_OUTPUT" + echo "Fortran dependency change detected — will rebuild coverage cache." 
+ else + echo "dep_changed=false" >> "$GITHUB_OUTPUT" + fi + + rebuild-cache: + name: Rebuild Coverage Cache + needs: [lint-gate, file-changes] + if: >- + github.repository == 'MFlowCode/MFC' && + ( + (github.event_name == 'pull_request' && + (needs.file-changes.outputs.cases_py == 'true' || + needs.file-changes.outputs.dep_changed == 'true')) || + (github.event_name == 'push' && + (needs.file-changes.outputs.cases_py == 'true' || + needs.file-changes.outputs.dep_changed == 'true')) || + github.event_name == 'workflow_dispatch' + ) + timeout-minutes: 240 + runs-on: + group: phoenix + labels: gt + permissions: + contents: write # Required for Commit Cache to Master on push events + steps: + - name: Clone + uses: actions/checkout@v4 + with: + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + clean: false + + - name: Rebuild Cache via SLURM + run: bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/rebuild-cache.sh cpu none + + - name: Print Logs + if: always() + run: cat rebuild-cache-cpu-none.out + + - name: Upload Cache Artifact + if: github.event_name == 'pull_request' + uses: actions/upload-artifact@v4 + with: + name: coverage-cache + path: toolchain/mfc/test/test_coverage_cache.json.gz + retention-days: 1 + + - name: Commit Cache to Master + if: (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && github.ref == 'refs/heads/master' + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add toolchain/mfc/test/test_coverage_cache.json.gz + if git diff --cached --quiet; then + echo "Coverage cache unchanged." 
+ else + git commit -m "Regenerate gcov coverage cache [skip ci]" + git push origin HEAD:refs/heads/master + fi + github: name: Github - if: needs.file-changes.outputs.checkall == 'true' - needs: [lint-gate, file-changes] + needs: [lint-gate, file-changes, rebuild-cache] + if: >- + always() && + needs.lint-gate.result == 'success' && + needs.file-changes.result == 'success' && + needs.rebuild-cache.result != 'cancelled' && + needs.file-changes.outputs.checkall == 'true' strategy: matrix: os: ['ubuntu', 'macos'] @@ -91,13 +183,26 @@ jobs: intel: false fail-fast: false - continue-on-error: true runs-on: ${{ matrix.os }}-latest steps: - name: Clone uses: actions/checkout@v4 + - name: Fetch master for coverage diff + run: | + git fetch origin master:master --depth=1 + git fetch --deepen=200 + continue-on-error: true + + - name: Download Coverage Cache + if: needs.rebuild-cache.result == 'success' + uses: actions/download-artifact@v4 + with: + name: coverage-cache + path: toolchain/mfc/test + continue-on-error: true + - name: Setup MacOS if: matrix.os == 'macos' run: | @@ -156,15 +261,23 @@ jobs: PRECISION: ${{ matrix.precision != '' && format('--{0}', matrix.precision) || '' }} - name: Test - run: bash .github/scripts/run-tests-with-retry.sh -v --max-attempts 3 -j "$(nproc)" $TEST_ALL $TEST_PCT + run: bash .github/scripts/run-tests-with-retry.sh -v --max-attempts 3 -j "$(nproc)" $ONLY_CHANGES $TEST_ALL $TEST_PCT env: TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }} TEST_PCT: ${{ matrix.debug == 'debug' && '-% 20' || '' }} + ONLY_CHANGES: ${{ github.event_name == 'pull_request' && '--only-changes' || '' }} self: name: "${{ matrix.cluster_name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }}${{ matrix.shard != '' && format(' [{0}]', matrix.shard) || '' }})" - if: github.repository == 'MFlowCode/MFC' && needs.file-changes.outputs.checkall == 'true' && github.event.pull_request.draft != true - needs: [lint-gate, 
file-changes] + needs: [lint-gate, file-changes, rebuild-cache] + if: >- + always() && + needs.lint-gate.result == 'success' && + needs.file-changes.result == 'success' && + needs.rebuild-cache.result != 'cancelled' && + github.repository == 'MFlowCode/MFC' && + needs.file-changes.outputs.checkall == 'true' && + github.event.pull_request.draft != true continue-on-error: false timeout-minutes: 480 strategy: @@ -245,6 +358,14 @@ jobs: with: clean: false + - name: Download Coverage Cache + if: needs.rebuild-cache.result == 'success' + uses: actions/download-artifact@v4 + with: + name: coverage-cache + path: toolchain/mfc/test + continue-on-error: true + - name: Build if: matrix.cluster != 'phoenix' uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # v3 diff --git a/.gitignore b/.gitignore index e80d14a6f9..943624a1f7 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,9 @@ __pycache__ # Auto-generated version file toolchain/mfc/_version.py +# Raw coverage cache — legacy, not tracked (the .json.gz version IS committed) +toolchain/mfc/test/test_coverage_cache.json + # Auto-generated toolchain files (regenerate with: ./mfc.sh generate) toolchain/completions/mfc.bash toolchain/completions/_mfc diff --git a/CMakeLists.txt b/CMakeLists.txt index ddb3876724..01da0c7a28 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,13 +131,20 @@ if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") add_compile_options( $<$:-fprofile-arcs> $<$:-ftest-coverage> - $<$:-O1> - ) + ) add_link_options( $<$:-lgcov> $<$:--coverage> ) + + # Override Release -O3 with -O1 for gcov: coverage instrumentation is + # inaccurate at -O3, and aggressive codegen (e.g. AVX-512 FP16 on + # Granite Rapids) can emit instructions that older assemblers reject. + set(CMAKE_Fortran_FLAGS_RELEASE "-O1 -DNDEBUG" CACHE STRING "" FORCE) + + # Use gfortran5 line markers so gcov can map coverage to .fpp sources. 
+ set(FYPP_GCOV_OPTS "--line-marker-format=gfortran5") endif() if (CMAKE_BUILD_TYPE STREQUAL "Debug") @@ -224,18 +231,32 @@ endif() if (CMAKE_BUILD_TYPE STREQUAL "Release") # Processor tuning: Check if we can target the host's native CPU's ISA. - CHECK_FORTRAN_COMPILER_FLAG("-march=native" SUPPORTS_MARCH_NATIVE) - if (SUPPORTS_MARCH_NATIVE) - add_compile_options($<$:-march=native>) - else() - CHECK_FORTRAN_COMPILER_FLAG("-mcpu=native" SUPPORTS_MCPU_NATIVE) - if (SUPPORTS_MCPU_NATIVE) - add_compile_options($<$:-mcpu=native>) + # Skip for gcov builds — -march=native on newer CPUs (e.g. Granite Rapids) + # can emit instructions the system assembler doesn't support. + if (NOT MFC_GCov) + CHECK_FORTRAN_COMPILER_FLAG("-march=native" SUPPORTS_MARCH_NATIVE) + if (SUPPORTS_MARCH_NATIVE) + add_compile_options($<$:-march=native>) + # Disable AVX-512 FP16: gfortran ≥12 emits vmovw instructions on + # Granite Rapids CPUs, but binutils <2.38 cannot assemble them. + # FP16 is unused in MFC's double-precision computations. + CHECK_FORTRAN_COMPILER_FLAG("-mno-avx512fp16" SUPPORTS_MNO_AVX512FP16) + if (SUPPORTS_MNO_AVX512FP16) + add_compile_options($<$:-mno-avx512fp16>) + endif() + else() + CHECK_FORTRAN_COMPILER_FLAG("-mcpu=native" SUPPORTS_MCPU_NATIVE) + if (SUPPORTS_MCPU_NATIVE) + add_compile_options($<$:-mcpu=native>) + endif() endif() endif() - # Enable LTO/IPO if supported - if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC") + # Enable LTO/IPO if supported (skip for gcov — LTO interferes with coverage + # instrumentation and can trigger assembler errors on newer architectures). 
+ if (MFC_GCov) + message(STATUS "LTO/IPO disabled for gcov build") + elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC") if (MFC_Unified) message(STATUS "LTO/IPO is not available with NVHPC using Unified Memory") elseif (CMAKE_Fortran_COMPILER_VERSION VERSION_GREATER "24.11" AND CMAKE_Fortran_COMPILER_VERSION VERSION_LESS "25.9") @@ -381,6 +402,7 @@ macro(HANDLE_SOURCES target useCommon) --no-folding --line-length=999 --line-numbering-mode=nocontlines + ${FYPP_GCOV_OPTS} "${fpp}" "${f90}" DEPENDS "${fpp};${${target}_incs}" COMMENT "Preprocessing (Fypp) ${fpp_filename}" diff --git a/benchmarks/5eq_rk3_weno3_hllc/case.py b/benchmarks/5eq_rk3_weno3_hllc/case.py index 5ecc327e8f..fa09426ffe 100644 --- a/benchmarks/5eq_rk3_weno3_hllc/case.py +++ b/benchmarks/5eq_rk3_weno3_hllc/case.py @@ -191,8 +191,8 @@ "cyl_coord": "F", "dt": dt, "t_step_start": 0, - "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(7 * (5 * size + 5)), - "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(7 * (5 * size + 5)), + "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)), + "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)), # Simulation Algorithm Parameters "num_patches": 3, "model_eqns": 2, diff --git a/benchmarks/hypo_hll/case.py b/benchmarks/hypo_hll/case.py index 1663a507aa..f8d0928a01 100644 --- a/benchmarks/hypo_hll/case.py +++ b/benchmarks/hypo_hll/case.py @@ -44,8 +44,8 @@ "p": Nz, "dt": 1e-8, "t_step_start": 0, - "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(7 * (5 * size + 5)), - "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(7 * (5 * size + 5)), + "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)), + "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)), # Simulation Algorithm Parameters "num_patches": 2, "model_eqns": 2, diff --git a/benchmarks/ibm/case.py 
b/benchmarks/ibm/case.py index e16cb620b7..303cf7fcaf 100644 --- a/benchmarks/ibm/case.py +++ b/benchmarks/ibm/case.py @@ -48,8 +48,8 @@ "p": Nz, "dt": mydt, "t_step_start": 0, - "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(7 * (5 * size + 5)), - "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(7 * (5 * size + 5)), + "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)), + "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)), # Simulation Algorithm Parameters "num_patches": 1, "model_eqns": 2, diff --git a/benchmarks/igr/case.py b/benchmarks/igr/case.py index 469bff1fa9..4ceed76257 100644 --- a/benchmarks/igr/case.py +++ b/benchmarks/igr/case.py @@ -63,8 +63,8 @@ "cyl_coord": "F", "dt": dt, "t_step_start": 0, - "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(7 * (5 * size + 5)), - "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(7 * (5 * size + 5)), + "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)), + "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)), # Simulation Algorithm Parameters "num_patches": 1, "model_eqns": 2, diff --git a/benchmarks/viscous_weno5_sgb_acoustic/case.py b/benchmarks/viscous_weno5_sgb_acoustic/case.py index 9f1351b0c1..83bdc43e9c 100644 --- a/benchmarks/viscous_weno5_sgb_acoustic/case.py +++ b/benchmarks/viscous_weno5_sgb_acoustic/case.py @@ -94,8 +94,8 @@ "p": Nz, "dt": dt, "t_step_start": 0, - "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(6 * (5 * size + 5)), - "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(6 * (5 * size + 5)), + "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)), + "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)), # Simulation Algorithm Parameters "num_patches": 2, "model_eqns": 2, diff --git 
a/toolchain/mfc/build.py b/toolchain/mfc/build.py index 6430f7ad35..08ff6d7510 100644 --- a/toolchain/mfc/build.py +++ b/toolchain/mfc/build.py @@ -1,6 +1,7 @@ import os, typing, hashlib, dataclasses, subprocess, re, time, sys, threading, queue from rich.panel import Panel +from rich.text import Text from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, TimeElapsedColumn, TaskProgressColumn from .case import Case @@ -273,14 +274,14 @@ def _show_build_error(result: subprocess.CompletedProcess, stage: str): stdout_text = result.stdout if isinstance(result.stdout, str) else result.stdout.decode('utf-8', errors='replace') stdout_text = stdout_text.strip() if stdout_text: - cons.raw.print(Panel(stdout_text, title="Output", border_style="yellow")) + cons.raw.print(Panel(Text(stdout_text), title="Output", border_style="yellow")) # Show stderr if available if result.stderr: stderr_text = result.stderr if isinstance(result.stderr, str) else result.stderr.decode('utf-8', errors='replace') stderr_text = stderr_text.strip() if stderr_text: - cons.raw.print(Panel(stderr_text, title="Errors", border_style="red")) + cons.raw.print(Panel(Text(stderr_text), title="Errors", border_style="red")) cons.print() diff --git a/toolchain/mfc/cli/commands.py b/toolchain/mfc/cli/commands.py index d4b34df3d8..618ec1aea6 100644 --- a/toolchain/mfc/cli/commands.py +++ b/toolchain/mfc/cli/commands.py @@ -458,6 +458,27 @@ type=str, default=None, ), + Argument( + name="build-coverage-cache", + help="Run all tests with gcov instrumentation to build the file-level coverage cache. 
Requires a prior --gcov build: ./mfc.sh build --gcov -j 8", + action=ArgAction.STORE_TRUE, + default=False, + dest="build_coverage_cache", + ), + Argument( + name="only-changes", + help="Only run tests whose covered files overlap with files changed since branching from master (uses file-level gcov coverage cache).", + action=ArgAction.STORE_TRUE, + default=False, + dest="only_changes", + ), + Argument( + name="changes-branch", + help="Branch to compare against for --only-changes (default: master).", + type=str, + default="master", + dest="changes_branch", + ), ], mutually_exclusive=[ MutuallyExclusiveGroup(arguments=[ @@ -488,6 +509,8 @@ Example("./mfc.sh test -j 4", "Run with 4 parallel jobs"), Example("./mfc.sh test --only 3D", "Run only 3D tests"), Example("./mfc.sh test --generate", "Regenerate golden files"), + Example("./mfc.sh test --only-changes -j 4", "Run tests affected by changed files"), + Example("./mfc.sh build --gcov -j 8 && ./mfc.sh test --build-coverage-cache", "One-time: build file-coverage cache"), ], key_options=[ ("-j, --jobs N", "Number of parallel test jobs"), @@ -495,6 +518,8 @@ ("-f, --from UUID", "Start from specific test"), ("--generate", "Generate/update golden files"), ("--no-build", "Skip rebuilding MFC"), + ("--build-coverage-cache", "Build file-level gcov coverage cache (one-time)"), + ("--only-changes", "Run tests affected by changed files (requires cache)"), ], ) diff --git a/toolchain/mfc/test/case.py b/toolchain/mfc/test/case.py index c5ffdd301a..9d27e9df62 100644 --- a/toolchain/mfc/test/case.py +++ b/toolchain/mfc/test/case.py @@ -1,4 +1,4 @@ -import os, glob, hashlib, binascii, subprocess, itertools, dataclasses, shutil +import os, json, glob, hashlib, binascii, subprocess, itertools, dataclasses, shutil from typing import List, Set, Union, Callable, Optional @@ -7,6 +7,44 @@ from ..run import input from ..build import MFCTarget, get_target +# Parameters that enable simulation output writing for post_process. 
+# When post_process is a target, simulation must write field data so +# post_process has something to read. Used in the generated case.py +# template and by the coverage cache builder. +POST_PROCESS_OUTPUT_PARAMS = { + 'parallel_io': 'T', 'cons_vars_wrt': 'T', + 'prim_vars_wrt': 'T', 'alpha_rho_wrt(1)': 'T', + 'rho_wrt': 'T', 'mom_wrt(1)': 'T', + 'vel_wrt(1)': 'T', 'E_wrt': 'T', + 'pres_wrt': 'T', 'alpha_wrt(1)': 'T', + 'gamma_wrt': 'T', 'heat_ratio_wrt': 'T', + 'pi_inf_wrt': 'T', 'pres_inf_wrt': 'T', + 'c_wrt': 'T', +} + +# Additional output parameters for 3D cases (p != 0). +POST_PROCESS_3D_PARAMS = { + 'fd_order': 1, + 'omega_wrt(1)': 'T', + 'omega_wrt(2)': 'T', + 'omega_wrt(3)': 'T', +} + +# Parameters set when post_process is NOT a target. +POST_PROCESS_OFF_PARAMS = { + 'parallel_io': 'F', + 'prim_vars_wrt': 'F', +} + + +def get_post_process_mods(case_params: dict) -> dict: + """Return parameter modifications needed when post_process is a target.""" + mods = dict(POST_PROCESS_OUTPUT_PARAMS) + if int(case_params.get('p', 0)) != 0: + mods.update(POST_PROCESS_3D_PARAMS) + return mods + + Tend = 0.25 Nt = 50 mydt = 0.0005 @@ -204,25 +242,11 @@ def create_directory(self): mods = {{}} if "post_process" in ARGS["mfc"]["targets"]: - mods = {{ - 'parallel_io' : 'T', 'cons_vars_wrt' : 'T', - 'prim_vars_wrt': 'T', 'alpha_rho_wrt(1)': 'T', - 'rho_wrt' : 'T', 'mom_wrt(1)' : 'T', - 'vel_wrt(1)' : 'T', 'E_wrt' : 'T', - 'pres_wrt' : 'T', 'alpha_wrt(1)' : 'T', - 'gamma_wrt' : 'T', 'heat_ratio_wrt' : 'T', - 'pi_inf_wrt' : 'T', 'pres_inf_wrt' : 'T', - 'c_wrt' : 'T', - }} - + mods = {json.dumps(POST_PROCESS_OUTPUT_PARAMS)} if case['p'] != 0: - mods['fd_order'] = 1 - mods['omega_wrt(1)'] = 'T' - mods['omega_wrt(2)'] = 'T' - mods['omega_wrt(3)'] = 'T' + mods.update({json.dumps(POST_PROCESS_3D_PARAMS)}) else: - mods['parallel_io'] = 'F' - mods['prim_vars_wrt'] = 'F' + mods = {json.dumps(POST_PROCESS_OFF_PARAMS)} print(json.dumps({{**case, **mods}})) """) diff --git 
a/toolchain/mfc/test/cases.py b/toolchain/mfc/test/cases.py index 7835981151..4c385f3b31 100644 --- a/toolchain/mfc/test/cases.py +++ b/toolchain/mfc/test/cases.py @@ -1071,7 +1071,7 @@ def foreach_example(): "2D_forward_facing_step", "1D_convergence", "3D_IGR_33jet", "1D_multispecies_diffusion", - "2D_ibm_stl_MFCCharacter"] + "2D_ibm_stl_MFCCharacter", "1D_qbmm"] if path in casesToSkip: continue name = f"{path.split('_')[0]} -> Example -> {'_'.join(path.split('_')[1:])}" diff --git a/toolchain/mfc/test/coverage.py b/toolchain/mfc/test/coverage.py new file mode 100644 index 0000000000..5bb534489b --- /dev/null +++ b/toolchain/mfc/test/coverage.py @@ -0,0 +1,795 @@ +""" +File-level gcov coverage-based test pruning for MFC. + +Build MFC once with gfortran --coverage, run all tests individually, record +which .fpp files each test executes, and cache that mapping. + +When files change on a PR, intersect the changed .fpp files against each test's +covered file set. Only tests that touch at least one changed file run. + +Workflow: + ./mfc.sh build --gcov -j 8 # one-time: build with coverage + ./mfc.sh test --build-coverage-cache # one-time: populate the cache + ./mfc.sh test --only-changes -j 8 # fast: run only affected tests +""" + +import io +import os +import re +import json +import gzip +import shutil +import hashlib +import tempfile +import subprocess +import datetime +from pathlib import Path +from typing import Optional +from concurrent.futures import ThreadPoolExecutor, as_completed + +from ..printer import cons +from .. import common +from ..common import MFCException +from ..build import PRE_PROCESS, SIMULATION, POST_PROCESS +from .case import (input_bubbles_lagrange, get_post_process_mods, + POST_PROCESS_3D_PARAMS) + + +COVERAGE_CACHE_PATH = Path(common.MFC_ROOT_DIR) / "toolchain/mfc/test/test_coverage_cache.json.gz" + +# Changes to these files trigger the full test suite. 
+# CPU coverage cannot tell us about GPU directive changes (macro files), and +# toolchain files define or change the set of tests themselves. +ALWAYS_RUN_ALL = frozenset([ + "src/common/include/parallel_macros.fpp", + "src/common/include/acc_macros.fpp", + "src/common/include/omp_macros.fpp", + "src/common/include/shared_parallel_macros.fpp", + "src/common/include/macros.fpp", + "src/common/include/case.fpp", + "toolchain/mfc/test/case.py", + "toolchain/mfc/test/cases.py", + "toolchain/mfc/test/coverage.py", + "toolchain/mfc/params/definitions.py", + "toolchain/mfc/run/input.py", + "toolchain/mfc/case_validator.py", +]) + +# Directory prefixes: any changed file under these paths triggers full suite. +# Note: src/simulation/include/ (.fpp files like inline_riemann.fpp) is NOT +# listed here — Fypp line markers (--line-marker-format=gfortran5) correctly +# attribute included file paths, so gcov coverage tracks them accurately. +ALWAYS_RUN_ALL_PREFIXES = ( + "toolchain/cmake/", +) + + +def _get_gcov_version(gcov_binary: str) -> str: + """Return the version string from gcov --version.""" + try: + result = subprocess.run( + [gcov_binary, "--version"], + capture_output=True, text=True, timeout=10, check=False + ) + for line in result.stdout.splitlines(): + if line.strip(): + return line.strip() + except Exception: + pass + return "unknown" + + +def find_gcov_binary() -> str: + """ + Find a GNU gcov binary compatible with the system gfortran. + + On macOS with Homebrew GCC, the binary is gcov-{major} (e.g. gcov-15). + On Linux with system GCC, plain gcov is usually correct. + Apple LLVM's /usr/bin/gcov is incompatible with gfortran .gcda files. 
+ """ + # Determine gfortran major version + major = None + try: + result = subprocess.run( + ["gfortran", "--version"], + capture_output=True, text=True, timeout=10, check=False + ) + m = re.search(r'(\d+)\.\d+\.\d+', result.stdout) + if m: + major = m.group(1) + except Exception: + pass + + # Try versioned binary first (Homebrew macOS), then plain gcov + candidates = [] + if major: + candidates.append(f"gcov-{major}") + candidates.append("gcov") + + for candidate in candidates: + path = shutil.which(candidate) + if path is None: + continue + try: + result = subprocess.run( + [path, "--version"], + capture_output=True, text=True, timeout=10, check=False + ) + version_out = result.stdout + if "Apple LLVM" in version_out or "Apple clang" in version_out: + continue # Apple's gcov cannot parse GCC-generated .gcda files + if "GCC" in version_out or "GNU" in version_out: + return path + except Exception: + continue + + raise MFCException( + "GNU gcov not found. gcov is required for the coverage cache.\n" + " On macOS (Homebrew): brew install gcc\n" + " On Linux (Debian/Ubuntu): apt install gcc\n" + " On Linux (RHEL/CentOS): yum install gcc\n" + "Apple's /usr/bin/gcov is incompatible with gfortran .gcda files." + ) + + +def find_gcno_files(root_dir: str) -> list: + """ + Walk build/ and return all .gcno files (excluding venv paths). + Raises if none found (indicates build was not done with --gcov). + """ + build_dir = Path(root_dir) / "build" + gcno_files = [ + p for p in build_dir.rglob("*.gcno") + if "venv" not in p.parts + ] + if not gcno_files: + raise MFCException( + "No .gcno files found. Build with --gcov instrumentation first:\n" + " ./mfc.sh build --gcov -j 8" + ) + return gcno_files + + + +def _parse_gcov_json_output(raw_bytes: bytes, root_dir: str) -> set: + """ + Parse gcov JSON output and return the set of .fpp file paths with coverage. + Handles both gzip-compressed (gcov 13+) and raw JSON (gcov 12) formats. 
+ Handles concatenated JSON objects from batched gcov calls (multiple .gcno + files passed to a single gcov invocation). + Only .fpp files with at least one executed line are included. + """ + try: + text = gzip.decompress(raw_bytes).decode("utf-8", errors="replace") + except (gzip.BadGzipFile, OSError): + try: + text = raw_bytes.decode("utf-8", errors="replace") + except (UnicodeDecodeError, ValueError): + cons.print("[yellow]Warning: gcov output is not valid UTF-8 or gzip — " + "no coverage recorded for this test.[/yellow]") + return set() + + result = set() + real_root = os.path.realpath(root_dir) + + # Parse potentially concatenated JSON objects (one per .gcno file). + decoder = json.JSONDecoder() + pos = 0 + while pos < len(text): + while pos < len(text) and text[pos] in " \t\n\r": + pos += 1 + if pos >= len(text): + break + try: + data, end_pos = decoder.raw_decode(text, pos) + pos = end_pos + except json.JSONDecodeError: + remaining = len(text) - pos + if remaining > 0: + cons.print(f"[yellow]Warning: gcov JSON parse error at offset " + f"{pos} ({remaining} bytes remaining) — partial " + f"coverage recorded for this test.[/yellow]") + break + + for file_entry in data.get("files", []): + file_path = file_entry.get("file", "") + if not file_path.endswith(".fpp"): + continue + if any(line.get("count", 0) > 0 for line in file_entry.get("lines", [])): + try: + rel_path = os.path.relpath(os.path.realpath(file_path), real_root) + except ValueError: + rel_path = file_path + # Only keep src/ paths — build/staging/ artifacts from + # case-optimized builds are auto-generated and never + # appear in PR diffs. + if rel_path.startswith("src/"): + result.add(rel_path) + + return result + + +def _compute_gcov_prefix_strip(root_dir: str) -> str: + """ + Compute GCOV_PREFIX_STRIP so .gcda files preserve the build/ tree. + + GCOV_PREFIX_STRIP removes N leading path components from the compile-time + absolute .gcda path. 
We strip all components of the MFC root directory + so the prefix tree starts with ``build/staging/...``. + """ + real_root = os.path.realpath(root_dir) + return str(len(Path(real_root).parts) - 1) # -1 excludes root '/' + + +def _collect_single_test_coverage( # pylint: disable=too-many-locals + uuid: str, test_gcda: str, root_dir: str, gcov_bin: str, +) -> tuple: + """ + Collect file-level coverage for a single test, fully self-contained. + + Copies .gcno files from the real build tree into the test's isolated + .gcda directory (alongside the .gcda files), runs a batched gcov call, + then removes the .gcno copies. Each test has its own directory, so + this is safe to call concurrently without touching the shared build tree. + """ + build_subdir = os.path.join(test_gcda, "build") + if not os.path.isdir(build_subdir): + return uuid, [] + + gcno_copies = [] + + for dirpath, _, filenames in os.walk(build_subdir): + for fname in filenames: + if not fname.endswith(".gcda"): + continue + # Derive matching .gcno path in the real build tree + gcda_path = os.path.join(dirpath, fname) + rel = os.path.relpath(gcda_path, test_gcda) + gcno_rel = rel[:-5] + ".gcno" + gcno_src = os.path.join(root_dir, gcno_rel) + if os.path.isfile(gcno_src): + # Copy .gcno alongside .gcda in the test's isolated dir. + # Wrap in try/except for NFS TOCTOU races (file may vanish + # between isfile() and copy on networked filesystems). + gcno_dst = os.path.join(dirpath, fname[:-5] + ".gcno") + try: + shutil.copy2(gcno_src, gcno_dst) + except OSError: + continue + gcno_copies.append(gcno_dst) + + if not gcno_copies: + return uuid, [] + + # Batch: single gcov call for all .gcno files in this test. + # Run from root_dir so source path resolution works correctly. 
+ cmd = [gcov_bin, "--json-format", "--stdout"] + gcno_copies + try: + proc = subprocess.run( + cmd, capture_output=True, cwd=root_dir, timeout=120, check=False + ) + except (subprocess.TimeoutExpired, subprocess.SubprocessError, OSError) as exc: + cons.print(f"[yellow]Warning: gcov failed for {uuid}: {exc}[/yellow]") + return uuid, [] + finally: + for g in gcno_copies: + try: + os.remove(g) + except OSError: + pass + + if proc.returncode != 0 or not proc.stdout: + if proc.returncode != 0: + cons.print(f"[yellow]Warning: gcov exited {proc.returncode} for {uuid}[/yellow]") + return uuid, [] + + coverage = _parse_gcov_json_output(proc.stdout, root_dir) + return uuid, sorted(coverage) + + +def _run_single_test_direct(test_info: dict, gcda_dir: str, strip: str) -> tuple: # pylint: disable=too-many-locals + """ + Run a single test by invoking Fortran executables directly. + + Bypasses ``./mfc.sh run`` entirely (no Python startup, no Mako template + rendering, no shell script generation). Input files and binary paths are + pre-computed by the caller. + + Returns (uuid, test_gcda_path, failures). + """ + uuid = test_info["uuid"] + test_dir = test_info["dir"] + binaries = test_info["binaries"] # ordered list of (target_name, bin_path) + ppn = test_info["ppn"] + + test_gcda = os.path.join(gcda_dir, uuid) + os.makedirs(test_gcda, exist_ok=True) + + env = {**os.environ, "GCOV_PREFIX": test_gcda, "GCOV_PREFIX_STRIP": strip} + + # MPI-compiled binaries must be launched via an MPI launcher (even ppn=1). + # Use --bind-to none to avoid binding issues with concurrent launches. + if shutil.which("mpirun"): + mpi_cmd = ["mpirun", "--bind-to", "none", "-np", str(ppn)] + elif shutil.which("srun"): + mpi_cmd = ["srun", "--ntasks", str(ppn)] + else: + raise MFCException( + "No MPI launcher found (mpirun or srun). 
" + "MFC binaries require an MPI launcher.\n" + " On Ubuntu: sudo apt install openmpi-bin\n" + " On macOS: brew install open-mpi" + ) + + failures = [] + for target_name, bin_path in binaries: + if not os.path.isfile(bin_path): + # Record missing binary as a failure and stop: downstream targets + # depend on outputs from earlier ones (e.g. simulation needs the + # grid from pre_process), so running them without a predecessor + # produces misleading init-only gcda files. + failures.append((target_name, "missing-binary", + f"binary not found: {bin_path}")) + break + + # Verify .inp file exists before running (diagnostic for transient + # filesystem issues where the file goes missing between phases). + inp_file = os.path.join(test_dir, f"{target_name}.inp") + if not os.path.isfile(inp_file): + failures.append((target_name, "missing-inp", + f"{inp_file} not found before launch")) + break + + cmd = mpi_cmd + [bin_path] + try: + result = subprocess.run(cmd, check=False, text=True, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + env=env, cwd=test_dir, timeout=600) + if result.returncode != 0: + # Save last lines of output for debugging. Stop here: a + # failed pre_process/simulation leaves no valid outputs for + # the next target, and running it produces spurious coverage. + tail = "\n".join(result.stdout.strip().splitlines()[-15:]) + failures.append((target_name, result.returncode, tail)) + break + except subprocess.TimeoutExpired: + failures.append((target_name, "timeout", "")) + break + except (subprocess.SubprocessError, OSError) as exc: + failures.append((target_name, str(exc), "")) + break + + return uuid, test_gcda, failures + + +def _prepare_test(case, root_dir: str) -> dict: # pylint: disable=unused-argument,too-many-locals + """ + Prepare a test for direct execution: create directory, generate .inp + files, and resolve binary paths. All Python/toolchain overhead happens + here (single-threaded) so the parallel phase is pure subprocess calls. 
+ """ + try: + case.delete_output() + case.create_directory() + except OSError as exc: + cons.print(f"[yellow]Warning: Failed to prepare test directory for " + f"{case.get_uuid()}: {exc}[/yellow]") + + # Lagrange bubble tests need input files generated before running. + if case.params.get("bubbles_lagrange", 'F') == 'T': + try: + input_bubbles_lagrange(case) + except Exception as exc: + cons.print(f"[yellow]Warning: Failed to generate Lagrange bubble input " + f"for {case.get_uuid()}: {exc}[/yellow]") + + # Apply post_process output params so simulation writes data files that + # post_process reads. Mirrors the generated case.py logic that normally + # runs via ./mfc.sh run (see POST_PROCESS_OUTPUT_PARAMS in case.py). + case.params.update(get_post_process_mods(case.params)) + + # Run only one timestep: we only need to know which source files are + # *touched*, not verify correctness. A single step exercises the key + # code paths across all three executables while preventing heavy 3D tests + # from timing out under gcov instrumentation (~10x slowdown). + case.params['t_step_stop'] = 1 + + # Adaptive-dt tests: post_process computes n_save = int(t_stop/t_save)+1 + # and iterates over that many save indices. But with small t_step_stop + # the simulation produces far fewer saves. Clamp t_stop so post_process + # only reads saves that actually exist. + if case.params.get('cfl_adap_dt', 'F') == 'T': + t_save = float(case.params.get('t_save', 1.0)) + case.params['t_stop'] = t_save # n_save = 2: indices 0 and 1 + + # Heavy 3D tests: remove vorticity output (omega_wrt + fd_order) for + # 3D QBMM tests. Normal test execution never runs post_process (only + # PRE_PROCESS + SIMULATION, never POST_PROCESS), so post_process on + # heavy 3D configs is untested. Vorticity FD computation on large grids + # with many QBMM variables causes post_process to crash (exit code 2). 
+ if (int(case.params.get('p', 0)) > 0 and + case.params.get('qbmm', 'F') == 'T'): + for key in POST_PROCESS_3D_PARAMS: + case.params.pop(key, None) + + test_dir = case.get_dirpath() + input_file = case.to_input_file() + + # Write .inp files directly (no subprocess, no Mako templates). + # Suppress console output from get_inp() to avoid one message per (test, target) pair. + # Run all three executables to capture coverage across the full pipeline + # (pre_process: grid/IC generation; simulation: RHS/time-stepper; post_process: field I/O). + targets = [PRE_PROCESS, SIMULATION, POST_PROCESS] + binaries = [] + # NOTE: not thread-safe — Phase 1 must remain single-threaded. + orig_file = cons.raw.file + cons.raw.file = io.StringIO() + try: + for target in targets: + inp_content = case.get_inp(target) + common.file_write(os.path.join(test_dir, f"{target.name}.inp"), + inp_content) + bin_path = target.get_install_binpath(input_file) + binaries.append((target.name, bin_path)) + finally: + cons.raw.file = orig_file + + return { + "uuid": case.get_uuid(), + "dir": test_dir, + "binaries": binaries, + "ppn": getattr(case, 'ppn', 1), + } + + +def build_coverage_cache( # pylint: disable=too-many-locals,too-many-statements + root_dir: str, cases: list, n_jobs: int = None, +) -> None: + """ + Build the file-level coverage cache by running tests in parallel. + + Phase 1 — Prepare all tests: generate .inp files and resolve binary paths. + This happens single-threaded so the parallel phase has zero Python overhead. + + Phase 2 — Run all tests concurrently. Each worker invokes Fortran binaries + directly (no ``./mfc.sh run``, no shell scripts). Each test's GCOV_PREFIX + points to an isolated directory so .gcda files don't collide. + + Phase 3 — For each test, temporarily copy .gcno files from the real build tree + into the test's isolated .gcda directory, run gcov to collect which .fpp files + had coverage, then remove the .gcno copies. 
+ + Requires a prior ``--gcov`` build: ``./mfc.sh build --gcov -j 8`` + """ + gcov_bin = find_gcov_binary() + gcno_files = find_gcno_files(root_dir) + strip = _compute_gcov_prefix_strip(root_dir) + + if n_jobs is None: + n_jobs = max(os.cpu_count() or 1, 1) + # Cap Phase 2 test parallelism: each test spawns gcov-instrumented MPI + # processes (~2-5 GB each under gcov). Too many concurrent tests cause OOM. + # Phase 3 gcov workers run at full n_jobs (gcov is lightweight by comparison). + phase2_jobs = min(n_jobs, 16) + cons.print(f"[bold]Building coverage cache for {len(cases)} tests " + f"({phase2_jobs} test workers, {n_jobs} gcov workers)...[/bold]") + cons.print(f"[dim]Using gcov binary: {gcov_bin}[/dim]") + cons.print(f"[dim]Found {len(gcno_files)} .gcno files[/dim]") + cons.print(f"[dim]GCOV_PREFIX_STRIP={strip}[/dim]") + cons.print() + + # Phase 1: Prepare all tests (single-threaded; scales linearly with test count). + cons.print("[bold]Phase 1/3: Preparing tests...[/bold]") + test_infos = [] + for i, case in enumerate(cases): + try: + test_infos.append(_prepare_test(case, root_dir)) + except Exception as exc: # pylint: disable=broad-except + cons.print(f" [yellow]Warning: skipping {case.get_uuid()} — prep failed: {exc}[/yellow]") + if (i + 1) % 100 == 0 or (i + 1) == len(cases): + cons.print(f" [{i+1:3d}/{len(cases):3d}] prepared") + cons.print() + + gcda_dir = tempfile.mkdtemp(prefix="mfc_gcov_") + try: + # Phase 2: Run all tests in parallel via direct binary invocation. 
+ cons.print("[bold]Phase 2/3: Running tests...[/bold]") + test_results: dict = {} + all_failures: dict = {} + with ThreadPoolExecutor(max_workers=phase2_jobs) as pool: + futures = { + pool.submit(_run_single_test_direct, info, gcda_dir, strip): info + for info in test_infos + } + for i, future in enumerate(as_completed(futures)): + try: + uuid, test_gcda, failures = future.result() + except Exception as exc: # pylint: disable=broad-except + info = futures[future] + cons.print(f" [yellow]Warning: {info['uuid']} failed to run: {exc}[/yellow]") + continue + test_results[uuid] = test_gcda + if failures: + all_failures[uuid] = failures + if (i + 1) % 50 == 0 or (i + 1) == len(test_infos): + cons.print(f" [{i+1:3d}/{len(test_infos):3d}] tests completed") + + if all_failures: + cons.print() + cons.print(f"[bold yellow]Warning: {len(all_failures)} tests had target failures:[/bold yellow]") + for uuid, fails in sorted(all_failures.items()): + fail_str = ", ".join(f"{t}={rc}" for t, rc, _ in fails) + cons.print(f" [yellow]{uuid}[/yellow]: {fail_str}") + for target_name, _rc, tail in fails: + if tail: + cons.print(f" {target_name} output (last 15 lines):") + for line in tail.splitlines(): + cons.print(f" {line}") + + # Diagnostic: verify .gcda files exist for at least one test. + sample_uuid = next(iter(test_results), None) + if sample_uuid: + sample_gcda = test_results[sample_uuid] + sample_build = os.path.join(sample_gcda, "build") + if os.path.isdir(sample_build): + gcda_count = sum( + 1 for _, _, fns in os.walk(sample_build) + for f in fns if f.endswith(".gcda") + ) + cons.print(f"[dim]Sample test {sample_uuid}: " + f"{gcda_count} .gcda files in {sample_build}[/dim]") + else: + cons.print(f"[yellow]Sample test {sample_uuid}: " + f"no build/ dir in {sample_gcda}[/yellow]") + + # Phase 3: Collect gcov coverage from each test's isolated .gcda directory. + # .gcno files are temporarily copied alongside .gcda files, then removed. 
+ cons.print() + cons.print("[bold]Phase 3/3: Collecting coverage...[/bold]") + cache: dict = {} + completed = 0 + with ThreadPoolExecutor(max_workers=n_jobs) as pool: + futures = { + pool.submit( + _collect_single_test_coverage, + uuid, test_gcda, root_dir, gcov_bin, + ): uuid + for uuid, test_gcda in test_results.items() + } + for future in as_completed(futures): + try: + uuid, coverage = future.result() + except Exception as exc: # pylint: disable=broad-except + uuid = futures[future] + cons.print(f" [yellow]Warning: {uuid} coverage failed: {exc}[/yellow]") + coverage = [] + cache[uuid] = coverage + completed += 1 + if completed % 50 == 0 or completed == len(test_results): + cons.print(f" [{completed:3d}/{len(test_results):3d}] tests processed") + finally: + shutil.rmtree(gcda_dir, ignore_errors=True) + + # Sanity check: at least some tests should have non-empty coverage. + tests_with_coverage = sum(1 for v in cache.values() if v) + if tests_with_coverage == 0: + raise MFCException( + "Coverage cache build produced zero coverage for all tests. " + "Check that the build was done with --gcov and gcov is working correctly." + ) + if tests_with_coverage < len(cases) // 2: + cons.print(f"[bold yellow]Warning: Only {tests_with_coverage}/{len(cases)} tests " + f"have coverage data. 
Cache may be incomplete.[/bold yellow]") + + cases_py_path = Path(root_dir) / "toolchain/mfc/test/cases.py" + try: + cases_hash = hashlib.sha256(cases_py_path.read_bytes()).hexdigest() + except OSError as exc: + raise MFCException( + f"Failed to read {cases_py_path} for cache metadata: {exc}" + ) from exc + gcov_version = _get_gcov_version(gcov_bin) + + cache["_meta"] = { + "created": datetime.datetime.now(datetime.timezone.utc).isoformat(), + "cases_hash": cases_hash, + "gcov_version": gcov_version, + } + + try: + with gzip.open(COVERAGE_CACHE_PATH, "wt", encoding="utf-8") as f: + json.dump(cache, f, indent=2) + except OSError as exc: + raise MFCException( + f"Failed to write coverage cache to {COVERAGE_CACHE_PATH}: {exc}\n" + "Check disk space and filesystem permissions." + ) from exc + + cons.print() + cons.print(f"[bold green]Coverage cache written to {COVERAGE_CACHE_PATH}[/bold green]") + cons.print(f"[dim]Cache has {len(cases)} test entries.[/dim]") + + # Clean up test output directories from Phase 1/2 (grid files, restart files, + # silo output, etc.). These live on NFS scratch and can total several GB for + # the full test suite. Leaving them behind creates I/O pressure for subsequent + # test jobs that share the same scratch filesystem. + cons.print("[dim]Cleaning up test output directories...[/dim]") + for case in cases: + try: + case.delete_output() + except OSError: + pass # Best-effort; NFS errors are non-fatal here + + +def _normalize_cache(cache: dict) -> dict: + """Convert old line-level cache format to file-level if needed. 
+ + Old format: {uuid: {file: [lines], ...}, ...} + New format: {uuid: [file, ...], ...} + """ + result = {} + for k, v in cache.items(): + if k == "_meta": + result[k] = v + elif isinstance(v, dict): + result[k] = sorted(v.keys()) + elif isinstance(v, list): + result[k] = v + else: + cons.print(f"[yellow]Warning: unexpected cache value type for {k}: " + f"{type(v).__name__} — treating as empty.[/yellow]") + result[k] = [] + return result + + +def load_coverage_cache(root_dir: str) -> Optional[dict]: + """ + Load the coverage cache, returning None if missing or stale. + + Staleness is detected by comparing the SHA256 of cases.py at cache-build time + against the current cases.py. Auto-converts old line-level format if needed. + """ + if not COVERAGE_CACHE_PATH.exists(): + return None + + try: + with gzip.open(COVERAGE_CACHE_PATH, "rt", encoding="utf-8") as f: + cache = json.load(f) + except (OSError, gzip.BadGzipFile, json.JSONDecodeError, UnicodeDecodeError) as exc: + cons.print(f"[yellow]Warning: Coverage cache is unreadable or corrupt: {exc}[/yellow]") + return None + + if not isinstance(cache, dict): + cons.print("[yellow]Warning: Coverage cache has unexpected format.[/yellow]") + return None + + cases_py = Path(root_dir) / "toolchain/mfc/test/cases.py" + try: + current_hash = hashlib.sha256(cases_py.read_bytes()).hexdigest() + except FileNotFoundError: + cons.print("[yellow]Warning: cases.py not found; cannot verify cache staleness.[/yellow]") + return None + stored_hash = cache.get("_meta", {}).get("cases_hash", "") + + if current_hash != stored_hash: + cons.print("[yellow]Warning: Coverage cache is stale (cases.py changed).[/yellow]") + return None + + return _normalize_cache(cache) + + +def _parse_diff_files(diff_text: str) -> set: + """ + Parse ``git diff --name-only`` output and return the set of changed file paths. 
+ """ + return {f for f in diff_text.strip().splitlines() if f} + + +def get_changed_files(root_dir: str, compare_branch: str = "master") -> Optional[set]: + """ + Return the set of files changed in this branch relative to the merge-base + with compare_branch, or None on git failure. + + Uses merge-base (not master tip) so that unrelated master advances don't + appear as "your changes." + """ + try: + # Try local branch first, then origin/ remote ref (CI shallow clones). + for ref in [compare_branch, f"origin/{compare_branch}"]: + merge_base_result = subprocess.run( + ["git", "merge-base", ref, "HEAD"], + capture_output=True, text=True, cwd=root_dir, timeout=30, check=False + ) + if merge_base_result.returncode == 0: + break + else: + return None + merge_base = merge_base_result.stdout.strip() + if not merge_base: + return None + + diff_result = subprocess.run( + ["git", "diff", merge_base, "HEAD", "--name-only", "--no-color"], + capture_output=True, text=True, cwd=root_dir, timeout=30, check=False + ) + if diff_result.returncode != 0: + return None + + return _parse_diff_files(diff_result.stdout) + except (subprocess.TimeoutExpired, OSError): + return None + + +def should_run_all_tests(changed_files: set) -> bool: + """ + Return True if any changed file is in ALWAYS_RUN_ALL or under + ALWAYS_RUN_ALL_PREFIXES. + + GPU macro files, Fypp includes, and build system files cannot be + correctly analyzed by CPU coverage — changes to them must always + trigger the full test suite. + """ + if changed_files & ALWAYS_RUN_ALL: + return True + return any(f.startswith(ALWAYS_RUN_ALL_PREFIXES) for f in changed_files) + + +def filter_tests_by_coverage( + cases: list, coverage_cache: dict, changed_files: set +) -> tuple: + """ + Filter test cases to only those whose covered files overlap with changed files. + + Returns (cases_to_run, skipped_cases). 
+ + Conservative behavior: + - Test not in cache (newly added) -> include it + - No changed .fpp files -> skip all tests (this branch is unreachable from + test.py, which handles the no-changed-fpp case before calling this function; + retained as a safe fallback for direct callers) + - Test has incomplete coverage (no simulation files recorded but simulation + files changed) -> include it (cache build likely failed for this test) + """ + changed_fpp = {f for f in changed_files if f.endswith(".fpp")} + if not changed_fpp: + return [], list(cases) + + changed_sim = any(f.startswith("src/simulation/") for f in changed_fpp) + + to_run = [] + skipped = [] + n_not_in_cache = 0 + n_no_sim_coverage = 0 + + for case in cases: + uuid = case.get_uuid() + test_files = coverage_cache.get(uuid) + + if test_files is None: + # Test not in cache (e.g., newly added) -> conservative: include + to_run.append(case) + n_not_in_cache += 1 + continue + + test_file_set = set(test_files) + + # If simulation files changed but this test has no simulation coverage, + # include it conservatively — the cache build likely failed for this test. 
+ if changed_sim and not any(f.startswith("src/simulation/") for f in test_file_set): + to_run.append(case) + n_no_sim_coverage += 1 + continue + + if test_file_set & changed_fpp: + to_run.append(case) + else: + skipped.append(case) + + if n_not_in_cache: + cons.print(f"[dim] {n_not_in_cache} test(s) included conservatively " + f"(not in cache)[/dim]") + if n_no_sim_coverage: + cons.print(f"[dim] {n_no_sim_coverage} test(s) included conservatively " + f"(missing sim coverage)[/dim]") + + return to_run, skipped diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py index 2193e677b4..26e37c669e 100644 --- a/toolchain/mfc/test/test.py +++ b/toolchain/mfc/test/test.py @@ -76,8 +76,8 @@ def is_uuid(term): return cases, skipped_cases -# pylint: disable=too-many-branches, too-many-statements, trailing-whitespace -def __filter(cases_) -> typing.List[TestCase]: +# pylint: disable=too-many-branches,too-many-locals,too-many-statements,trailing-whitespace +def __filter(cases_) -> typing.Tuple[typing.List[TestCase], typing.List[TestCase]]: cases = cases_[:] selected_cases = [] skipped_cases = [] @@ -108,6 +108,62 @@ def __filter(cases_) -> typing.List[TestCase]: f"Specified: {ARG('only')}. Check that UUIDs/names are valid." ) + # --only-changes: filter based on file-level gcov coverage + if ARG("only_changes"): + from .coverage import ( # pylint: disable=import-outside-toplevel + load_coverage_cache, get_changed_files, + should_run_all_tests, filter_tests_by_coverage, + ) + + # Example-based tests cover no unique files beyond non-example tests, + # so they add no value to coverage-based pruning. Skip them entirely. 
+ example_skipped = [c for c in cases if "Example" in c.trace] + cases = [c for c in cases if "Example" not in c.trace] + skipped_cases += example_skipped + if example_skipped: + cons.print(f"[dim]Skipped {len(example_skipped)} example tests " + f"(redundant coverage)[/dim]") + + cache = load_coverage_cache(common.MFC_ROOT_DIR) + if cache is None: + cons.print("[yellow]Coverage cache missing or stale.[/yellow]") + cons.print("[yellow]Run: ./mfc.sh build --gcov -j 8 && ./mfc.sh test --build-coverage-cache[/yellow]") + cons.print("[yellow]Falling back to full test suite.[/yellow]") + else: + changed_files = get_changed_files(common.MFC_ROOT_DIR, ARG("changes_branch")) + + if changed_files is None: + cons.print("[yellow]git diff failed — falling back to full test suite.[/yellow]") + elif should_run_all_tests(changed_files): + cons.print() + cons.print("[bold cyan]Coverage Change Analysis[/bold cyan]") + cons.print("-" * 50) + cons.print("[yellow]Infrastructure or macro file changed — running full test suite.[/yellow]") + cons.print("-" * 50) + else: + changed_fpp = {f for f in changed_files if f.endswith(".fpp")} + if not changed_fpp: + cons.print() + cons.print("[bold cyan]Coverage Change Analysis[/bold cyan]") + cons.print("-" * 50) + cons.print("[green]No .fpp source changes detected — skipping all tests.[/green]") + cons.print("-" * 50) + cons.print() + skipped_cases += cases + cases = [] + else: + cons.print() + cons.print("[bold cyan]Coverage Change Analysis[/bold cyan]") + cons.print("-" * 50) + for fpp_file in sorted(changed_fpp): + cons.print(f" [green]*[/green] {fpp_file}") + + cases, new_skipped = filter_tests_by_coverage(cases, cache, changed_files) + skipped_cases += new_skipped + cons.print(f"\n[bold]Tests to run: {len(cases)} / {len(cases) + len(new_skipped)}[/bold]") + cons.print("-" * 50) + cons.print() + for case in cases[:]: if case.ppn > 1 and not ARG("mpi"): cases.remove(case) @@ -176,6 +232,27 @@ def test(): return + if 
ARG("build_coverage_cache"): + from .coverage import build_coverage_cache # pylint: disable=import-outside-toplevel + # Exclude example-based tests: they cover no unique files beyond + # non-example tests, so building coverage for them is wasted work. + cases = [c for c in cases if "Example" not in c.trace] + all_cases = [b.to_case() for b in cases] + + # Build all unique slugs (Chemistry, case-optimization, etc.) so every + # test has a compatible binary when run with --no-build. + codes = [PRE_PROCESS, SIMULATION, POST_PROCESS] + unique_builds = set() + for case, code in itertools.product(all_cases, codes): + slug = code.get_slug(case.to_input_file()) + if slug not in unique_builds: + build(code, case.to_input_file()) + unique_builds.add(slug) + + build_coverage_cache(common.MFC_ROOT_DIR, all_cases, + n_jobs=int(ARG("jobs"))) + return + cases, skipped_cases = __filter(cases) cases = [ _.to_case() for _ in cases ] total_test_count = len(cases) diff --git a/toolchain/mfc/test/test_coverage_cache.json.gz b/toolchain/mfc/test/test_coverage_cache.json.gz new file mode 100644 index 0000000000..0bb928250a Binary files /dev/null and b/toolchain/mfc/test/test_coverage_cache.json.gz differ diff --git a/toolchain/mfc/test/test_coverage_unit.py b/toolchain/mfc/test/test_coverage_unit.py new file mode 100644 index 0000000000..bbe972e0ed --- /dev/null +++ b/toolchain/mfc/test/test_coverage_unit.py @@ -0,0 +1,664 @@ +""" +Unit tests for toolchain/mfc/test/coverage.py + +Run with: + python3 -m pytest toolchain/mfc/test/test_coverage_unit.py -v + +These tests are fully offline (no build, no git, no gcov binary required). +They use mocks and in-memory data structures to verify logic. 
+""" +# pylint: disable=protected-access,exec-used,too-few-public-methods,wrong-import-position + +import gzip +import importlib.util +import json +import os +import sys +import types +import unittest +from unittest.mock import patch + +# --------------------------------------------------------------------------- +# Import the module under test. +# We patch the module-level imports that require the full toolchain. +# --------------------------------------------------------------------------- + +# Create minimal stubs for toolchain modules so coverage.py can be imported +# without the full MFC toolchain being on sys.path. +def _make_stub(name): + mod = types.ModuleType(name) + sys.modules[name] = mod + return mod + + +for _mod_name in [ + "toolchain", + "toolchain.mfc", + "toolchain.mfc.printer", + "toolchain.mfc.common", + "toolchain.mfc.build", + "toolchain.mfc.test", + "toolchain.mfc.test.case", +]: + if _mod_name not in sys.modules: + _make_stub(_mod_name) + +# Provide the attributes coverage.py needs from its relative imports +_printer_stub = sys.modules.get("toolchain.mfc.printer", _make_stub("toolchain.mfc.printer")) + + +class _FakeCons: + def print(self, *args, **kwargs): + pass # suppress output during tests + + +_printer_stub.cons = _FakeCons() + +_common_stub = sys.modules.get("toolchain.mfc.common", _make_stub("toolchain.mfc.common")) +_common_stub.MFC_ROOT_DIR = "/fake/repo" + + +class _FakeMFCException(Exception): + pass + + +_common_stub.MFCException = _FakeMFCException + +_build_stub = sys.modules.get("toolchain.mfc.build", _make_stub("toolchain.mfc.build")) +_build_stub.PRE_PROCESS = "pre_process" +_build_stub.SIMULATION = "simulation" +_build_stub.POST_PROCESS = "post_process" +_build_stub.SYSCHECK = "syscheck" + +_case_stub = sys.modules.get("toolchain.mfc.test.case", _make_stub("toolchain.mfc.test.case")) +_case_stub.input_bubbles_lagrange = lambda case: None +_case_stub.get_post_process_mods = lambda params: {} 
+_case_stub.POST_PROCESS_3D_PARAMS = { + 'fd_order': 1, 'omega_wrt(1)': 'T', 'omega_wrt(2)': 'T', 'omega_wrt(3)': 'T', +} + +# Load coverage.py by injecting stubs into sys.modules so relative imports resolve. +_COVERAGE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "coverage.py") + +sys.modules.pop("toolchain.mfc.test.coverage", None) # reset if already loaded + +_spec = importlib.util.spec_from_file_location( + "toolchain.mfc.test.coverage", + _COVERAGE_PATH, + submodule_search_locations=[] +) +_coverage_mod = importlib.util.module_from_spec(_spec) +_coverage_mod.__package__ = "toolchain.mfc.test" + +sys.modules["toolchain.mfc.test"] = types.ModuleType("toolchain.mfc.test") +sys.modules["toolchain.mfc.test"].__package__ = "toolchain.mfc.test" + +with patch.dict("sys.modules", { + "toolchain.mfc.printer": _printer_stub, + "toolchain.mfc.common": _common_stub, + "toolchain.mfc.build": _build_stub, + "toolchain.mfc.test.case": _case_stub, +}): + try: + _spec.loader.exec_module(_coverage_mod) + except ImportError: + pass # fallback below + +# If the importlib approach failed (relative imports unresolvable), fall back to exec. 
+try: + _parse_diff_files = _coverage_mod._parse_diff_files + _parse_gcov_json_output = _coverage_mod._parse_gcov_json_output + _normalize_cache = _coverage_mod._normalize_cache + should_run_all_tests = _coverage_mod.should_run_all_tests + filter_tests_by_coverage = _coverage_mod.filter_tests_by_coverage + ALWAYS_RUN_ALL = _coverage_mod.ALWAYS_RUN_ALL + COVERAGE_CACHE_PATH = _coverage_mod.COVERAGE_CACHE_PATH +except AttributeError: + _globals = { + "__name__": "toolchain.mfc.test.coverage", + "__package__": "toolchain.mfc.test", + "cons": _printer_stub.cons, + "common": _common_stub, + "MFCException": _FakeMFCException, + "PRE_PROCESS": "pre_process", + "SIMULATION": "simulation", + "POST_PROCESS": "post_process", + "SYSCHECK": "syscheck", + } + with open(_COVERAGE_PATH, encoding="utf-8") as _f: + _src = _f.read() + + _src = ( + _src + .replace("from ..printer import cons", "cons = _globals['cons']") + .replace("from .. import common", "") + .replace("from ..common import MFCException", "MFCException = _globals['MFCException']") + .replace("from ..build import PRE_PROCESS, SIMULATION, POST_PROCESS, SYSCHECK", "") + .replace("from .case import (input_bubbles_lagrange, get_post_process_mods,\n" + " POST_PROCESS_3D_PARAMS)", + "input_bubbles_lagrange = lambda case: None\n" + "get_post_process_mods = lambda params: {}\n" + "POST_PROCESS_3D_PARAMS = {'fd_order': 1, 'omega_wrt(1)': 'T', " + "'omega_wrt(2)': 'T', 'omega_wrt(3)': 'T'}") + ) + exec(compile(_src, _COVERAGE_PATH, "exec"), _globals) # noqa: S102 + + _parse_diff_files = _globals["_parse_diff_files"] + _parse_gcov_json_output = _globals["_parse_gcov_json_output"] + _normalize_cache = _globals["_normalize_cache"] + should_run_all_tests = _globals["should_run_all_tests"] + filter_tests_by_coverage = _globals["filter_tests_by_coverage"] + ALWAYS_RUN_ALL = _globals["ALWAYS_RUN_ALL"] + COVERAGE_CACHE_PATH = _globals["COVERAGE_CACHE_PATH"] + + +# 
--------------------------------------------------------------------------- +# Helper: minimal fake test case +# --------------------------------------------------------------------------- + +class FakeCase: + """Minimal stand-in for TestCase — only get_uuid() is needed.""" + + def __init__(self, uuid: str): + self._uuid = uuid + + def get_uuid(self) -> str: + return self._uuid + + +# =========================================================================== +# Group 1: _parse_diff_files — git diff --name-only parsing +# =========================================================================== + +class TestParseDiffFiles(unittest.TestCase): + + def test_parse_single_file(self): + result = _parse_diff_files("src/simulation/m_rhs.fpp\n") + assert result == {"src/simulation/m_rhs.fpp"} + + def test_parse_multiple_files(self): + text = "src/simulation/m_rhs.fpp\nsrc/simulation/m_weno.fpp\nREADME.md\n" + result = _parse_diff_files(text) + assert result == { + "src/simulation/m_rhs.fpp", + "src/simulation/m_weno.fpp", + "README.md", + } + + def test_parse_empty(self): + assert _parse_diff_files("") == set() + assert _parse_diff_files("\n") == set() + + def test_parse_ignores_blank_lines(self): + text = "src/simulation/m_rhs.fpp\n\n\nsrc/simulation/m_weno.fpp\n" + result = _parse_diff_files(text) + assert result == {"src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"} + + def test_parse_mixed_extensions(self): + text = "src/simulation/m_rhs.fpp\ntoolchain/mfc/test/cases.py\nCMakeLists.txt\n" + result = _parse_diff_files(text) + assert len(result) == 3 + assert "toolchain/mfc/test/cases.py" in result + assert "CMakeLists.txt" in result + + +# =========================================================================== +# Group 2: should_run_all_tests — ALWAYS_RUN_ALL detection +# =========================================================================== + +class TestShouldRunAllTests(unittest.TestCase): + + def test_parallel_macros_triggers_all(self): + assert 
should_run_all_tests( + {"src/common/include/parallel_macros.fpp"} + ) is True + + def test_acc_macros_triggers_all(self): + assert should_run_all_tests( + {"src/common/include/acc_macros.fpp"} + ) is True + + def test_omp_macros_triggers_all(self): + assert should_run_all_tests( + {"src/common/include/omp_macros.fpp"} + ) is True + + def test_shared_parallel_macros_triggers_all(self): + assert should_run_all_tests( + {"src/common/include/shared_parallel_macros.fpp"} + ) is True + + def test_macros_fpp_triggers_all(self): + assert should_run_all_tests( + {"src/common/include/macros.fpp"} + ) is True + + def test_cases_py_triggers_all(self): + assert should_run_all_tests( + {"toolchain/mfc/test/cases.py"} + ) is True + + def test_case_py_triggers_all(self): + assert should_run_all_tests( + {"toolchain/mfc/test/case.py"} + ) is True + + def test_definitions_py_triggers_all(self): + assert should_run_all_tests( + {"toolchain/mfc/params/definitions.py"} + ) is True + + def test_input_py_triggers_all(self): + assert should_run_all_tests( + {"toolchain/mfc/run/input.py"} + ) is True + + def test_case_validator_triggers_all(self): + assert should_run_all_tests( + {"toolchain/mfc/case_validator.py"} + ) is True + + def test_cmakelists_does_not_trigger_all(self): + assert should_run_all_tests({"CMakeLists.txt"}) is False + + def test_case_fpp_triggers_all(self): + assert should_run_all_tests( + {"src/common/include/case.fpp"} + ) is True + + def test_coverage_py_triggers_all(self): + assert should_run_all_tests( + {"toolchain/mfc/test/coverage.py"} + ) is True + + def test_cmake_dir_triggers_all(self): + assert should_run_all_tests( + {"toolchain/cmake/FindFFTW.cmake"} + ) is True + + def test_cmake_subdir_triggers_all(self): + assert should_run_all_tests( + {"toolchain/cmake/some/nested/file.cmake"} + ) is True + + def test_simulation_module_does_not_trigger_all(self): + assert should_run_all_tests( + {"src/simulation/m_rhs.fpp"} + ) is False + + def 
test_empty_set_does_not_trigger_all(self): + assert should_run_all_tests(set()) is False + + def test_mixed_one_trigger_fires_all(self): + assert should_run_all_tests({ + "src/simulation/m_rhs.fpp", + "src/common/include/macros.fpp", + }) is True + + +# =========================================================================== +# Group 3: filter_tests_by_coverage — core file-level selection logic +# =========================================================================== + +class TestFilterTestsByCoverage(unittest.TestCase): + + def test_file_overlap_includes_test(self): + cache = {"AAAA0001": ["src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"]} + changed = {"src/simulation/m_rhs.fpp"} + cases = [FakeCase("AAAA0001")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + assert len(to_run) == 1 + assert len(skipped) == 0 + + def test_no_file_overlap_skips_test(self): + cache = {"AAAA0001": ["src/simulation/m_rhs.fpp"]} + changed = {"src/simulation/m_weno.fpp"} + cases = [FakeCase("AAAA0001")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + assert len(to_run) == 0 + assert len(skipped) == 1 + + def test_uuid_not_in_cache_is_conservative(self): + """Newly added test not in cache -> include it (conservative).""" + cache = {} + changed = {"src/simulation/m_rhs.fpp"} + to_run, _ = filter_tests_by_coverage([FakeCase("NEWTEST1")], cache, changed) + assert len(to_run) == 1 + + def test_no_fpp_changes_skips_all(self): + """Only non-.fpp files changed -> skip all tests.""" + cache = {"AAAA0001": ["src/simulation/m_rhs.fpp"]} + changed = {"toolchain/setup.py", "README.md"} + cases = [FakeCase("AAAA0001")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + assert len(to_run) == 0 + assert len(skipped) == 1 + + def test_empty_changed_files_skips_all(self): + cache = {"AAAA0001": ["src/simulation/m_rhs.fpp"]} + changed = set() + to_run, skipped = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + 
assert len(to_run) == 0 + assert len(skipped) == 1 + + def test_multiple_tests_partial_selection(self): + """Only the test covering the changed file should run.""" + cache = { + "TEST_A": ["src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"], + "TEST_B": ["src/simulation/m_bubbles.fpp"], + "TEST_C": ["src/simulation/m_rhs.fpp"], + } + changed = {"src/simulation/m_bubbles.fpp"} + cases = [FakeCase("TEST_A"), FakeCase("TEST_B"), FakeCase("TEST_C")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + uuids_run = {c.get_uuid() for c in to_run} + assert uuids_run == {"TEST_B"} + assert len(skipped) == 2 + + def test_multiple_changed_files_union(self): + """Changing multiple files includes any test that covers any of them.""" + cache = { + "TEST_A": ["src/simulation/m_rhs.fpp"], + "TEST_B": ["src/simulation/m_weno.fpp"], + "TEST_C": ["src/simulation/m_bubbles.fpp"], + } + changed = {"src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"} + cases = [FakeCase("TEST_A"), FakeCase("TEST_B"), FakeCase("TEST_C")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + uuids_run = {c.get_uuid() for c in to_run} + assert uuids_run == {"TEST_A", "TEST_B"} + assert len(skipped) == 1 + + def test_test_covering_multiple_files_matched_via_second(self): + """Test matched because m_weno.fpp (its second covered file) was changed.""" + cache = {"AAAA0001": ["src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"]} + changed = {"src/simulation/m_weno.fpp"} + to_run, _ = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 1 + + def test_empty_cache_runs_all_conservatively(self): + """Empty coverage cache -> all tests included (conservative).""" + cache = {} + changed = {"src/simulation/m_rhs.fpp"} + cases = [FakeCase("T1"), FakeCase("T2"), FakeCase("T3")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + assert len(to_run) == 3 + assert len(skipped) == 0 + + def 
test_mixed_fpp_and_nonfpp_changes(self): + """Non-.fpp files in changed set are ignored for matching.""" + cache = {"TEST_A": ["src/simulation/m_rhs.fpp"]} + changed = {"src/simulation/m_rhs.fpp", "README.md", "toolchain/setup.py"} + to_run, _ = filter_tests_by_coverage([FakeCase("TEST_A")], cache, changed) + assert len(to_run) == 1 + + def test_incomplete_coverage_included_conservatively(self): + """Test with no simulation coverage but simulation file changed -> include.""" + cache = { + "GOOD_T": ["src/simulation/m_rhs.fpp", "src/pre_process/m_start_up.fpp"], + "BAD_T": ["src/pre_process/m_start_up.fpp", "src/common/m_helper.fpp"], + } + changed = {"src/simulation/m_rhs.fpp"} + cases = [FakeCase("GOOD_T"), FakeCase("BAD_T")] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + uuids_run = {c.get_uuid() for c in to_run} + assert "GOOD_T" in uuids_run # direct file overlap + assert "BAD_T" in uuids_run # no sim coverage -> conservative include + assert len(skipped) == 0 + + def test_incomplete_coverage_not_triggered_by_preprocess(self): + """Test with no sim coverage is NOT auto-included for pre_process changes.""" + cache = { + "BAD_T": ["src/pre_process/m_start_up.fpp"], + } + changed = {"src/pre_process/m_data_output.fpp"} + to_run, skipped = filter_tests_by_coverage([FakeCase("BAD_T")], cache, changed) + assert len(to_run) == 0 # no sim change, no overlap -> skip + assert len(skipped) == 1 + + +# =========================================================================== +# Group 4: Corner cases from design discussion +# =========================================================================== + +class TestDesignCornerCases(unittest.TestCase): + + def test_gpu_ifdef_file_still_triggers_if_covered(self): + """ + GPU-specific code lives in the same .fpp file as CPU code. + At file level, changing any part of the file triggers tests that cover it. 
+ """ + cache = {"MUSCL_T": ["src/simulation/m_muscl.fpp"]} + changed = {"src/simulation/m_muscl.fpp"} + to_run, _ = filter_tests_by_coverage([FakeCase("MUSCL_T")], cache, changed) + assert len(to_run) == 1 + + def test_macro_file_triggers_all_via_should_run_all(self): + """parallel_macros.fpp in changed files -> should_run_all_tests() is True.""" + assert should_run_all_tests({"src/common/include/parallel_macros.fpp"}) is True + + def test_new_fpp_file_no_coverage_skips(self): + """ + Brand new .fpp file has no coverage in cache. + All tests are skipped (no test covers the new file). + """ + cache = {"AAAA0001": ["src/simulation/m_rhs.fpp"]} + changed = {"src/simulation/m_brand_new.fpp"} + to_run, skipped = filter_tests_by_coverage([FakeCase("AAAA0001")], cache, changed) + assert len(to_run) == 0 + assert len(skipped) == 1 + + def test_non_fpp_always_run_all_detected(self): + """ + End-to-end: diff lists only cases.py (non-.fpp) -> + _parse_diff_files includes it -> should_run_all_tests fires. + """ + files = _parse_diff_files("toolchain/mfc/test/cases.py\n") + assert should_run_all_tests(files) is True + + def test_niche_feature_pruning(self): + """ + Niche features: most tests don't cover m_bubbles.fpp. + Changing it skips tests that don't touch it. 
+ """ + cache = { + "BUBBLE1": ["src/simulation/m_bubbles.fpp", "src/simulation/m_rhs.fpp"], + "BUBBLE2": ["src/simulation/m_bubbles.fpp"], + "BASIC_1": ["src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"], + "BASIC_2": ["src/simulation/m_rhs.fpp"], + "BASIC_3": ["src/simulation/m_weno.fpp"], + } + changed = {"src/simulation/m_bubbles.fpp"} + cases = [FakeCase(u) for u in ["BUBBLE1", "BUBBLE2", "BASIC_1", "BASIC_2", "BASIC_3"]] + to_run, skipped = filter_tests_by_coverage(cases, cache, changed) + uuids_run = {c.get_uuid() for c in to_run} + assert uuids_run == {"BUBBLE1", "BUBBLE2"} + assert len(skipped) == 3 + + +# =========================================================================== +# Group 5: _parse_gcov_json_output — gcov JSON parsing (file-level) +# =========================================================================== + +class TestParseGcovJsonOutput(unittest.TestCase): + + def _make_gcov_json(self, files_data: list) -> bytes: + """Build a fake gzip-compressed gcov JSON blob.""" + data = { + "format_version": "2", + "gcc_version": "15.2.0", + "files": files_data, + } + return gzip.compress(json.dumps(data).encode()) + + def test_returns_set_of_covered_fpp_files(self): + compressed = self._make_gcov_json([{ + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [ + {"line_number": 45, "count": 3}, + {"line_number": 46, "count": 0}, + {"line_number": 47, "count": 1}, + ], + }]) + result = _parse_gcov_json_output(compressed, "/repo") + assert result == {"src/simulation/m_rhs.fpp"} + + def test_ignores_file_with_zero_coverage(self): + compressed = self._make_gcov_json([{ + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [ + {"line_number": 10, "count": 0}, + {"line_number": 11, "count": 0}, + ], + }]) + result = _parse_gcov_json_output(compressed, "/repo") + assert result == set() + + def test_ignores_f90_files(self): + """Generated .f90 files must not appear in coverage output.""" + compressed = self._make_gcov_json([ + { + "file": 
"/repo/build/fypp/simulation/m_rhs.fpp.f90", + "lines": [{"line_number": 10, "count": 5}], + }, + { + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [{"line_number": 45, "count": 1}], + }, + ]) + result = _parse_gcov_json_output(compressed, "/repo") + assert result == {"src/simulation/m_rhs.fpp"} + + def test_handles_raw_json_gcov12(self): + """gcov 12 outputs raw JSON (not gzip). Must parse correctly.""" + data = { + "format_version": "1", + "gcc_version": "12.3.0", + "files": [{ + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [{"line_number": 45, "count": 3}], + }], + } + raw = json.dumps(data).encode() + result = _parse_gcov_json_output(raw, "/repo") + assert result == {"src/simulation/m_rhs.fpp"} + + def test_handles_invalid_data_gracefully(self): + result = _parse_gcov_json_output(b"not valid gzip or json", "/repo") + assert result == set() + + def test_handles_empty_files_list(self): + compressed = self._make_gcov_json([]) + result = _parse_gcov_json_output(compressed, "/repo") + assert result == set() + + def test_multiple_fpp_files(self): + compressed = self._make_gcov_json([ + { + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [{"line_number": 45, "count": 1}], + }, + { + "file": "/repo/src/simulation/m_weno.fpp", + "lines": [{"line_number": 200, "count": 2}], + }, + ]) + result = _parse_gcov_json_output(compressed, "/repo") + assert result == {"src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"} + + def test_concatenated_json_from_batched_gcov(self): + """Batched gcov calls produce concatenated JSON objects (gcov 12).""" + obj1 = json.dumps({ + "format_version": "1", + "gcc_version": "12.3.0", + "files": [{ + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [{"line_number": 45, "count": 3}], + }], + }) + obj2 = json.dumps({ + "format_version": "1", + "gcc_version": "12.3.0", + "files": [{ + "file": "/repo/src/simulation/m_weno.fpp", + "lines": [{"line_number": 10, "count": 1}], + }], + }) + raw = (obj1 + "\n" + obj2).encode() + 
result = _parse_gcov_json_output(raw, "/repo") + assert result == {"src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"} + + def test_concatenated_json_skips_zero_coverage(self): + """Batched gcov: files with zero coverage are excluded.""" + obj1 = json.dumps({ + "format_version": "1", + "files": [{ + "file": "/repo/src/simulation/m_rhs.fpp", + "lines": [{"line_number": 45, "count": 3}], + }], + }) + obj2 = json.dumps({ + "format_version": "1", + "files": [{ + "file": "/repo/src/simulation/m_weno.fpp", + "lines": [{"line_number": 10, "count": 0}], + }], + }) + raw = (obj1 + "\n" + obj2).encode() + result = _parse_gcov_json_output(raw, "/repo") + assert result == {"src/simulation/m_rhs.fpp"} + + +# =========================================================================== +# Group 6: _normalize_cache — old format conversion +# =========================================================================== + +class TestNormalizeCache(unittest.TestCase): + + def test_converts_old_line_level_format(self): + """Old format {uuid: {file: [lines]}} -> new format {uuid: [files]}.""" + old_cache = { + "TEST_A": { + "src/simulation/m_rhs.fpp": [45, 46, 47], + "src/simulation/m_weno.fpp": [100, 200], + }, + "TEST_B": { + "src/simulation/m_bubbles.fpp": [10], + }, + "_meta": {"cases_hash": "abc123"}, + } + result = _normalize_cache(old_cache) + assert isinstance(result["TEST_A"], list) + assert set(result["TEST_A"]) == {"src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"} + assert result["TEST_B"] == ["src/simulation/m_bubbles.fpp"] + assert result["_meta"] == {"cases_hash": "abc123"} + + def test_new_format_unchanged(self): + """New format {uuid: [files]} passes through unchanged.""" + new_cache = { + "TEST_A": ["src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"], + "_meta": {"cases_hash": "abc123"}, + } + result = _normalize_cache(new_cache) + assert result["TEST_A"] == ["src/simulation/m_rhs.fpp", "src/simulation/m_weno.fpp"] + + def 
test_empty_coverage_dict_becomes_empty_list(self): + """Test with 0 coverage (old format: empty dict) -> empty list.""" + old_cache = {"TEST_A": {}, "_meta": {"cases_hash": "abc"}} + result = _normalize_cache(old_cache) + assert result["TEST_A"] == [] + + +# =========================================================================== +# Group 7: Cache path format +# =========================================================================== + +class TestCachePath(unittest.TestCase): + + def test_cache_path_is_gzipped(self): + """Cache file must use .json.gz so it can be committed to the repo.""" + assert str(COVERAGE_CACHE_PATH).endswith(".json.gz") + + +if __name__ == "__main__": + unittest.main()