diff --git a/src/ops/group.c b/src/ops/group.c index 46e8097c..063c961d 100644 --- a/src/ops/group.c +++ b/src/ops/group.c @@ -288,24 +288,48 @@ static void cd_part_dedup_fn(void* ctx, uint32_t worker_id, /* Width-specialised value extraction for the partition pass. Reading * row-by-row through read_col_i64 was the dispatch overhead in the * sequential path; specialising on the column width lets the autovec - * pass tighten the loop. */ + * pass tighten the loop. + * + * Indexing note: histograms and cursors are keyed by *task index*, not + * worker id. ray_pool_dispatch's ring is work-stealing — the same + * worker_id can claim different tasks across two consecutive + * dispatches, so the row range processed by worker w in pass 1 + * (histogram) need not match the range processed by worker w in pass 2 + * (scatter). Using worker_id as the cursor key would let pass 2 + * scatter writes overshoot the slot reserved by pass 1, mangle the + * partition layout, and over- or under-count distinct values + * non-deterministically. Task index is stable across passes because + * the row range tied to task t is fixed at dispatch-fill time. */ typedef struct { const void* base; - int64_t* counts; /* P per-partition row counts (per worker) */ + int64_t* counts; /* P per-partition row counts (per task) */ uint32_t p_bits; uint64_t p_mask; + int64_t grain; /* rows per task (last task may have fewer) */ + int64_t total; /* total row count */ uint8_t stride_log2; /* log2(elem size) for plain int paths */ uint8_t is_f64; int8_t type; uint8_t attrs; } cd_count_ctx_t; -/* Count rows per partition (per worker, into worker-local slot). Two - * passes: this one fills the histograms; the next does the scatter. */ +/* Count rows per partition (per task, into task-local slot). Two + * passes: this one fills the histograms; the next does the scatter. + * Dispatched via ray_pool_dispatch_n with start=task_idx so the + * cursor key is stable across the histogram and scatter passes. 
*/ static void cd_hist_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t end) { + (void)worker_id; + (void)end; cd_count_ctx_t* x = (cd_count_ctx_t*)ctx; - int64_t* hist = x->counts + (size_t)worker_id * (x->p_mask + 1); + int64_t task_idx = start; + int64_t row_start = task_idx * x->grain; + int64_t row_end = row_start + x->grain; + if (row_end > x->total) row_end = x->total; + int64_t* hist = x->counts + (size_t)task_idx * (x->p_mask + 1); + /* Reuse the existing tight loops by aliasing the local names. */ + start = row_start; + end = row_end; const void* base = x->base; int8_t in_type = x->type; uint8_t in_attrs = x->attrs; @@ -373,9 +397,11 @@ static void cd_hist_fn(void* ctx, uint32_t worker_id, typedef struct { const void* base; int64_t* out_buf; /* concatenated payloads (output) */ - int64_t* cursor; /* per-worker × P; advances per scatter */ + int64_t* cursor; /* per-task × P; advances per scatter */ uint32_t p_bits; uint64_t p_mask; + int64_t grain; /* rows per task (last task may have fewer) */ + int64_t total; /* total row count */ uint8_t is_f64; int8_t type; uint8_t attrs; @@ -383,8 +409,17 @@ typedef struct { static void cd_scatter_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t end) { + (void)worker_id; + (void)end; cd_scatter_ctx_t* x = (cd_scatter_ctx_t*)ctx; - int64_t* cur = x->cursor + (size_t)worker_id * (x->p_mask + 1); + int64_t task_idx = start; + int64_t row_start = task_idx * x->grain; + int64_t row_end = row_start + x->grain; + if (row_end > x->total) row_end = x->total; + int64_t* cur = x->cursor + (size_t)task_idx * (x->p_mask + 1); + /* Reuse the existing tight loops by aliasing the local names. 
*/ + start = row_start; + end = row_end; int64_t* out = x->out_buf; const void* base = x->base; int8_t in_type = x->type; @@ -541,31 +576,51 @@ ray_t* exec_count_distinct(ray_graph_t* g, ray_op_t* op, ray_t* input) { uint64_t P = (uint64_t)1 << p_bits; uint64_t p_mask = P - 1; - /* Pass 1: per-worker histogram (P × nw int64 cells). */ + /* Histograms and cursors are keyed by *task* index, not worker id, so + * pass-2 scatter writes land in the slot that pass-1 histogram + * reserved. A worker may execute different tasks in the two passes + * (the dispatch ring is work-stealing); the row range tied to a task + * is fixed when ray_pool_dispatch_n fills the ring. */ + int64_t grain = (int64_t)RAY_DISPATCH_MORSELS * RAY_MORSEL_ELEMS; + if (grain <= 0) grain = 8192; + int64_t n_tasks_64 = (len + grain - 1) / grain; + if (n_tasks_64 <= 0) n_tasks_64 = 1; + /* MAX_RING_CAP guards against pathological len; if we'd exceed it, + * fall back to the sequential kernel — the cap is high enough that + * this only fires on absurd inputs. */ + if (n_tasks_64 > (1u << 16)) { + int64_t cnt = cd_seq_count(in_type, input->attrs, base, len); + if (cnt < 0) return ray_error("oom", NULL); + return ray_i64(cnt); + } + uint32_t n_tasks = (uint32_t)n_tasks_64; + + /* Pass 1: per-task histogram (P × n_tasks int64 cells). */ ray_t* hist_hdr = NULL; int64_t* hist = (int64_t*)scratch_calloc(&hist_hdr, - (size_t)P * nw * sizeof(int64_t)); + (size_t)P * n_tasks * sizeof(int64_t)); if (!hist) { return ray_error("oom", NULL); } cd_count_ctx_t hctx = { .base = base, .counts = hist, .p_bits = p_bits, .p_mask = p_mask, + .grain = grain, .total = len, .stride_log2 = 0, .is_f64 = (in_type == RAY_F64), .type = in_type, .attrs = input->attrs, }; - ray_pool_dispatch(pool, cd_hist_fn, &hctx, len); + ray_pool_dispatch_n(pool, cd_hist_fn, &hctx, n_tasks); - /* Convert per-worker histograms into a global prefix sum. 
Order: - * partition_0_worker_0, partition_0_worker_1, …, partition_1_worker_0, … - * so each (worker, partition) range is a contiguous slice of out_buf. */ + /* Convert per-task histograms into a global prefix sum. Order: + * partition_0_task_0, partition_0_task_1, …, partition_1_task_0, … + * so each (task, partition) range is a contiguous slice of out_buf. */ ray_t* off_hdr = NULL; int64_t* part_off = (int64_t*)scratch_alloc(&off_hdr, (size_t)(P + 1) * sizeof(int64_t)); if (!part_off) { scratch_free(hist_hdr); return ray_error("oom", NULL); } ray_t* cur_hdr = NULL; int64_t* cursor = (int64_t*)scratch_alloc(&cur_hdr, - (size_t)P * nw * sizeof(int64_t)); + (size_t)P * n_tasks * sizeof(int64_t)); if (!cursor) { scratch_free(off_hdr); scratch_free(hist_hdr); return ray_error("oom", NULL); @@ -574,9 +629,9 @@ ray_t* exec_count_distinct(ray_graph_t* g, ray_op_t* op, ray_t* input) { int64_t total = 0; for (uint64_t p = 0; p < P; p++) { part_off[p] = total; - for (uint32_t w = 0; w < nw; w++) { - cursor[(size_t)w * P + p] = total; - total += hist[(size_t)w * P + p]; + for (uint32_t t = 0; t < n_tasks; t++) { + cursor[(size_t)t * P + p] = total; + total += hist[(size_t)t * P + p]; } } part_off[P] = total; @@ -598,10 +653,11 @@ ray_t* exec_count_distinct(ray_graph_t* g, ray_op_t* op, ray_t* input) { cd_scatter_ctx_t sctx = { .base = base, .out_buf = out_buf, .cursor = cursor, .p_bits = p_bits, .p_mask = p_mask, + .grain = grain, .total = len, .is_f64 = (in_type == RAY_F64), .type = in_type, .attrs = input->attrs, }; - ray_pool_dispatch(pool, cd_scatter_fn, &sctx, len); + ray_pool_dispatch_n(pool, cd_scatter_fn, &sctx, n_tasks); /* Pass 3: dedup each partition in parallel. 
Each partition gets one * task — distinct values land in the same partition, so per-partition @@ -3414,6 +3470,11 @@ ray_t* exec_group(ray_graph_t* g, ray_op_t* op, ray_t* tbl, key_owned[k] = 1; } } + if (!key_vecs[k]) { + for (uint8_t j = 0; j < k; j++) + if (key_owned[j] && key_vecs[j]) ray_release(key_vecs[j]); + return ray_error("domain", "by: column not found in table"); + } } /* Resolve agg input columns (VLA — n_aggs ≤ 8; use ≥1 to avoid zero-size VLA UB) */ diff --git a/src/ops/query.c b/src/ops/query.c index 9086a8d9..fdb9a6e1 100644 --- a/src/ops/query.c +++ b/src/ops/query.c @@ -2224,10 +2224,20 @@ static ray_t* atom_broadcast_vec(ray_t* a, int64_t n) { int8_t vec_type = (int8_t)(-a->type); if (vec_type <= 0) return NULL; + /* SYM atoms produced by ray_sym(id) carry no width attr (always 0 → + * W8), so we can't trust a->attrs when the id exceeds one byte. + * Pick the narrowest width that fits a->i64. */ + uint8_t sym_w = 0; + if (vec_type == RAY_SYM) { + uint64_t id = (uint64_t)a->i64; + sym_w = id <= 0xFFu ? RAY_SYM_W8 + : id <= 0xFFFFu ? RAY_SYM_W16 + : id <= 0xFFFFFFFFu ? RAY_SYM_W32 + : RAY_SYM_W64; + } ray_t* v; if (vec_type == RAY_SYM) { - uint8_t w = (uint8_t)(a->attrs & RAY_SYM_W_MASK); - v = ray_sym_vec_new(w, n); + v = ray_sym_vec_new(sym_w, n); } else { v = ray_vec_new(vec_type, n); } @@ -2269,20 +2279,22 @@ static ray_t* atom_broadcast_vec(ray_t* a, int64_t n) { break; } case RAY_SYM: { - /* SYM stores the ID in `i64` regardless of width; truncate per - * the vector's width attribute. Width came from the atom and - * was carried by ray_sym_vec_new above. */ - uint8_t w = (uint8_t)(a->attrs & RAY_SYM_W_MASK); - if (w == RAY_SYM_W8) { + /* Width was selected above to fit a->i64, not read from a->attrs + * (atom-built syms never set the width attr). 
*/ + if (sym_w == RAY_SYM_W8) { memset(dst, (uint8_t)a->i64, (size_t)n); - } else if (w == RAY_SYM_W16) { + } else if (sym_w == RAY_SYM_W16) { uint16_t val = (uint16_t)a->i64; uint16_t* d = (uint16_t*)dst; for (int64_t i = 0; i < n; i++) d[i] = val; - } else { /* W32 — default */ + } else if (sym_w == RAY_SYM_W32) { uint32_t val = (uint32_t)a->i64; uint32_t* d = (uint32_t*)dst; for (int64_t i = 0; i < n; i++) d[i] = val; + } else { /* W64 */ + int64_t val = a->i64; + int64_t* d = (int64_t*)dst; + for (int64_t i = 0; i < n; i++) d[i] = val; } break; } diff --git a/test/main.c b/test/main.c index 1c3f26f8..4ce12d99 100644 --- a/test/main.c +++ b/test/main.c @@ -111,6 +111,7 @@ extern const test_entry_t graph_entries[]; extern const test_entry_t graph_builtin_entries[]; extern const test_entry_t group_extra_entries[]; extern const test_entry_t fused_group_entries[]; +extern const test_entry_t fused_topk_entries[]; extern const test_entry_t hash_entries[]; extern const test_entry_t heap_entries[]; extern const test_entry_t index_entries[]; @@ -155,6 +156,7 @@ static const test_entry_t* const compiled_groups[] = { format_entries, fvec_entries, graph_entries, graph_builtin_entries, group_extra_entries, fused_group_entries, + fused_topk_entries, hash_entries, heap_entries, index_entries, ipc_entries, diff --git a/test/rfl/agg/count_distinct.rfl b/test/rfl/agg/count_distinct.rfl new file mode 100644 index 00000000..c5824829 --- /dev/null +++ b/test/rfl/agg/count_distinct.rfl @@ -0,0 +1,84 @@ +;; Coverage tests for count(distinct) — exec_count_distinct + the +;; parallel partitioned kernel (cd_hist_fn / cd_scatter_fn / +;; cd_part_dedup_fn) in src/ops/group.c. +;; +;; Trigger conditions for the parallel path: +;; - len >= 65536 (1 << 16) AND a worker pool is available. +;; +;; Type coverage (flat numeric only — owned by this agent per the brief): +;; I64, I32, I16, U8, BOOL, F64. +;; SYM/STR/GUID are owned by other agents. 
+;; +;; Each test feeds (count (distinct ...)) which the optimiser rewrites +;; to OP_COUNT_DISTINCT (see ops/idiom.c rw_count_distinct), routing +;; through exec_count_distinct. +;; +;; The parallel kernel keys its histograms and scatter cursors by *task* +;; index (stable across passes), not worker id (work-stealing means a +;; worker can claim different tasks across two consecutive dispatches). +;; The earlier worker-id keying caused non-deterministic under-counts; +;; tests below assert exact distinct counts. + +;; ────────────── 1. Sub-threshold sequential path (len < 65536) ────────────── +;; Guarantees coverage of cd_seq_count for each base type. Existing +;; ops/idiom.rfl covers I64; here we add the small-input arms for the +;; narrow types that were missing. +(count (distinct (as 'I32 [1 2 3 1 2]))) -- 3 +(count (distinct (as 'I16 [10 20 10 30]))) -- 3 +(count (distinct (as 'U8 [0 1 2 1 0 2]))) -- 3 +(count (distinct (as 'BOOL [true false true false]))) -- 2 +(count (distinct (as 'F64 [1.5 2.5 1.5 3.5 2.5]))) -- 3 + +;; Just below the threshold: sequential cd_seq_count for I64 / F64 / +;; narrow ints with a meaningful payload (sub-65536 hits the seq path). +(count (distinct (til 65535))) -- 65535 +(count (distinct (% (til 65535) 999))) -- 999 +(count (distinct (as 'F64 (til 65535)))) -- 65535 +(count (distinct (as 'I32 (% (til 65535) 500)))) -- 500 +(count (distinct (as 'I16 (% (til 65535) 250)))) -- 250 +(count (distinct (as 'U8 (% (til 65535) 200)))) -- 200 + +;; ────────────── 2. Parallel path: I64 (len >= 65536) ────────────── +;; Triggers the cd_hist_fn / cd_scatter_fn I64 arm and cd_part_dedup_fn +;; over int64 payloads. +(count (distinct (til 65600))) -- 65600 +(count (distinct (% (til 65600) 1000))) -- 1000 + +;; ────────────── 3. Parallel path: I32 (len >= 65536) ────────────── +(count (distinct (as 'I32 (% (til 65600) 500)))) -- 500 + +;; ────────────── 4. 
Parallel path: I16 (len >= 65536) ────────────── +(count (distinct (as 'I16 (% (til 65600) 250)))) -- 250 + +;; ────────────── 5. Parallel path: U8 (len >= 65536) ────────────── +;; U8 cannot hold > 255 distinct values, so cap at 200. +(count (distinct (as 'U8 (% (til 65600) 200)))) -- 200 + +;; ────────────── 6. Parallel path: BOOL (len >= 65536) ────────────── +;; BOOL is exactly two values. Alternating pattern → both values appear. +(count (distinct (== 0 (% (til 65600) 2)))) -- 2 + +;; ────────────── 7. Parallel path: F64 (len >= 65536) ────────────── +;; Triggers cd_hist_fn / cd_scatter_fn F64 arms including the +;; NaN/0.0 normalisation. +(count (distinct (as 'F64 (til 65600)))) -- 65600 +(count (distinct (as 'F64 (% (til 65600) 100)))) -- 100 + +;; ────────────── 8. Per-group count(distinct) over >= 200000 rows ────────────── +;; Routes through ray_count_distinct_per_group → count_distinct_per_group_parallel +;; (group.c L840-949: cdpg_hist_fn, cdpg_scat_fn, cdpg_dedup_fn, cdpg_read). +;; query.c:5703 selects ray_count_distinct_per_group when n_groups > 50000; +;; group.c:991 dispatches the parallel kernel when n_rows >= 200000. +;; So we need: n_rows >= 200000 AND n_groups > 50000, with a numeric key. +;; Note: a more thorough C-level test exists at +;; test/test_group_extra.c::test_count_distinct_per_group_parallel — +;; it bypasses rfl plumbing for ~100x speed. This rfl form provides +;; end-to-end planner coverage on the same dispatch. +;; Use exactly the threshold (200000) — anything below skips the parallel +;; kernel. Use 51000 groups (>50000 routes through ray_count_distinct_per_group). +(set Ncdpg 200000) +(set Tcdpg (table [g v] (list (% (til Ncdpg) 51000) (% (til Ncdpg) 4)))) +;; n_groups = 51000, n_rows = 200000 → count_distinct_per_group_parallel. +;; Reduced to 4 distinct per group so cdpg_dedup_fn HT settles fast. 
+(set Rcdpg (select {n: (count (distinct v)) from: Tcdpg by: g})) +(count Rcdpg) -- 51000 diff --git a/test/rfl/ops/group_coverage.rfl b/test/rfl/ops/group_coverage.rfl index f823d99c..cb59f195 100644 --- a/test/rfl/ops/group_coverage.rfl +++ b/test/rfl/ops/group_coverage.rfl @@ -892,3 +892,174 @@ ;; var_pop(v*1): L3832 case OP_VAR_POP (set Rhtf_vp (select {vp: (var_pop (* v 1.0)) from: Thtf by: g})) (count Rhtf_vp) -- 3 + +;; ────────────── 75. SYM reduction with selection + nulls (group.c L144-176) ────────────── +;; reduce_range's SYM branch (group.c L130-179) has 4 sub-paths: +;; !has_nulls && !idx : already covered (large SYM agg sans where:) +;; !has_nulls && idx : SYM agg with where: clause (selection only) +;; has_nulls && !idx : SYM agg without where: but column has nulls +;; has_nulls && idx : SYM agg WITH where: AND nulls in column (L165-176) +;; +;; The combined-null+selection arm (L165-176) is the deepest uncovered. +;; Trigger via scalar agg (no by:) on a SYM column carrying ' (null sym) +;; with a where: filter so g->selection is installed and exec_reduction +;; routes through reduce_range with sel_idx != NULL. +;; Use a literal SYM vector with nulls; min on SYM walks through L165-176. +(set Tsymsel (table [s v] (list ['a 'b 'c 'a 'b ' 'c 'a 'b 'c] [0 1 2 3 4 5 6 7 8 9]))) +;; Without where: — has_nulls=true, idx=NULL → L154-164 branch +(at (at (select {c: (count s) from: Tsymsel}) 'c) 0) -- 10 +;; With where: — has_nulls=true, idx!=NULL → L165-176 branch (deepest) +(at (at (select {c: (count s) from: Tsymsel where: (< v 8)}) 'c) 0) -- 8 +;; SYM count via selection that includes the null row +(at (at (select {c: (count s) from: Tsymsel where: (>= v 5)}) 'c) 0) -- 5 + +;; Sized just below parallel threshold to keep the sequential reduce_range +;; path with sel_idx; covers the !has_nulls && idx branch L143-153. 
+(set Tsymsel2 (table [s v] (list (take ['p 'q 'r] 1000) (til 1000)))) +(at (at (select {c: (count s) from: Tsymsel2 where: (< v 500)}) 'c) 0) -- 500 +(at (at (select {c: (count s) from: Tsymsel2 where: (>= v 700)}) 'c) 0) -- 300 + +;; Larger SYM column with nulls AND where: — exercises parallel reduce +;; with sel_idx + has_nulls (groups.c L154/L165 but in parallel via +;; par_reduce_fn). Use 70k rows to cross RAY_PARALLEL_THRESHOLD. +(set Nlsel 70000) +(set Tslsel (table [s v] (list (take ['x 'y 'z '] Nlsel) (til Nlsel)))) +;; count(s) where v<10000 — selection-aware reduction on SYM with nulls. +(at (at (select {c: (count s) from: Tslsel where: (< v 10000)}) 'c) 0) -- 10000 +(at (at (select {c: (count s) from: Tslsel where: (>= v 60000)}) 'c) 0) -- 10000 + +;; ────────────── 76. DA-path F64 affine SUM (group.c L2326) ────────────── +;; Section 28 covered I64 affine SUM emit at L2365. Line 2326 +;; (`v += affine[a].bias_f64 * counts[gi]` for F64 SUM) needs a +;; DA-path group-by with F64 agg input + affine bias. Use a narrow-int +;; key (DA-eligible) and a F64 agg column for sum(f + const). +(set TafF (table [g f] (list ['a 'b 'a 'b 'a 'b] (as 'F64 [1.0 2.0 3.0 4.0 5.0 6.0])))) +;; Group 'a: f∈{1,3,5}, sum(f+10) = 9 + 30 = 39 +;; Group 'b: f∈{2,4,6}, sum(f+10) = 12 + 30 = 42 +(sum (at (select {s: (sum (+ f 10.0)) from: TafF by: g}) 's)) -- 81.0 +;; Verify count is 2 groups +(count (select {s: (sum (+ f 10.0)) from: TafF by: g})) -- 2 + +;; ────────────── 77. Scalar agg parallel: F64 affine sum + count combined ────────────── +;; Parallel scalar path with F64 affine SUM: scalar_sum_f64_fn (or +;; scalar_accum_fn for multi-agg) over (sum (+ f const)). +(set Tafs (table [f] (list (as 'F64 (til 70000))))) +;; sum(f+1) over til 70000 = sum(f) + 70000 = (70000*69999/2) + 70000 = 2449965000 + 70000 +(at (at (select {s: (sum (+ f 1.0)) from: Tafs}) 's) 0) -- 2450035000.0 + +;; ────────────── 78. 
(chunk 8) FIRST/LAST + by: with I16/U8 KEY (DA path) ────────────── +;; The narrow-int side is the GROUP KEY, with I64 agg input — exercises +;; DA composite GID with esz=2/esz=1 keys + first/last accumulator. +;; (Existing tests in section 30/32 don't combine the narrow-int key +;; with first/last in the same select.) +(set NkfI16 100) +(set TkfI16 (table [k v] (list (as 'I16 (% (til NkfI16) 4)) (til NkfI16)))) +(count (select {fi: (first v) la: (last v) from: TkfI16 by: k})) -- 4 +;; group k=0 sees rows 0,4,8,...,96 → first=0, last=96 +;; group k=1 sees rows 1,5,9,...,97 → first=1, last=97 +;; ...; sum of firsts = 0+1+2+3 = 6; sum of lasts = 4*(NkfI16-4)+6 = 390 +(sum (at (select {fi: (first v) from: TkfI16 by: k}) 'fi)) -- 6 +(sum (at (select {la: (last v) from: TkfI16 by: k}) 'la)) -- 390 + +;; U8 narrow-int KEY with FIRST/LAST + by: (DA path, esz=1) +(set NkfU8 100) +(set TkfU8 (table [k v] (list (as 'U8 (% (til NkfU8) 5)) (til NkfU8)))) +(count (select {fi: (first v) la: (last v) from: TkfU8 by: k})) -- 5 +(sum (at (select {fi: (first v) from: TkfU8 by: k}) 'fi)) -- 10 +(sum (at (select {la: (last v) from: TkfU8 by: k}) 'la)) -- 485 + +;; ────────────── 79. Scalar narrow-int agg (no by) — BOOL/U8/I16 (chunk 7) ────────────── +;; Scalar reduction on narrow-int columns; coverage of the read_col_i64 +;; dispatch in reduce_range's SYM/narrow paths. +(set Tnar (table [b u s] (list (take [true false true false] 200) (as 'U8 (% (til 200) 17)) (as 'I16 (% (til 200) 30))))) +;; sum on BOOL: counts true bits +(at (at (select {sb: (sum b) from: Tnar}) 'sb) 0) -- 100 +;; sum on U8 — values 0..16 cycling. 200/17 = 11 full cycles (sum 11*136 +;; = 1496) + 13 leftover rows with values 0..12 (sum 78) → total 1574. 
+(at (at (select {su: (sum u) from: Tnar}) 'su) 0) -- 1574 +;; min/max on U8 (U8 atoms format with 0x prefix; compare via type-cast) +(at (at (select {mn: (min u) from: Tnar}) 'mn) 0) -- 0x00 +(at (at (select {mx: (max u) from: Tnar}) 'mx) 0) -- 0x10 +;; min/max on I16 +(at (at (select {mn: (min s) from: Tnar}) 'mn) 0) -- 0h +(at (at (select {mx: (max s) from: Tnar}) 'mx) 0) -- 29h + +;; ────────────── 80. Scalar narrow agg parallel (>= 65k rows) ────────────── +;; Parallel scalar reduction on narrow-int columns (esz=1, esz=2, esz=4). +;; Drives the per-worker reduce_range with non-RAY_F64/RAY_I64 base type +;; via DISPATCH_I in scalar_accum_fn. +(set Nnar 70000) +(set TnarP (table [b u h i] (list (take [true false] Nnar) (as 'U8 (% (til Nnar) 100)) (as 'I16 (% (til Nnar) 1000)) (as 'I32 (% (til Nnar) 10000))))) +;; sum on BOOL parallel: 70000/2 = 35000 trues +(at (at (select {sb: (sum b) from: TnarP}) 'sb) 0) -- 35000 +;; min/max on U8/I16/I32 in parallel (atom literals carry the typed +;; suffix when promoted by min/max; compare with the matching literal) +(at (at (select {mn: (min u) mx: (max u) from: TnarP}) 'mn) 0) -- 0x00 +(at (at (select {mn: (min u) mx: (max u) from: TnarP}) 'mx) 0) -- 0x63 +(at (at (select {mn: (min h) mx: (max h) from: TnarP}) 'mn) 0) -- 0h +(at (at (select {mn: (min h) mx: (max h) from: TnarP}) 'mx) 0) -- 999h +(at (at (select {mn: (min i) mx: (max i) from: TnarP}) 'mn) 0) -- 0i +(at (at (select {mn: (min i) mx: (max i) from: TnarP}) 'mx) 0) -- 9999i + +;; ────────────── 81. Group-by narrow-int key with stat aggs (chunk 5 — DA path) ────────────── +;; DA path n_slots small + narrow-int key + multiple stat aggs (sum, +;; sumsq, count) — exercises the F64 emit branches for var/stddev with +;; narrow-int agg input through DA accumulation. 
+(set TdaN (table [k v] (list (as 'I16 (% (til 600) 30)) (% (til 600) 50)))) +(count (select {s: (sum v) v: (var v) sd: (stddev v) from: TdaN by: k})) -- 30 +;; Via U8 key: hits da_accum_fn / da_merge_fn arms with esz=1 keys +(set TdaU (table [k v] (list (as 'U8 (% (til 600) 30)) (% (til 600) 50)))) +(count (select {s: (sum v) v: (var v) sd: (stddev v) from: TdaU by: k})) -- 30 + +;; ────────────── 82. Per-group count(distinct) with low cardinality (≤50000 groups) ────────────── +;; Routes through count_distinct_per_group_buf (query.c L1849), the +;; cdpg_buf_par_fn parallel variant when n_groups ≥ 4. Smaller scale +;; than chunks 9/10 above so the buf path (not group.c's parallel) fires. +(set Tcdb (table [g v] (list (% (til 1000) 10) (% (til 1000) 20)))) +(set Rcdb (select {n: (count (distinct v)) from: Tcdb by: g})) +(count Rcdb) -- 10 +;; Group g sees rows {g, g+10, g+20, ..., g+990}; values are +;; (g)%20, (g+10)%20, (g+20)%20=g%20, ...; with 100 rows per group and +;; values cycling g%20 and (g+10)%20 → 2 distinct per group. +(min (at Rcdb 'n)) -- 2 +(max (at Rcdb 'n)) -- 2 + +;; ────────────── 83. Group HT grow/rehash mid-grouping ────────────── +;; Force a rehash by exceeding the initial HT capacity. Default initial +;; capacity is around 1024 entries; using 4096 distinct keys with a +;; non-DA-eligible key (F64) forces the HT path → group_ht_grow + +;; group_ht_rehash (group.c L1482-1547). Multi-agg drives the per-row +;; HT probe so growth latency is amortised. +(set Nrh 4096) +(set Trh (table [k v] (list (as 'F64 (til Nrh)) (til Nrh)))) +(count (select {s: (sum v) c: (count v) from: Trh by: k})) -- Nrh +(sum (at (select {s: (sum v) from: Trh by: k}) 's)) -- 8386560 + +;; ────────────── 84. Empty/null group handling — agg input column is all-null ────────────── +;; (chunk 6) When an agg input column has every row null, the +;; insufficient-cnt path fires for every group → grp_set_null is called +;; per group (group.c L1332/L2344) to mark NULL. 
+(set Tnu (table [g v] (list ['x 'y 'x 'y] [0Nl 0Nl 0Nl 0Nl]))) +(count (select {s: (sum v) from: Tnu by: g})) -- 2 +;; sum of all-null grouped column: each group gets sum=0 (count==0 path). +(sum (at (select {s: (sum v) from: Tnu by: g}) 's)) -- 0 +;; var/stddev with all-null group: result column has nulls or zeros. +;; Test the count to verify the path runs; exact null-vs-zero depends +;; on fast-path vs. full-eval — both are valid, both hit emit code. +(count (select {v: (var v) sd: (stddev v) from: Tnu by: g})) -- 2 + +;; Mixed-null agg input: one group has all nulls, another has values. +(set Tnm (table [g v] (list ['a 'a 'b 'b] [0Nl 0Nl 1 2]))) +(count (select {s: (sum v) c: (count v) from: Tnm by: g})) -- 2 +;; group 'a: all nulls → sum=0; group 'b: sum=3 +(sum (at (select {s: (sum v) from: Tnm by: g}) 's)) -- 3 + +;; ────────────── 85. Group HT growth on multi-key composite + F64 ────────────── +;; F64 + I64 composite keys with cardinality > initial HT cap → forces +;; both group_ht_grow (rows) and group_ht_rehash (slots) at scale. +(set Nrh2 2000) +(set Trh2 (table [a b v] (list (as 'F64 (til Nrh2)) (til Nrh2) (til Nrh2)))) +(count (select {s: (sum v) from: Trh2 by: [a b]})) -- Nrh2 +;; sum 0..1999 = 1999*2000/2 +(sum (at (select {s: (sum v) from: Trh2 by: [a b]}) 's)) -- 1999000 + diff --git a/test/rfl/ops/query_coverage.rfl b/test/rfl/ops/query_coverage.rfl index 98b8154c..fa52262d 100644 --- a/test/rfl/ops/query_coverage.rfl +++ b/test/rfl/ops/query_coverage.rfl @@ -834,3 +834,261 @@ ;; Non-symbol key in list → type error (line 5342-5345). 
(left-join TJl TJr (list 1 2)) !- type (inner-join TJl TJr (list 'a 5)) !- type + +;; ==================================================================== +;; CHUNK 2: atom_broadcast_vec — query.c:2222-2301 +;; +;; Triggered when DAG group-by produces all-literal non-agg columns: +;; - by-clause with a SCALAR (numeric/SYM) by-key (DAG path) +;; - one or more non-agg projection expressions are LITERAL ATOMS +;; with no NAME attr and no column refs +;; - all_broadcastable=1 → atom_broadcast_vec called per literal +;; +;; This is the Q35 fast path: `{one: 1, c: count(URL), by: URL}`-shape +;; queries. Each atom-type arm of the switch is exercised here. +;; ==================================================================== + +;; Setup: SYM by-key forces DAG path (NOT GUID/STR which go eval). +(set TBcast (table [g v] (list ['a 'b 'a 'b 'a 'b] [10 20 30 40 50 60]))) + +;; I64 literal broadcast (RAY_I64 arm at 2258-2264). +;; Trigger: {one: 1 c: (count v) by: g}. one=1 is I64 atom, count is agg. +(count (at (select {one: 1 c: (count v) from: TBcast by: g}) 'one)) -- 2 +(at (at (select {one: 1 c: (count v) from: TBcast by: g}) 'one) 0) -- 1 +(at (at (select {one: 1 c: (count v) from: TBcast by: g}) 'one) 1) -- 1 + +;; F64 literal broadcast (RAY_F64 arm at 2265-2270). +(at (at (select {pi: 3.14 c: (count v) from: TBcast by: g}) 'pi) 0) -- 3.14 +(at (at (select {pi: 3.14 c: (count v) from: TBcast by: g}) 'pi) 1) -- 3.14 + +;; SYM literal broadcast (RAY_SYM arm at 2271-2293). +;; Width is selected from the sym ID magnitude, not a->attrs (atom-built +;; syms never set the width attr). Verifies value, not just length. +(at (at (select {tag: 'hello c: (count v) from: TBcast by: g}) 'tag) 0) -- 'hello +(at (at (select {tag: 'hello c: (count v) from: TBcast by: g}) 'tag) 1) -- 'hello + +;; BOOL literal broadcast (RAY_BOOL arm at 2239-2243, memset path). 
+(at (at (select {flag: true c: (count v) from: TBcast by: g}) 'flag) 0) -- true +(at (at (select {flag: true c: (count v) from: TBcast by: g}) 'flag) 1) -- true + +;; DATE literal broadcast (RAY_DATE arm at 2250-2256, int32 path). +(at (at (select {d: 2024.06.15 c: (count v) from: TBcast by: g}) 'd) 0) -- 2024.06.15 +(at (at (select {d: 2024.06.15 c: (count v) from: TBcast by: g}) 'd) 1) -- 2024.06.15 + +;; TIMESTAMP literal broadcast (RAY_TIMESTAMP arm at 2258-2264, int64 path). +(at (at (select {ts: 2024.06.15D12:00:00.000 c: (count v) from: TBcast by: g}) 'ts) 0) -- 2024.06.15D12:00:00.000 + +;; Multiple literals together — each goes through atom_broadcast_vec. +(at (at (select {a: 7 b: 1.5 d: false s: 'mark n: (count v) from: TBcast by: g}) 'a) 0) -- 7 +(at (at (select {a: 7 b: 1.5 d: false s: 'mark n: (count v) from: TBcast by: g}) 'b) 0) -- 1.5 +(at (at (select {a: 7 b: 1.5 d: false s: 'mark n: (count v) from: TBcast by: g}) 's) 0) -- 'mark + +;; Numeric scalar by-key (I64) also drives DAG path. +(set TBcastI (table [g v] (list [1 2 1 2 1 2] [10 20 30 40 50 60]))) +(at (at (select {k: 99 c: (count v) from: TBcastI by: g}) 'k) 0) -- 99 +(at (at (select {k: 99 c: (count v) from: TBcastI by: g}) 'k) 1) -- 99 + +;; ==================================================================== +;; CHUNK 5: collect_col_refs dotted-name — query.c:1224-1228 +;; +;; Reachability note: collect_col_refs is called from +;; nonagg_eval_per_group_core (1296) and eval_expr_per_row (1443). +;; The dotted branch fires when a SYM-with-NAME-attr is NOT a column +;; but its head segment IS. All known query-level paths to that +;; branch go through the row-aligned scatter (gather_by_idx) instead, +;; or hit a separate eval-binding bug with named lambdas + dotted +;; refs (see UPDATE FN/dotted-ref combo). Marking this branch as +;; unreachable from query-level RFL fixtures today. 
+;; +;; The companion expr_refs_row_column dotted branch (1093-1097) IS +;; covered below by direct dotted-ref non-agg. +;; ==================================================================== + +;; ==================================================================== +;; aggr_unary_per_group_buf computed-source path — query.c:1571-1576 +;; +;; Triggered when: +;; - DAG path with by: and mixed agg+non-agg (n_nonaggs>0, by_expr set) +;; - a non-agg is a streaming-aggr-unary call (med/dev/var/...) +;; - the inner expression of that aggr is NOT a direct column ref +;; (so col_expr->type != -RAY_SYM with NAME attr) +;; +;; The eval branch (1571-1576) binds table cols and ray_evals the +;; computed sub-expression to produce the source vec. +;; ==================================================================== + +;; med of computed expr (* v 2). Mixed with sum to force scatter. +(set TaggC (table [g v] (list ['a 'b 'a 'b 'a 'b 'a 'b] [1 2 3 4 5 6 7 8]))) +;; Group 'a vals=[1,3,5,7] → (* v 2)=[2,6,10,14] → med=8.0 +;; Group 'b vals=[2,4,6,8] → (* v 2)=[4,8,12,16] → med=10.0 +(count (at (select {s: (sum v) m: (med (* v 2)) from: TaggC by: g}) 'm)) -- 2 +(at (at (select {s: (sum v) m: (med (* v 2)) from: TaggC by: g}) 'm) 0) -- 8.0 + +;; dev of computed expr — different streaming agg fn. +(count (at (select {s: (sum v) d: (dev (+ v 10)) from: TaggC by: g}) 'd)) -- 2 + +;; ==================================================================== +;; key_read_i64 narrow-int dispatch — query.c:2032-2050 +;; +;; key_read_i64 is the type-aware key reader used by rgid_probe_fn (the +;; PARALLEL probe at >=200K rows) AND by the SERIAL probe via the same +;; dispatch. The serial probe at line 5475 uses a KEY_READ macro that +;; mirrors the same dispatch but inline; nonetheless tests that hit the +;; serial path with non-I64 keys ensure the same correctness invariants. +;; +;; Each narrow-int arm exercised: I32, I16, U8, BOOL, F64, F32 keys. 
+;; ==================================================================== + +;; I32 key (line 2038-2040 arm). Mixed agg+non-agg forces scatter path. +(set TKi32 (table [g v] (list (as 'I32 [1 2 1 2 1 2 1 2]) [10 20 30 40 50 60 70 80]))) +(count (at (select {s: (sum v) lit: 9 from: TKi32 by: g}) 'lit)) -- 2 + +;; I16 key (line 2037 arm). +(set TKi16 (table [g v] (list (as 'I16 [1 2 1 2 1 2]) [10 20 30 40 50 60]))) +(count (at (select {s: (sum v) lit: 9 from: TKi16 by: g}) 'lit)) -- 2 + +;; U8 key (line 2036 arm). +(set TKu8 (table [g v] (list (as 'U8 [1 2 1 2 1 2]) [10 20 30 40 50 60]))) +(count (at (select {s: (sum v) lit: 9 from: TKu8 by: g}) 'lit)) -- 2 + +;; BOOL key (line 2035 arm). +(set TKbool (table [g v] (list [true false true false true false] [10 20 30 40 50 60]))) +(count (at (select {s: (sum v) lit: 9 from: TKbool by: g}) 'lit)) -- 2 + +;; F64 key (line 2046-2048 arm). +(set TKf64 (table [g v] (list (as 'F64 [1.0 2.0 1.0 2.0 1.0 2.0]) [10 20 30 40 50 60]))) +(count (at (select {s: (sum v) lit: 9 from: TKf64 by: g}) 'lit)) -- 2 + +;; DATE key (line 2039 arm via I32 dispatch). +(set TKdate (table [g v] (list [2024.01.01 2024.01.02 2024.01.01 2024.01.02] [10 20 30 40]))) +(count (at (select {s: (sum v) lit: 9 from: TKdate by: g}) 'lit)) -- 2 + +;; TIMESTAMP key (line 2042 arm via I64 dispatch). +(set TKts (table [g v] (list [2024.01.01D00:00:00.000 2024.06.01D00:00:00.000 2024.01.01D00:00:00.000 2024.06.01D00:00:00.000] [10 20 30 40]))) +(count (at (select {s: (sum v) lit: 9 from: TKts by: g}) 'lit)) -- 2 + +;; ==================================================================== +;; expr_refs_row_column dotted-name branch — query.c:1093-1097 +;; +;; The dotted-name walk inside expr_refs_row_column. Triggered when: +;; - non-agg expr has a SYM with NAME attr but it's NOT a column +;; - the sym is dotted, head segment IS a column +;; - thus expr returns 1 (refs row column) +;; This in turn drives the row-aligned scatter decision. 
+;; ==================================================================== + +;; Direct dotted ref as non-agg. Routes through expr_refs_row_column, +;; finds dotted ts.yyyy, head ts is a column → returns 1 → row-aligned +;; scatter via gather_by_idx. +(set TDot2 (table [g ts] (list ['a 'b 'a 'b] [2024.01.15D00:00:00.000 2024.03.20D00:00:00.000 2024.06.10D00:00:00.000 2024.09.05D00:00:00.000]))) +(count (at (select {s: (count ts) y: ts.yyyy from: TDot2 by: g}) 'y)) -- 2 + +;; ==================================================================== +;; nonagg_eval_per_group_core — typed_vec demotion (query.c:1366-1378) +;; +;; First group returns a collapsable scalar of one type, subsequent +;; group returns DIFFERENT type → typed-direct mode demotes to LIST +;; via typed_vec_to_list. +;; +;; Use named lambda + GUID by-key (eval-level) so we go through +;; nonagg_eval_per_group_core, not the DAG buf variant. Lambda +;; returns I64 for first group, F64 for second. +;; ==================================================================== + +(set my_first_typed (fn [v] (if (<= (first v) 5) (first v) (as 'F64 (first v))))) +(set Gtypa (guid 1)) +(set Gtypb (guid 1)) +(set TTyp (table [G v] (list (concat (take Gtypa 3) (take Gtypb 3)) [1 2 3 10 20 30]))) +(count (at (select {r: (my_first_typed v) from: TTyp by: G}) 'r)) -- 2 + +;; ==================================================================== +;; Empty-rows error scenarios — eval-level group with column-not-found +;; ==================================================================== + +;; by: list referencing nonexistent column → domain error. +(set TEMPTY (table [a b] (list (til 5) (+ 100 (til 5))))) +(select {by: [a nonexistent_col] from: TEMPTY}) !- domain +(select {by: [nonexistent_col a] from: TEMPTY}) !- domain +(select {by: [nonexistent_col] from: TEMPTY}) !- domain + +;; ==================================================================== +;; nearest: take with negative atom — query.c:1623-1633 family. 
+;; ==================================================================== + +(set TE6 (table [id Vec] (list [0 1 2] (list [1.0 0.0] [0.0 1.0] [0.5 0.5])))) +;; Negative take → domain error (k must be > 0). +(select {from: TE6 nearest: (knn Vec [1.0 0.0]) take: -1}) !- domain + +;; ==================================================================== +;; Update with by: + non-existent agg result column type promotion +;; ==================================================================== + +(set TupBy (table [g v] (list ['a 'a 'b 'b 'c 'c] [1 2 3 4 5 6]))) +;; avg returns F64 even from I64 input — exercises broadcast back to F64. +(set TupBy2 (update {a: (avg v) from: TupBy by: g})) +(at (at TupBy2 'a) 0) -- 1.5 +(at (at TupBy2 'a) 5) -- 5.5 + +;; min aggregator on F64 column. +(set TupByF (table [g v] (list ['a 'a 'b 'b] (as 'F64 [1.5 2.5 3.5 4.5])))) +(set TupByF2 (update {mn: (min v) from: TupByF by: g})) +(at (at TupByF2 'mn) 0) -- 1.5 +(at (at TupByF2 'mn) 2) -- 3.5 + +;; ==================================================================== +;; SYM W8 / W16 broadcast variants — atom_broadcast_vec SYM widths. +;; +;; A `(rand-sym 200 8)` etc. produces sym vecs of various widths. +;; Wider sym vecs need >= 256/65536 distinct syms — too expensive to +;; force here. Default W32 is exercised above. Skip narrower widths: +;; their existence path is gated by the column's attrs, but the +;; literal atom inside `{tag: 'foo}` always carries W32 attrs because +;; rfl-level sym literals use the global pool's full width. 
+;; ==================================================================== + +;; ==================================================================== +;; nonagg lambda with multi-column refs — collect_col_refs deduplication +;; query.c:1230 (the dedupe early-return when same sym already collected) +;; ==================================================================== + +;; Lambda referencing same column twice — exercises the dedupe path +;; at line 1230 (for-loop returns n unchanged when duplicate found). +(set my_dup (fn [a b] (+ (first a) (first b)))) +(set Tdup (table [G x] (list (take (guid 2) 6) [1 2 3 4 5 6]))) +;; Pass x twice → collect_col_refs called twice with x; second hit returns early. +(count (at (select {r: (my_dup x x) from: Tdup by: G}) 'r)) -- 2 + +;; ==================================================================== +;; Nested LIST in expression — collect_col_refs RAY_LIST recursion +;; query.c:1235-1240 +;; ==================================================================== + +;; Nested call: outer fn wraps inner fn, both reference column x. +;; Recursive collect_col_refs handles nested LIST elements. +(set my_outer (fn [a] (* (first a) 2))) +(set my_combine (fn [a b] (+ (my_outer a) (first b)))) +(set Tnest (table [G x y] (list (take (guid 2) 6) [10 20 30 40 50 60] [1 2 3 4 5 6]))) +(count (at (select {r: (my_combine x y) from: Tnest by: G}) 'r)) -- 2 + +;; ==================================================================== +;; eval-level group LIST/STR paths — multi-column non-key columns +;; query.c:3636-3699 area. Already partially covered; ensure both +;; STR and SYM branches fire alongside numeric typed-vec branches. 
+;; ==================================================================== + +(set TGmix (table [G iv fv sv strc lst] (list (take (guid 2) 6) [10 20 30 40 50 60] (as 'F64 [1.5 2.5 3.5 4.5 5.5 6.5]) ['AAPL 'GOOG 'AAPL 'GOOG 'AAPL 'GOOG] (list "alpha" "beta" "gamma" "delta" "eps" "zeta") (list (list 1) (list 2) (list 3) (list 4) (list 5) (list 6))))) +;; First-of-group fast path with all column types: I64, F64, SYM, STR, LIST. +(count (select {from: TGmix by: G})) -- 2 +(count (at (select {from: TGmix by: G}) 'iv)) -- 2 +(count (at (select {from: TGmix by: G}) 'fv)) -- 2 +(count (at (select {from: TGmix by: G}) 'sv)) -- 2 +(count (at (select {from: TGmix by: G}) 'strc)) -- 2 +(count (at (select {from: TGmix by: G}) 'lst)) -- 2 + +;; ==================================================================== +;; eval-level group with WHERE, multi-column non-key columns +;; ==================================================================== + +(set TGwhere (table [G v lst] (list (take (guid 3) 9) [10 20 30 40 50 60 70 80 90] (list (list 1) (list 2) (list 3) (list 4) (list 5) (list 6) (list 7) (list 8) (list 9))))) +;; WHERE keeps subset; first-of-group + LIST gather still works. +(count (select {from: TGwhere by: G where: (> v 30)})) -- 3 diff --git a/test/rfl/strop/like.rfl b/test/rfl/strop/like.rfl index 5495cfc9..a1c8e6a5 100644 --- a/test/rfl/strop/like.rfl +++ b/test/rfl/strop/like.rfl @@ -87,3 +87,88 @@ (count (select {from: Tlike where: (like s "h*")})) -- 3 (count (select {from: Tlike where: (like s "h?llo")})) -- 1 (count (select {from: Tlike where: (like s "[hw]*")})) -- 4 + +;; ────────────── chunk 7: glob compile shape branches ────────────── +;; Each shape exercises a distinct dispatch case in ray_glob_match_compiled. 
+(like "abc" "abc") -- true ;; SHAPE_EXACT +(like "abc" "abd") -- false ;; SHAPE_EXACT mismatch +(like "abcdef" "abc*") -- true ;; SHAPE_PREFIX hit +(like "abx" "abc*") -- false ;; SHAPE_PREFIX miss +(like "abcdef" "*def") -- true ;; SHAPE_SUFFIX hit +(like "abcdex" "*def") -- false ;; SHAPE_SUFFIX miss +(like "fooBARbaz" "*BAR*") -- true ;; SHAPE_CONTAINS hit (memmem) +(like "foobaz" "*BAR*") -- false ;; SHAPE_CONTAINS miss +(like "anything" "*") -- true ;; SHAPE_ANY (lit_len==0, both stars) +(like "" "*") -- true ;; SHAPE_ANY empty input +(like "a" "**") -- true ;; SHAPE_NONE — interior '*' forces general matcher +(like "abc" "a*c") -- true ;; SHAPE_NONE — interior '*' forces general matcher +(like "abc" "a?c") -- true ;; SHAPE_NONE — '?' forces general matcher +(like "abc" "[a]bc") -- true ;; SHAPE_NONE — '[' forces general matcher + +;; ────────────── chunk 8: match_class edge cases ────────────── +;; Sole `-` in a class (must match literal '-'). +(like "a-z" "a[-]z") -- true +;; Embedded `]` first → literal ']' allowed (after optional '!'). +(like "a]b" "a[]]b") -- true +;; `[!]]` — negation with `]` immediately after. +(like "a]b" "a[!]]b") -- false +(like "axb" "a[!]]b") -- true +;; Range plus single chars in same class. +(like "k" "[abck]") -- true +(like "z" "[abck]") -- false +;; Hyphen at end of class (treated as literal). +(like "a-" "a[a-]") -- true +;; Bracket class with literal special chars (escape via class). +(like "*" "[*]") -- true +(like "?" "[?]") -- true + +;; ────────────── chunk 9: ILIKE — case-insensitive eval-via-select ────────────── +;; ILIKE is registered only as a DAG/query op; surface via select-where. 
+(set Til (table [s] (list ["Apple" "BANANA" "cherry" "Apricot"]))) +(count (select {from: Til where: (ilike s "a*")})) -- 2 ;; SHAPE_PREFIX, ci +(count (select {from: Til where: (ilike s "*RY")})) -- 1 ;; SHAPE_SUFFIX, ci +(count (select {from: Til where: (ilike s "*an*")})) -- 1 ;; SHAPE_CONTAINS, ci ("BANANA") +(count (select {from: Til where: (ilike s "BANANA")})) -- 1 ;; SHAPE_EXACT, ci +(count (select {from: Til where: (ilike s "?pple")})) -- 1 ;; ?, ci +(count (select {from: Til where: (ilike s "[a-z]*")})) -- 4 ;; class+star, all ci-fold to lower +(count (select {from: Til where: (ilike s "[A-Z]pple")})) -- 1 ;; class range, ci + +;; ────────────── chunk 6: ILIKE on SYM column (dict-cache LUT) ────────────── +;; exec_ilike SYM-vec branch (string.c:565-579): seen[]/lut[] short-circuits +;; per-row matcher when sym_id repeats. +(set TilS (table [s] (list ['Foo 'Bar 'foo 'BAR 'Baz]))) +(count (select {from: TilS where: (ilike s "foo")})) -- 2 ;; 'Foo + 'foo +(count (select {from: TilS where: (ilike s "bar")})) -- 2 ;; 'Bar + 'BAR +(count (select {from: TilS where: (ilike s "ba*")})) -- 3 ;; 'Bar 'BAR 'Baz +(count (select {from: TilS where: (ilike s "*OO")})) -- 2 ;; suffix ci + +;; ────────────── chunk 3: rowsel-aware exec_like (chained where+like) ────────────── +;; exec_like with g->selection set by an earlier conjunct hits the +;; selection-aware branch in like_seen_fn / like_proj_fn. +;; Cheap conjunct (>) is sorted before LIKE, so by the time exec_like +;; runs g->selection is a refined rowsel. 
+(set Tcs (table [s v] (list ['Apple 'Banana 'Cherry 'Apricot 'Berry] [1 2 3 4 5]))) +(count (select {from: Tcs where: (and (> v 1) (like s "B*"))})) -- 2 ;; v>1 then B* +(count (select {from: Tcs where: (and (> v 0) (like s "*err*"))})) -- 2 ;; 'Berry 'Cherry +(count (select {from: Tcs where: (and (> v 0) (ilike s "*err*"))})) -- 2 ;; same — already lowercase err +(count (select {from: Tcs where: (and (>= v 3) (like s "[BC]*"))})) -- 2 ;; class via rowsel-aware + +;; ────────────── chunk 4: ILIKE on STR vector (string.c:545-552) ────────────── +;; STR-vec branch of exec_ilike — different from SYM-vec branch above. +(set TilStr (table [s] (list ["Hello" "WORLD" "hi" "Help" "help"]))) +(count (select {from: TilStr where: (ilike s "h*")})) -- 4 ;; ci prefix +(count (select {from: TilStr where: (ilike s "*P")})) -- 2 ;; ci suffix ("Help","help") +(count (select {from: TilStr where: (ilike s "*ELL*")})) -- 1 ;; ci contains ("Hello") +(count (select {from: TilStr where: (ilike s "WORLD")})) -- 1 ;; ci exact +(count (select {from: TilStr where: (ilike s "h?")})) -- 1 ;; ci '?' — only "hi" +(count (select {from: TilStr where: (ilike s "[h-w]*")})) -- 5 ;; ci class range + +;; ────────────── empty-input matches against shapes ────────────── +;; SHAPE_PREFIX/SUFFIX/CONTAINS with literal — empty input fails when lit_len>0. +(like "" "abc") -- false ;; SHAPE_EXACT, sn=0 vs lit_len=3 +(like "" "a*") -- false ;; SHAPE_PREFIX, sn=0 < lit_len +(like "" "*a") -- false ;; SHAPE_SUFFIX, sn=0 < lit_len +(like "" "*a*") -- false ;; SHAPE_CONTAINS, sn=0 < lit_len + +;; SHAPE_PREFIX with empty lit ("`*`" alone is SHAPE_ANY, not PREFIX, so use trailing only) +;; — covered by `(like "" "*")` above (SHAPE_ANY). 
diff --git a/test/test_fused_group.c b/test/test_fused_group.c index 6b067d4d..e5e29095 100644 --- a/test/test_fused_group.c +++ b/test/test_fused_group.c @@ -425,6 +425,657 @@ static test_result_t test_count1_rejects_nullable_key(void) { PASS(); } +/* ───────────────────────────────────────────────────────────────────── + * Coverage chunks for fused_group.c + * ──────────────────────────────────────────────────────────────────── */ + +/* Chunk 8: fp_compile_cmp out-of-range fold — exercise EQ/NE/LT/LE/GT/GE + * × narrow types (U8/I16/I32) with the constant outside the column's + * representable range. Each call routes through the FP_FOLD_TRUE / + * FP_FOLD_FALSE branch in fp_compile_cmp (fused_group.c:471-481), which + * memsets the bits in fp_eval_cmp via the `if (p->fold)` early return. + * + * Test grid: + * U8 col with value -1 (below): LT → all-false (col < below ⇒ false), + * GT → all-true. + * U8 col with value 300 (above): LE → all-true, GE → all-false. + * I16 col with value 100000 (above INT16_MAX): LT → all-true, GE → all-false. + * I32 col with value 5e9 (above INT32_MAX): EQ → 0 rows, NE → all rows. */ +static test_result_t test_fold_u8_lt_below(void) { + ray_heap_init(); + (void)ray_sym_init(); + + ray_t* tbl = make_adv_u8_table(); /* values {0,1,0,2,0,1,44} */ + TEST_ASSERT_NOT_NULL(tbl); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_k = ray_scan(g, "AdvU8"); + ray_op_t* scan_pred = ray_scan(g, "AdvU8"); + ray_op_t* below = ray_const_i64(g, -1); /* below U8 min */ + ray_op_t* pred = ray_binop(g, OP_LT, scan_pred, below); + uint16_t agg_ops[] = { OP_COUNT }; + ray_op_t* agg_ins[] = { scan_k }; + ray_op_t* keys[] = { scan_k }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 1, agg_ops, agg_ins, 1); + ray_t* res = ray_execute(g, fused); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + /* col < (below_min) ⇒ FOLD_FALSE → no rows match → empty result. 
*/ + TEST_ASSERT_EQ_I(ray_table_nrows(res), 0); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +static test_result_t test_fold_u8_gt_below(void) { + ray_heap_init(); + (void)ray_sym_init(); + + ray_t* tbl = make_adv_u8_table(); + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_k = ray_scan(g, "AdvU8"); + ray_op_t* scan_pred = ray_scan(g, "AdvU8"); + ray_op_t* below = ray_const_i64(g, -1); + ray_op_t* pred = ray_binop(g, OP_GT, scan_pred, below); /* col > -1 ⇒ FOLD_TRUE */ + uint16_t agg_ops[] = { OP_COUNT }; + ray_op_t* agg_ins[] = { scan_k }; + ray_op_t* keys[] = { scan_k }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 1, agg_ops, agg_ins, 1); + ray_t* res = ray_execute(g, fused); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + /* All 7 rows match. 4 distinct keys: {0:3, 1:2, 2:1, 44:1}. */ + TEST_ASSERT_EQ_I(ray_table_nrows(res), 4); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +static test_result_t test_fold_u8_le_above(void) { + ray_heap_init(); + (void)ray_sym_init(); + + ray_t* tbl = make_adv_u8_table(); + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_k = ray_scan(g, "AdvU8"); + ray_op_t* scan_pred = ray_scan(g, "AdvU8"); + ray_op_t* above = ray_const_i64(g, 300); + ray_op_t* pred = ray_binop(g, OP_LE, scan_pred, above); /* col <= 300 ⇒ FOLD_TRUE */ + uint16_t agg_ops[] = { OP_COUNT }; + ray_op_t* agg_ins[] = { scan_k }; + ray_op_t* keys[] = { scan_k }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 1, agg_ops, agg_ins, 1); + ray_t* res = ray_execute(g, fused); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_table_nrows(res), 4); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +static test_result_t test_fold_u8_ge_above(void) { + ray_heap_init(); + (void)ray_sym_init(); + + ray_t* tbl = make_adv_u8_table(); + ray_graph_t* g 
= ray_graph_new(tbl); + ray_op_t* scan_k = ray_scan(g, "AdvU8"); + ray_op_t* scan_pred = ray_scan(g, "AdvU8"); + ray_op_t* above = ray_const_i64(g, 300); + ray_op_t* pred = ray_binop(g, OP_GE, scan_pred, above); /* col >= 300 ⇒ FOLD_FALSE */ + uint16_t agg_ops[] = { OP_COUNT }; + ray_op_t* agg_ins[] = { scan_k }; + ray_op_t* keys[] = { scan_k }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 1, agg_ops, agg_ins, 1); + ray_t* res = ray_execute(g, fused); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_table_nrows(res), 0); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +static test_result_t test_fold_i16_lt_above(void) { + ray_heap_init(); + (void)ray_sym_init(); + + ray_t* col = ray_vec_new(RAY_I16, 5); + col->len = 5; + int16_t v[] = {-2, -1, 0, 1, 2}; + memcpy(ray_data(col), v, sizeof(v)); + int64_t k_sym = ray_sym_intern("k", 1); + ray_t* tbl = ray_table_new(1); + tbl = ray_table_add_col(tbl, k_sym, col); ray_release(col); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_k = ray_scan(g, "k"); + ray_op_t* scan_pred = ray_scan(g, "k"); + ray_op_t* above = ray_const_i64(g, 100000); /* > INT16_MAX */ + ray_op_t* pred = ray_binop(g, OP_LT, scan_pred, above); /* FOLD_TRUE */ + uint16_t agg_ops[] = { OP_COUNT }; + ray_op_t* agg_ins[] = { scan_k }; + ray_op_t* keys[] = { scan_k }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 1, agg_ops, agg_ins, 1); + ray_t* res = ray_execute(g, fused); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + /* All 5 rows pass — 5 distinct I16 keys. 
*/ + TEST_ASSERT_EQ_I(ray_table_nrows(res), 5); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +static test_result_t test_fold_i32_eq_above(void) { + ray_heap_init(); + (void)ray_sym_init(); + + ray_t* col = ray_vec_new(RAY_I32, 4); + col->len = 4; + int32_t v[] = {1, 2, 3, 4}; + memcpy(ray_data(col), v, sizeof(v)); + int64_t k_sym = ray_sym_intern("k", 1); + ray_t* tbl = ray_table_new(1); + tbl = ray_table_add_col(tbl, k_sym, col); ray_release(col); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_k = ray_scan(g, "k"); + ray_op_t* scan_pred = ray_scan(g, "k"); + ray_op_t* above = ray_const_i64(g, 5000000000LL); /* > INT32_MAX */ + ray_op_t* pred = ray_binop(g, OP_EQ, scan_pred, above); /* FOLD_FALSE */ + uint16_t agg_ops[] = { OP_COUNT }; + ray_op_t* agg_ins[] = { scan_k }; + ray_op_t* keys[] = { scan_k }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 1, agg_ops, agg_ins, 1); + ray_t* res = ray_execute(g, fused); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_table_nrows(res), 0); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +static test_result_t test_fold_i32_ne_above(void) { + ray_heap_init(); + (void)ray_sym_init(); + + ray_t* col = ray_vec_new(RAY_I32, 4); + col->len = 4; + int32_t v[] = {0, 0, 1, 1}; + memcpy(ray_data(col), v, sizeof(v)); + int64_t k_sym = ray_sym_intern("k", 1); + ray_t* tbl = ray_table_new(1); + tbl = ray_table_add_col(tbl, k_sym, col); ray_release(col); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_k = ray_scan(g, "k"); + ray_op_t* scan_pred = ray_scan(g, "k"); + ray_op_t* above = ray_const_i64(g, 5000000000LL); + ray_op_t* pred = ray_binop(g, OP_NE, scan_pred, above); /* FOLD_TRUE */ + uint16_t agg_ops[] = { OP_COUNT }; + ray_op_t* agg_ins[] = { scan_k }; + ray_op_t* keys[] = { scan_k }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 1, agg_ops, agg_ins, 
1); + ray_t* res = ray_execute(g, fused); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + /* All rows pass; 2 distinct I32 keys. */ + TEST_ASSERT_EQ_I(ray_table_nrows(res), 2); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* Chunk 1: multi-agg / multi-key path — fused_group.c:2475-2518. + * + * Two keys (g1, g2) of total width <= 8 bytes (narrow path) and three + * aggregates (COUNT, SUM, AVG). This routes through + * exec_filtered_group_multi → mk_compile → mk_par_fn → wide=0 path; + * and triggers mk_state_merge in the global combine via AVG fold. */ +static test_result_t test_multi_agg_multi_key(void) { + ray_heap_init(); + (void)ray_sym_init(); + + /* g1 (I32) + g2 (I32) → 8 bytes total → narrow path. */ + ray_t* g1c = ray_vec_new(RAY_I32, 8); g1c->len = 8; + ray_t* g2c = ray_vec_new(RAY_I32, 8); g2c->len = 8; + ray_t* vc = ray_vec_new(RAY_I64, 8); vc->len = 8; + int32_t g1[] = {1, 1, 2, 2, 1, 1, 2, 2}; + int32_t g2[] = {1, 2, 1, 2, 1, 2, 1, 2}; + int64_t v[] = {10, 20, 30, 40, 50, 60, 70, 80}; + memcpy(ray_data(g1c), g1, sizeof(g1)); + memcpy(ray_data(g2c), g2, sizeof(g2)); + memcpy(ray_data(vc), v, sizeof(v)); + + int64_t s_g1 = ray_sym_intern("g1", 2); + int64_t s_g2 = ray_sym_intern("g2", 2); + int64_t s_v = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(3); + tbl = ray_table_add_col(tbl, s_g1, g1c); ray_release(g1c); + tbl = ray_table_add_col(tbl, s_g2, g2c); ray_release(g2c); + tbl = ray_table_add_col(tbl, s_v, vc); ray_release(vc); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_g1 = ray_scan(g, "g1"); + ray_op_t* scan_g2 = ray_scan(g, "g2"); + ray_op_t* scan_v = ray_scan(g, "v"); + ray_op_t* scan_g1p= ray_scan(g, "g1"); + ray_op_t* zero = ray_const_i64(g, 0); + ray_op_t* pred = ray_binop(g, OP_GE, scan_g1p, zero); /* always-true */ + + uint16_t agg_ops[] = { OP_COUNT, OP_SUM, OP_AVG }; + ray_op_t* agg_ins[] = { scan_v, scan_v, scan_v }; + ray_op_t* keys[] = { scan_g1, scan_g2 
}; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 2, agg_ops, agg_ins, 3); + TEST_ASSERT_NOT_NULL(fused); + + ray_t* res = ray_execute(g, fused); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + /* 4 distinct (g1, g2) pairs. */ + TEST_ASSERT_EQ_I(ray_table_nrows(res), 4); + + /* Total of all sums = sum of all v = 360. Total of all counts = 8. */ + int64_t sum_sym = ray_sym_intern("sum", 3); + int64_t cnt_sym = ray_sym_intern("count", 5); + ray_t* sum_col = ray_table_get_col(res, sum_sym); + ray_t* cnt_col = ray_table_get_col(res, cnt_sym); + int64_t total_sum = 0, total_cnt = 0; + for (int64_t i = 0; i < ray_table_nrows(res); i++) { + total_sum += ((int64_t*)ray_data(sum_col))[i]; + total_cnt += ((int64_t*)ray_data(cnt_col))[i]; + } + TEST_ASSERT_EQ_I(total_sum, 360); + TEST_ASSERT_EQ_I(total_cnt, 8); + + /* AVG column is RAY_F64. Sum of all per-group averages × count + * weights should equal total_sum. Cheaper sanity: each avg is in + * [10,80]. (Use ncols loop because avg may not be by sym=avg if + * disambiguation differs.) */ + int64_t avg_sym = ray_sym_intern("avg", 3); + ray_t* avg_col = ray_table_get_col(res, avg_sym); + TEST_ASSERT_NOT_NULL(avg_col); + TEST_ASSERT_EQ_I(avg_col->type, RAY_F64); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* Chunk 2 + 3 + 4: wide multi-key path (mk_shard_grow wide branch, + * mk_compose_key2, mk_hash_lo_hi, mk_state_merge AVG). + * + * Three I32 keys → 12 bytes total > 8 → wide=1 path. Use distinct + * key triples so the HT has many entries; with 100K rows we cross + * FP_COMBINE_PAR_MIN to also trigger the parallel combine (chunk 5 + * hist/scat/dedup wide branches). */ +static test_result_t test_wide_multi_key(void) { + ray_heap_init(); + (void)ray_sym_init(); + + /* 100,000 rows, each (i % 100, i % 200, i % 50) → triple key. 
+ * 100 * 200 * 50 = 1,000,000 possible distinct keys but constrained + * by row count, so we get plenty of distinct buckets to grow shards. */ + int64_t N = 100000; + ray_t* k1c = ray_vec_new(RAY_I32, N); k1c->len = N; + ray_t* k2c = ray_vec_new(RAY_I32, N); k2c->len = N; + ray_t* k3c = ray_vec_new(RAY_I32, N); k3c->len = N; + ray_t* vc = ray_vec_new(RAY_I64, N); vc->len = N; + int32_t* k1 = (int32_t*)ray_data(k1c); + int32_t* k2 = (int32_t*)ray_data(k2c); + int32_t* k3 = (int32_t*)ray_data(k3c); + int64_t* v = (int64_t*)ray_data(vc); + for (int64_t i = 0; i < N; i++) { + k1[i] = (int32_t)(i % 100); + k2[i] = (int32_t)(i % 200); + k3[i] = (int32_t)(i % 50); + v[i] = i + 1; + } + + int64_t s_k1 = ray_sym_intern("k1", 2); + int64_t s_k2 = ray_sym_intern("k2", 2); + int64_t s_k3 = ray_sym_intern("k3", 2); + int64_t s_v = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(4); + tbl = ray_table_add_col(tbl, s_k1, k1c); ray_release(k1c); + tbl = ray_table_add_col(tbl, s_k2, k2c); ray_release(k2c); + tbl = ray_table_add_col(tbl, s_k3, k3c); ray_release(k3c); + tbl = ray_table_add_col(tbl, s_v, vc); ray_release(vc); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_k1 = ray_scan(g, "k1"); + ray_op_t* scan_k2 = ray_scan(g, "k2"); + ray_op_t* scan_k3 = ray_scan(g, "k3"); + ray_op_t* scan_v = ray_scan(g, "v"); + ray_op_t* scan_k1p= ray_scan(g, "k1"); + ray_op_t* zero = ray_const_i64(g, 0); + /* Non-trivial WHERE: filter half the rows. */ + ray_op_t* pred = ray_binop(g, OP_GE, scan_k1p, zero); + + /* COUNT + AVG ⇒ AVG state contributes 2 ints per slot (sum + cnt), + * exercising mk_state_merge AVG branch in the parallel combine. 
*/ + uint16_t agg_ops[] = { OP_COUNT, OP_AVG }; + ray_op_t* agg_ins[] = { scan_v, scan_v }; + ray_op_t* keys[] = { scan_k1, scan_k2, scan_k3 }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 3, agg_ops, agg_ins, 2); + TEST_ASSERT_NOT_NULL(fused); + + ray_t* res = ray_execute(g, fused); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + int64_t nrows = ray_table_nrows(res); + /* Number of distinct (k1, k2, k3) triples is bounded by lcm-derived + * count. With i=0..99999, gcd(100,200)=100, gcd(100,50)=50, + * gcd(200,50)=50. Just sanity: result has multiple groups, total + * count = N. */ + TEST_ASSERT_TRUE(nrows > 100); + + int64_t cnt_sym = ray_sym_intern("count", 5); + ray_t* cnt_col = ray_table_get_col(res, cnt_sym); + int64_t total_cnt = 0; + for (int64_t i = 0; i < nrows; i++) + total_cnt += ((int64_t*)ray_data(cnt_col))[i]; + TEST_ASSERT_EQ_I(total_cnt, N); + + /* AVG result column should be F64. */ + int64_t avg_sym = ray_sym_intern("avg", 3); + ray_t* avg_col = ray_table_get_col(res, avg_sym); + TEST_ASSERT_NOT_NULL(avg_col); + TEST_ASSERT_EQ_I(avg_col->type, RAY_F64); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* Chunk 7: count1 parallel combine path — fp_combine_hist_fn / + * _scat_fn / _dedup_fn (fused_group.c:710-836). + * + * Drive enough distinct keys past FP_COMBINE_PAR_MIN (50000) so the + * parallel combine fires. Single key + single COUNT routes through + * exec_filtered_group_count1 → fp_combine_and_materialize → parallel + * combine fork. */ +static test_result_t test_count1_parallel_combine(void) { + ray_heap_init(); + (void)ray_sym_init(); + + /* 200,000 rows with 100,000 distinct I64 keys (i / 2). 
*/ + int64_t N = 200000; + ray_t* kc = ray_vec_new(RAY_I64, N); kc->len = N; + int64_t* k = (int64_t*)ray_data(kc); + for (int64_t i = 0; i < N; i++) k[i] = i / 2; + int64_t s_k = ray_sym_intern("k", 1); + ray_t* tbl = ray_table_new(1); + tbl = ray_table_add_col(tbl, s_k, kc); ray_release(kc); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_k = ray_scan(g, "k"); + ray_op_t* scan_pred = ray_scan(g, "k"); + ray_op_t* zero = ray_const_i64(g, 0); + /* Non-trivial WHERE that admits everything (≥ 0). */ + ray_op_t* pred = ray_binop(g, OP_GE, scan_pred, zero); + uint16_t agg_ops[] = { OP_COUNT }; + ray_op_t* agg_ins[] = { scan_k }; + ray_op_t* keys[] = { scan_k }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 1, agg_ops, agg_ins, 1); + + ray_t* res = ray_execute(g, fused); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + /* 100,000 distinct keys. */ + TEST_ASSERT_EQ_I(ray_table_nrows(res), 100000); + + int64_t cnt_sym = ray_sym_intern("count", 5); + ray_t* cnt_col = ray_table_get_col(res, cnt_sym); + int64_t total = 0; + for (int64_t i = 0; i < ray_table_nrows(res); i++) + total += ((int64_t*)ray_data(cnt_col))[i]; + TEST_ASSERT_EQ_I(total, N); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* Chunk 6: fp_shard_grow — drive the count1 shard past initial cap so + * fp_shard_grow runs at least once on each worker. + * + * INIT_CAP = 1024; load >= 0.5 triggers grow. With 10,000 distinct + * keys spread across workers, every shard will grow several times. 
*/ +static test_result_t test_count1_shard_grow(void) { + ray_heap_init(); + (void)ray_sym_init(); + + int64_t N = 30000; + ray_t* kc = ray_vec_new(RAY_I64, N); kc->len = N; + int64_t* k = (int64_t*)ray_data(kc); + for (int64_t i = 0; i < N; i++) k[i] = i; /* all distinct */ + int64_t s_k = ray_sym_intern("k", 1); + ray_t* tbl = ray_table_new(1); + tbl = ray_table_add_col(tbl, s_k, kc); ray_release(kc); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_k = ray_scan(g, "k"); + ray_op_t* scan_pred = ray_scan(g, "k"); + ray_op_t* zero = ray_const_i64(g, 0); + ray_op_t* pred = ray_binop(g, OP_GE, scan_pred, zero); + uint16_t agg_ops[] = { OP_COUNT }; + ray_op_t* agg_ins[] = { scan_k }; + ray_op_t* keys[] = { scan_k }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 1, agg_ops, agg_ins, 1); + + ray_t* res = ray_execute(g, fused); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_table_nrows(res), N); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* Multi-agg + AND predicate to exercise fp_eval_pred multi-child branch + * (n_children > 1) and AND-of-cmps compile path + * (fp_compile_pred_dag recursion). 
*/ +static test_result_t test_multi_agg_and_pred(void) { + ray_heap_init(); + (void)ray_sym_init(); + + int64_t N = 200; + ray_t* gc = ray_vec_new(RAY_I64, N); gc->len = N; + ray_t* vc = ray_vec_new(RAY_I64, N); vc->len = N; + int64_t* g_data = (int64_t*)ray_data(gc); + int64_t* v_data = (int64_t*)ray_data(vc); + for (int64_t i = 0; i < N; i++) { + g_data[i] = i % 4; /* 4 groups */ + v_data[i] = i + 1; + } + int64_t s_g = ray_sym_intern("g", 1); + int64_t s_v = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(2); + tbl = ray_table_add_col(tbl, s_g, gc); ray_release(gc); + tbl = ray_table_add_col(tbl, s_v, vc); ray_release(vc); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_g = ray_scan(g, "g"); + ray_op_t* scan_v = ray_scan(g, "v"); + ray_op_t* scan_vp = ray_scan(g, "v"); + ray_op_t* scan_vp2= ray_scan(g, "v"); + ray_op_t* lo = ray_const_i64(g, 50); + ray_op_t* hi = ray_const_i64(g, 150); + ray_op_t* p1 = ray_binop(g, OP_GE, scan_vp, lo); + ray_op_t* p2 = ray_binop(g, OP_LE, scan_vp2, hi); + ray_op_t* pred = ray_binop(g, OP_AND, p1, p2); + + /* MIN + MAX → exercise both MIN_INT64/MAX_INT64 init in mk_par_fn, + * and mk_state_merge MIN/MAX branches. */ + uint16_t agg_ops[] = { OP_MIN, OP_MAX, OP_SUM }; + ray_op_t* agg_ins[] = { scan_v, scan_v, scan_v }; + ray_op_t* keys[] = { scan_g }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 1, agg_ops, agg_ins, 3); + + ray_t* res = ray_execute(g, fused); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + /* 4 groups, all should have at least one row in [50, 150]. */ + TEST_ASSERT_EQ_I(ray_table_nrows(res), 4); + + int64_t min_sym = ray_sym_intern("min", 3); + int64_t max_sym = ray_sym_intern("max", 3); + ray_t* min_col = ray_table_get_col(res, min_sym); + ray_t* max_col = ray_table_get_col(res, max_sym); + TEST_ASSERT_NOT_NULL(min_col); + TEST_ASSERT_NOT_NULL(max_col); + /* Every per-group min must be ≥ 50 and every per-group max ≤ 150. 
*/ + for (int64_t i = 0; i < 4; i++) { + int64_t mn = ((int64_t*)ray_data(min_col))[i]; + int64_t mx = ((int64_t*)ray_data(max_col))[i]; + TEST_ASSERT_TRUE(mn >= 50); + TEST_ASSERT_TRUE(mx <= 150); + } + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* Multi-agg path with BOOL/U8 inputs to exercise the in_unsigned=1 + * branch in mk_compile (line 2431). Use SUM over BOOL (which counts + * trues) and over U8 (sum of all values). */ +static test_result_t test_multi_agg_unsigned_inputs(void) { + ray_heap_init(); + (void)ray_sym_init(); + + int64_t N = 50; + ray_t* gc = ray_vec_new(RAY_I64, N); gc->len = N; + ray_t* bc = ray_vec_new(RAY_BOOL, N); bc->len = N; + ray_t* uc = ray_vec_new(RAY_U8, N); uc->len = N; + int64_t* gd = (int64_t*)ray_data(gc); + uint8_t* bd = (uint8_t*)ray_data(bc); + uint8_t* ud = (uint8_t*)ray_data(uc); + int64_t total_b = 0, total_u = 0; + for (int64_t i = 0; i < N; i++) { + gd[i] = i % 2; + bd[i] = (uint8_t)(i & 1); + ud[i] = (uint8_t)(i % 200); + total_b += bd[i]; + total_u += ud[i]; + } + int64_t s_g = ray_sym_intern("g", 1); + int64_t s_b = ray_sym_intern("b", 1); + int64_t s_u = ray_sym_intern("u", 1); + ray_t* tbl = ray_table_new(3); + tbl = ray_table_add_col(tbl, s_g, gc); ray_release(gc); + tbl = ray_table_add_col(tbl, s_b, bc); ray_release(bc); + tbl = ray_table_add_col(tbl, s_u, uc); ray_release(uc); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_g = ray_scan(g, "g"); + ray_op_t* scan_b = ray_scan(g, "b"); + ray_op_t* scan_u = ray_scan(g, "u"); + ray_op_t* scan_gp = ray_scan(g, "g"); + ray_op_t* zero = ray_const_i64(g, 0); + ray_op_t* pred = ray_binop(g, OP_GE, scan_gp, zero); /* WHERE that always passes */ + uint16_t agg_ops[] = { OP_SUM, OP_SUM }; + ray_op_t* agg_ins[] = { scan_b, scan_u }; + ray_op_t* keys[] = { scan_g }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 1, agg_ops, agg_ins, 2); + + ray_t* res = ray_execute(g, fused); + 
TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_table_nrows(res), 2); + + /* Total of all sums across both groups equals total_b and total_u. */ + int64_t sum_sym = ray_sym_intern("sum", 3); + /* There are two columns named "sum" — fetch by index. */ + int64_t cnt = ray_table_ncols(res); + int64_t got_b = 0, got_u = 0; + int seen_first_sum = 0; + for (int64_t c = 0; c < cnt; c++) { + ray_t* col = ray_table_get_col_idx(res, c); + if (!col || col->type != RAY_I64) continue; + if (col->len != 2) continue; + /* Skip the key column g (which is also I64 with len 2). */ + int64_t a = ((int64_t*)ray_data(col))[0]; + int64_t b = ((int64_t*)ray_data(col))[1]; + if (a == 0 && b == 1) continue; /* key column: g has values {0,1} */ + if (!seen_first_sum) { got_b = a + b; seen_first_sum = 1; } + else { got_u = a + b; } + } + (void)sum_sym; + TEST_ASSERT_EQ_I(got_b, total_b); + TEST_ASSERT_EQ_I(got_u, total_u); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* SYM key + count1 to cover the SYM-id path inside fp_par_fn: walks + * through ray_sym_elem_size and read_by_esz with W32-width SYM. */ +static test_result_t test_count1_sym_key_w32(void) { + ray_heap_init(); + (void)ray_sym_init(); + + int64_t N = 100; + /* RAY_SYM_W32 column. */ + ray_t* sc = ray_sym_vec_new(RAY_SYM_W32, N); + sc->len = N; + /* Three distinct symbols. 
*/
+    int64_t s_a = ray_sym_intern("alpha", 5);
+    int64_t s_b = ray_sym_intern("beta", 4);
+    int64_t s_c = ray_sym_intern("gamma", 5);
+    int64_t syms[3] = { s_a, s_b, s_c };
+    int32_t* d = (int32_t*)ray_data(sc);
+    for (int64_t i = 0; i < N; i++) d[i] = (int32_t)syms[i % 3];
+    int64_t s_k = ray_sym_intern("k", 1);
+    ray_t* tbl = ray_table_new(1);
+    tbl = ray_table_add_col(tbl, s_k, sc); ray_release(sc);
+
+    ray_graph_t* g = ray_graph_new(tbl);
+    ray_op_t* scan_k = ray_scan(g, "k");
+    ray_op_t* scan_pred = ray_scan(g, "k");
+    ray_op_t* aatm = ray_const_i64(g, s_a);
+    /* Constructing a -RAY_SYM atom via ray_const_atom is awkward here:
+     * comparing with OP_NE against a sym id encoded as I64 fails the
+     * atom_type compatibility check because the column is SYM, and
+     * stashing a proper SYM atom for OP_EQ adds setup noise. Skipping
+     * the WHERE entirely (no-pred path) would be simpler, but the brief
+     * says every fused-path test MUST include a non-trivial WHERE, so
+     * we predicate on a separate non-SYM column instead. */
+    (void)aatm; (void)scan_pred;
+    /* Add a numeric `sel` column to use as predicate. 
*/ + ray_t* selc = ray_vec_new(RAY_I64, N); selc->len = N; + int64_t* sd = (int64_t*)ray_data(selc); + for (int64_t i = 0; i < N; i++) sd[i] = i; + int64_t s_sel = ray_sym_intern("sel", 3); + tbl = ray_table_add_col(tbl, s_sel, selc); ray_release(selc); + + ray_graph_free(g); + g = ray_graph_new(tbl); + scan_k = ray_scan(g, "k"); + ray_op_t* scan_sel = ray_scan(g, "sel"); + ray_op_t* zero = ray_const_i64(g, 0); + ray_op_t* pred = ray_binop(g, OP_GE, scan_sel, zero); /* keep all */ + uint16_t agg_ops[] = { OP_COUNT }; + ray_op_t* agg_ins[] = { scan_k }; + ray_op_t* keys[] = { scan_k }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 1, agg_ops, agg_ins, 1); + + ray_t* res = ray_execute(g, fused); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_table_nrows(res), 3); + + int64_t cnt_sym = ray_sym_intern("count", 5); + ray_t* cnt_col = ray_table_get_col(res, cnt_sym); + int64_t total = 0; + for (int64_t i = 0; i < ray_table_nrows(res); i++) + total += ((int64_t*)ray_data(cnt_col))[i]; + TEST_ASSERT_EQ_I(total, N); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + /* (== AdvEngineID 99) → no rows pass → empty result. */ static test_result_t test_eq_no_match(void) { ray_heap_init(); @@ -465,5 +1116,20 @@ const test_entry_t fused_group_entries[] = { { "fused_group/sum_negative_i16", test_sum_negative_i16, NULL, NULL }, { "fused_group/fallback_filter_honored", test_fallback_filter_honored, NULL, NULL }, { "fused_group/count1_rejects_nullable_key", test_count1_rejects_nullable_key, NULL, NULL }, + /* Coverage extensions for chunks 1-8. 
*/ + { "fused_group/fold_u8_lt_below", test_fold_u8_lt_below, NULL, NULL }, + { "fused_group/fold_u8_gt_below", test_fold_u8_gt_below, NULL, NULL }, + { "fused_group/fold_u8_le_above", test_fold_u8_le_above, NULL, NULL }, + { "fused_group/fold_u8_ge_above", test_fold_u8_ge_above, NULL, NULL }, + { "fused_group/fold_i16_lt_above", test_fold_i16_lt_above, NULL, NULL }, + { "fused_group/fold_i32_eq_above", test_fold_i32_eq_above, NULL, NULL }, + { "fused_group/fold_i32_ne_above", test_fold_i32_ne_above, NULL, NULL }, + { "fused_group/multi_agg_multi_key", test_multi_agg_multi_key, NULL, NULL }, + { "fused_group/wide_multi_key", test_wide_multi_key, NULL, NULL }, + { "fused_group/count1_parallel_combine", test_count1_parallel_combine, NULL, NULL }, + { "fused_group/count1_shard_grow", test_count1_shard_grow, NULL, NULL }, + { "fused_group/multi_agg_and_pred", test_multi_agg_and_pred, NULL, NULL }, + { "fused_group/multi_agg_unsigned_inputs", test_multi_agg_unsigned_inputs, NULL, NULL }, + { "fused_group/count1_sym_key_w32", test_count1_sym_key_w32, NULL, NULL }, { NULL, NULL, NULL, NULL }, }; diff --git a/test/test_fused_topk.c b/test/test_fused_topk.c new file mode 100644 index 00000000..523b3c54 --- /dev/null +++ b/test/test_fused_topk.c @@ -0,0 +1,717 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * test_fused_topk.c — coverage for src/ops/fused_topk.c.
+ *
+ * Tests target:
+ *   Chunk 9  — fpk_cmp multi-key + DESC + nulls + SYM-string compare
+ *              (fused_topk.c:122-187)
+ *   Chunk 10 — fpk_par_fn heap-reject-fast path (fused_topk.c:232-236)
+ *   Chunk 11 — ray_fused_topk_select gate rejections (fused_topk.c:266-298)
+ *
+ * The C-level path calls ray_fused_topk_select directly with a parsed
+ * where_expr (constructed via ray_parse). ray_parse requires a runtime
+ * (parser maintains the symbol table); each test uses a runtime
+ * setup/teardown pair so symbols are interned consistently.
+ *
+ * Per the brief, every fused-path test includes a non-trivial WHERE
+ * (planner gate also requires `where:` to route into fused_topk).
+ */
+
+#include "test.h"
+#include <stdint.h> /* NOTE(review): angle-bracket header name was lost in extraction — verify against repo */
+#include "lang/parse.h"
+#include "lang/eval.h"
+#include "ops/fused_topk.h"
+#include "table/sym.h"
+#include <string.h> /* NOTE(review): angle-bracket header name was lost in extraction — verify against repo */
+
+/* Forward-declare runtime API — mirrors test_lang.c pattern. 
*/ +struct ray_runtime_s; +typedef struct ray_runtime_s ray_runtime_t; +extern ray_runtime_t* ray_runtime_create(int argc, char** argv); +extern void ray_runtime_destroy(ray_runtime_t* rt); +extern ray_runtime_t* __RUNTIME; + +/* ─── Setup / Teardown ─────────────────────────────────────────────── */ + +static void topk_setup(void) { ray_runtime_create(0, NULL); } +static void topk_teardown(void) { ray_runtime_destroy(__RUNTIME); } + +/* ─── Helpers ──────────────────────────────────────────────────────── */ + +/* Build an I64-keyed table with N rows: g[i] = i % 4, v[i] = i. */ +static ray_t* make_i64_table(int64_t N) { + ray_t* gc = ray_vec_new(RAY_I64, N); gc->len = N; + ray_t* vc = ray_vec_new(RAY_I64, N); vc->len = N; + int64_t* gd = (int64_t*)ray_data(gc); + int64_t* vd = (int64_t*)ray_data(vc); + for (int64_t i = 0; i < N; i++) { gd[i] = i % 4; vd[i] = i; } + int64_t s_g = ray_sym_intern("g", 1); + int64_t s_v = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(2); + tbl = ray_table_add_col(tbl, s_g, gc); ray_release(gc); + tbl = ray_table_add_col(tbl, s_v, vc); ray_release(vc); + return tbl; +} + +/* Build a multi-column table with diverse types for testing key/output + * combinations. 
Returns table with columns: + * id I64 : 0..N-1 + * k I64 : (i * 7) % 100 -- non-monotone numeric key + * k2 I32 : i % 5 + * sel I64 : i -- predicate column + */ +static ray_t* make_multi_col_table(int64_t N) { + ray_t* idc = ray_vec_new(RAY_I64, N); idc->len = N; + ray_t* kc = ray_vec_new(RAY_I64, N); kc->len = N; + ray_t* k2c = ray_vec_new(RAY_I32, N); k2c->len = N; + ray_t* sc = ray_vec_new(RAY_I64, N); sc->len = N; + int64_t* idd = (int64_t*)ray_data(idc); + int64_t* kd = (int64_t*)ray_data(kc); + int32_t* k2d = (int32_t*)ray_data(k2c); + int64_t* sd = (int64_t*)ray_data(sc); + for (int64_t i = 0; i < N; i++) { + idd[i] = i; + kd[i] = (i * 7) % 100; + k2d[i] = (int32_t)(i % 5); + sd[i] = i; + } + int64_t s_id = ray_sym_intern("id", 2); + int64_t s_k = ray_sym_intern("k", 1); + int64_t s_k2 = ray_sym_intern("k2", 2); + int64_t s_sel = ray_sym_intern("sel", 3); + ray_t* tbl = ray_table_new(4); + tbl = ray_table_add_col(tbl, s_id, idc); ray_release(idc); + tbl = ray_table_add_col(tbl, s_k, kc); ray_release(kc); + tbl = ray_table_add_col(tbl, s_k2, k2c); ray_release(k2c); + tbl = ray_table_add_col(tbl, s_sel, sc); ray_release(sc); + return tbl; +} + +/* ─── Tests ────────────────────────────────────────────────────────── */ + +/* Chunk 10: heap-reject-fast path. With N=10000 rows and K=8, after + * the first K rows fill the heap the next ~9992 rows must each compare + * against heap[0] (the current worst). ASC order on a random-ish key + * makes most subsequent rows worse than the heap root, so the + * `fpk_cmp(...) >= 0` branch (line 233) fires repeatedly. */ +static test_result_t test_topk_heap_reject_fast(void) { + int64_t N = 10000; + ray_t* tbl = make_multi_col_table(N); + TEST_ASSERT_NOT_NULL(tbl); + + /* WHERE sel >= 0 — non-trivial WHERE that admits all rows. 
*/ + ray_t* where_expr = ray_parse("(>= sel 0)"); + TEST_ASSERT_NOT_NULL(where_expr); + TEST_ASSERT_FALSE(RAY_IS_ERR(where_expr)); + + int64_t s_k = ray_sym_intern("k", 1); + int64_t s_id = ray_sym_intern("id", 2); + int64_t sort_keys[1] = { s_k }; + uint8_t sort_descs[1] = { 0 }; /* ASC */ + int64_t out_syms[2] = { s_id, s_k }; + int64_t k_pick = 8; + + ray_t* res = ray_fused_topk_select(tbl, where_expr, + sort_keys, sort_descs, 1, k_pick, + out_syms, NULL, 2); + TEST_ASSERT_NOT_NULL(res); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_table_nrows(res), k_pick); + + /* Result rows should have the K smallest k values; since k = (i*7)%100, + * the smallest k=0 occurs at rows i where 7i ≡ 0 mod 100 → i ∈ {0, + * 100, 200, ...}. K=8 with stable tie-break (lower id first) → top + * 8 should all have k=0 with ids = {0, 100, 200, 300, 400, 500, 600, 700}. */ + ray_t* k_col = ray_table_get_col(res, s_k); + TEST_ASSERT_NOT_NULL(k_col); + for (int64_t i = 0; i < k_pick; i++) { + TEST_ASSERT_EQ_I(((int64_t*)ray_data(k_col))[i], 0); + } + + ray_release(res); ray_release(where_expr); ray_release(tbl); + PASS(); +} + +/* Chunk 9 part 1: multi-key + DESC. Two sort keys (k2 ASC, k DESC) — + * ties on first key broken by second key in opposite direction. This + * walks both `if (cmp != 0) return ks->desc ? -cmp : cmp;` (line 166) + * and the multi-key `for` loop (line 124-167). */ +static test_result_t test_topk_multi_key_desc(void) { + int64_t N = 200; + ray_t* tbl = make_multi_col_table(N); + + ray_t* where_expr = ray_parse("(>= sel 0)"); + int64_t s_k = ray_sym_intern("k", 1); + int64_t s_k2 = ray_sym_intern("k2", 2); + int64_t s_id = ray_sym_intern("id", 2); + /* k2 ASC, k DESC — primary asc, secondary desc. 
*/ + int64_t sort_keys[2] = { s_k2, s_k }; + uint8_t sort_descs[2] = { 0, 1 }; + int64_t out_syms[3] = { s_id, s_k, s_k2 }; + int64_t k_pick = 5; + ray_t* res = ray_fused_topk_select(tbl, where_expr, + sort_keys, sort_descs, 2, k_pick, + out_syms, NULL, 3); + TEST_ASSERT_NOT_NULL(res); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_table_nrows(res), k_pick); + + /* The K smallest are at k2=0; among those, k DESC → rows with + * largest k first. k2=0 occurs at i where i % 5 == 0, i.e. i ∈ + * {0,5,10,15,...,195}. Their k values = (i*7) % 100. With i + * stepping by 5, k = (5*7*j) % 100 = (35j) % 100 for j=0..39. + * Largest k value among 40 entries: j=11 → 35*11 % 100 = 85. + * j=27 → 945%100 = 45. j=15 → 525%100 = 25. Compute the max. */ + ray_t* k2_col = ray_table_get_col(res, s_k2); + ray_t* k_col = ray_table_get_col(res, s_k); + TEST_ASSERT_NOT_NULL(k2_col); + TEST_ASSERT_NOT_NULL(k_col); + /* All result rows must have k2 == 0. */ + for (int64_t i = 0; i < k_pick; i++) { + TEST_ASSERT_EQ_I((int64_t)((int32_t*)ray_data(k2_col))[i], 0); + } + /* k values must be in non-increasing order (DESC). */ + int64_t prev = INT64_MAX; + for (int64_t i = 0; i < k_pick; i++) { + int64_t cur = ((int64_t*)ray_data(k_col))[i]; + TEST_ASSERT_TRUE(cur <= prev); + prev = cur; + } + + ray_release(res); ray_release(where_expr); ray_release(tbl); + PASS(); +} + +/* Chunk 9 part 2: SYM key — exercises the SYM branch of fpk_cmp + * (lines 140-148) including ray_str_cmp. Use SYM_W32 width per + * brief constraint. */ +static test_result_t test_topk_sym_key(void) { + int64_t N = 100; + /* SYM column with 5 distinct symbols spelled differently. 
*/
+    ray_t* sc = ray_sym_vec_new(RAY_SYM_W32, N);
+    sc->len = N;
+    int64_t s_alpha = ray_sym_intern("alpha", 5);
+    int64_t s_beta = ray_sym_intern("beta", 4);
+    int64_t s_gamma = ray_sym_intern("gamma", 5);
+    int64_t s_delta = ray_sym_intern("delta", 5);
+    int64_t s_epsi = ray_sym_intern("epsilon", 7);
+    int64_t syms[5] = { s_alpha, s_beta, s_gamma, s_delta, s_epsi };
+    int32_t* d = (int32_t*)ray_data(sc);
+    for (int64_t i = 0; i < N; i++) d[i] = (int32_t)syms[i % 5];
+
+    ray_t* selc = ray_vec_new(RAY_I64, N); selc->len = N;
+    int64_t* sd = (int64_t*)ray_data(selc);
+    for (int64_t i = 0; i < N; i++) sd[i] = i;
+
+    int64_t s_name = ray_sym_intern("name", 4);
+    int64_t s_sel = ray_sym_intern("sel", 3);
+    ray_t* tbl = ray_table_new(2);
+    tbl = ray_table_add_col(tbl, s_name, sc); ray_release(sc);
+    tbl = ray_table_add_col(tbl, s_sel, selc); ray_release(selc);
+
+    ray_t* where_expr = ray_parse("(>= sel 0)");
+    int64_t sort_keys[1] = { s_name };
+    uint8_t sort_descs[1] = { 0 }; /* ASC by name string */
+    int64_t out_syms[1] = { s_name };
+    int64_t k_pick = 4;
+    ray_t* res = ray_fused_topk_select(tbl, where_expr,
+                                       sort_keys, sort_descs, 1, k_pick,
+                                       out_syms, NULL, 1);
+    TEST_ASSERT_NOT_NULL(res);
+    TEST_ASSERT_FALSE(RAY_IS_ERR(res));
+    TEST_ASSERT_EQ_I(ray_table_nrows(res), k_pick);
+
+    /* ASC by string with K=4: "alpha" alone has 20 occurrences, so
+     * all four selected rows must carry the symbol "alpha". */
+    ray_t* name_col = ray_table_get_col(res, s_name);
+    TEST_ASSERT_NOT_NULL(name_col);
+    /* Lexicographic order: alpha < beta < delta < epsilon < gamma.
+     * "alpha" occurs 20 times (rows 0,5,10,...,95), more than K=4,
+     * so every top-4 slot is an "alpha" row, with ties among them
+     * broken by source row index. Checking the first slot therefore
+     * suffices to pin the ordering. 
*/ + int32_t got = ((int32_t*)ray_data(name_col))[0]; + TEST_ASSERT_EQ_I((int64_t)got, s_alpha); + + ray_release(res); ray_release(where_expr); ray_release(tbl); + PASS(); +} + +/* Chunk 9 part 3: nulls in sort key — exercises the has_nulls leg + * (lines 133-139). ASC: NULLS LAST; DESC: NULLS FIRST. */ +static test_result_t test_topk_null_sort_key_asc(void) { + int64_t N = 20; + ray_t* kc = ray_vec_new(RAY_I64, N); kc->len = N; + int64_t* kd = (int64_t*)ray_data(kc); + for (int64_t i = 0; i < N; i++) kd[i] = i; + /* Mark rows {0, 5, 10} as null. */ + ray_vec_set_null(kc, 0, true); + ray_vec_set_null(kc, 5, true); + ray_vec_set_null(kc, 10, true); + + ray_t* selc = ray_vec_new(RAY_I64, N); selc->len = N; + int64_t* sd = (int64_t*)ray_data(selc); + for (int64_t i = 0; i < N; i++) sd[i] = i; + + int64_t s_k = ray_sym_intern("k", 1); + int64_t s_sel = ray_sym_intern("sel", 3); + ray_t* tbl = ray_table_new(2); + tbl = ray_table_add_col(tbl, s_k, kc); ray_release(kc); + tbl = ray_table_add_col(tbl, s_sel, selc); ray_release(selc); + + ray_t* where_expr = ray_parse("(>= sel 0)"); + int64_t sort_keys[1] = { s_k }; + uint8_t sort_descs[1] = { 0 }; /* ASC, NULLS LAST */ + int64_t out_syms[2] = { s_k, s_sel }; + int64_t k_pick = 5; + ray_t* res = ray_fused_topk_select(tbl, where_expr, + sort_keys, sort_descs, 1, k_pick, + out_syms, NULL, 2); + TEST_ASSERT_NOT_NULL(res); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_table_nrows(res), k_pick); + + /* ASC, NULLS LAST: K=5 smallest non-null k. Non-null rows have + * k ∈ {1,2,3,4,6,7,8,9,11,...19}. Top 5 ASC = {1,2,3,4,6}. 
*/ + ray_t* k_col = ray_table_get_col(res, s_k); + TEST_ASSERT_NOT_NULL(k_col); + int64_t expected[5] = {1, 2, 3, 4, 6}; + for (int64_t i = 0; i < 5; i++) { + TEST_ASSERT_FALSE(ray_vec_is_null(k_col, i)); + TEST_ASSERT_EQ_I(((int64_t*)ray_data(k_col))[i], expected[i]); + } + + ray_release(res); ray_release(where_expr); ray_release(tbl); + PASS(); +} + +static test_result_t test_topk_null_sort_key_desc(void) { + int64_t N = 20; + ray_t* kc = ray_vec_new(RAY_I64, N); kc->len = N; + int64_t* kd = (int64_t*)ray_data(kc); + for (int64_t i = 0; i < N; i++) kd[i] = i; + ray_vec_set_null(kc, 0, true); + ray_vec_set_null(kc, 5, true); + ray_vec_set_null(kc, 10, true); + + ray_t* selc = ray_vec_new(RAY_I64, N); selc->len = N; + int64_t* sd = (int64_t*)ray_data(selc); + for (int64_t i = 0; i < N; i++) sd[i] = i; + + int64_t s_k = ray_sym_intern("k", 1); + int64_t s_sel = ray_sym_intern("sel", 3); + ray_t* tbl = ray_table_new(2); + tbl = ray_table_add_col(tbl, s_k, kc); ray_release(kc); + tbl = ray_table_add_col(tbl, s_sel, selc); ray_release(selc); + + ray_t* where_expr = ray_parse("(>= sel 0)"); + int64_t sort_keys[1] = { s_k }; + uint8_t sort_descs[1] = { 1 }; /* DESC, NULLS FIRST */ + int64_t out_syms[2] = { s_k, s_sel }; + int64_t k_pick = 5; + ray_t* res = ray_fused_topk_select(tbl, where_expr, + sort_keys, sort_descs, 1, k_pick, + out_syms, NULL, 2); + TEST_ASSERT_NOT_NULL(res); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_table_nrows(res), k_pick); + + /* DESC, NULLS FIRST: K=5 results — first 3 must be the null rows + * (rows 0, 5, 10) — followed by the 2 highest non-null values + * (k=19, k=18). */ + ray_t* k_col = ray_table_get_col(res, s_k); + TEST_ASSERT_NOT_NULL(k_col); + /* Check that the first 3 rows are null. */ + TEST_ASSERT_TRUE(ray_vec_is_null(k_col, 0)); + TEST_ASSERT_TRUE(ray_vec_is_null(k_col, 1)); + TEST_ASSERT_TRUE(ray_vec_is_null(k_col, 2)); + /* Slots 3 and 4 are non-null with k=19 and k=18 (DESC). 
*/ + TEST_ASSERT_FALSE(ray_vec_is_null(k_col, 3)); + TEST_ASSERT_FALSE(ray_vec_is_null(k_col, 4)); + TEST_ASSERT_EQ_I(((int64_t*)ray_data(k_col))[3], 19); + TEST_ASSERT_EQ_I(((int64_t*)ray_data(k_col))[4], 18); + + ray_release(res); ray_release(where_expr); ray_release(tbl); + PASS(); +} + +/* Chunk 11: gate rejections. Each call below must return NULL because + * the inputs fail one of the runtime gates in ray_fused_topk_select. + * The C-API shape lets us hit gates the planner already filters out + * (where ray-level callers never arrive). */ + +static test_result_t test_topk_gate_null_tbl(void) { + ray_t* where_expr = ray_parse("(>= sel 0)"); + int64_t sort_keys[1] = { 1 }; + uint8_t sort_descs[1] = { 0 }; + int64_t out_syms[1] = { 1 }; + /* Pass NULL table → first-line gate. */ + ray_t* res = ray_fused_topk_select(NULL, where_expr, + sort_keys, sort_descs, 1, 5, + out_syms, NULL, 1); + TEST_ASSERT_NULL(res); + ray_release(where_expr); + PASS(); +} + +static test_result_t test_topk_gate_k_too_large(void) { + int64_t N = 100; + ray_t* tbl = make_i64_table(N); + ray_t* where_expr = ray_parse("(>= v 0)"); + int64_t s_v = ray_sym_intern("v", 1); + int64_t sort_keys[1] = { s_v }; + uint8_t sort_descs[1] = { 0 }; + int64_t out_syms[1] = { s_v }; + /* k > FPK_MAX_K (8192). */ + ray_t* res = ray_fused_topk_select(tbl, where_expr, + sort_keys, sort_descs, 1, 9000, + out_syms, NULL, 1); + TEST_ASSERT_NULL(res); + ray_release(where_expr); ray_release(tbl); + PASS(); +} + +static test_result_t test_topk_gate_zero_sort_keys(void) { + int64_t N = 100; + ray_t* tbl = make_i64_table(N); + ray_t* where_expr = ray_parse("(>= v 0)"); + int64_t s_v = ray_sym_intern("v", 1); + int64_t sort_keys[1] = { s_v }; + uint8_t sort_descs[1] = { 0 }; + int64_t out_syms[1] = { s_v }; + /* n_sort_keys == 0. 
*/ + ray_t* res = ray_fused_topk_select(tbl, where_expr, + sort_keys, sort_descs, 0, 5, + out_syms, NULL, 1); + TEST_ASSERT_NULL(res); + ray_release(where_expr); ray_release(tbl); + PASS(); +} + +static test_result_t test_topk_gate_k_ge_nrows(void) { + int64_t N = 5; + ray_t* tbl = make_i64_table(N); + ray_t* where_expr = ray_parse("(>= v 0)"); + int64_t s_v = ray_sym_intern("v", 1); + int64_t sort_keys[1] = { s_v }; + uint8_t sort_descs[1] = { 0 }; + int64_t out_syms[1] = { s_v }; + /* k (10) >= nrows (5). */ + ray_t* res = ray_fused_topk_select(tbl, where_expr, + sort_keys, sort_descs, 1, 10, + out_syms, NULL, 1); + TEST_ASSERT_NULL(res); + ray_release(where_expr); ray_release(tbl); + PASS(); +} + +static test_result_t test_topk_gate_unsupported_out_col_type(void) { + /* Build a table with an F64 column — F64 is not in the supported + * output set, so the out-col gate must reject. */ + int64_t N = 50; + ray_t* fc = ray_vec_new(RAY_F64, N); fc->len = N; + double* fd = (double*)ray_data(fc); + for (int64_t i = 0; i < N; i++) fd[i] = (double)i; + ray_t* sc = ray_vec_new(RAY_I64, N); sc->len = N; + int64_t* sd = (int64_t*)ray_data(sc); + for (int64_t i = 0; i < N; i++) sd[i] = i; + int64_t s_f = ray_sym_intern("f", 1); + int64_t s_sel = ray_sym_intern("sel", 3); + ray_t* tbl = ray_table_new(2); + tbl = ray_table_add_col(tbl, s_f, fc); ray_release(fc); + tbl = ray_table_add_col(tbl, s_sel, sc); ray_release(sc); + + ray_t* where_expr = ray_parse("(>= sel 0)"); + int64_t sort_keys[1] = { s_sel }; + uint8_t sort_descs[1] = { 0 }; + int64_t out_syms[2] = { s_sel, s_f }; /* f is F64 → unsupported */ + ray_t* res = ray_fused_topk_select(tbl, where_expr, + sort_keys, sort_descs, 1, 5, + out_syms, NULL, 2); + TEST_ASSERT_NULL(res); + + ray_release(where_expr); ray_release(tbl); + PASS(); +} + +static test_result_t test_topk_gate_unsupported_sort_key_type(void) { + /* F64 sort key → rejected at the sort-key gate. 
*/ + int64_t N = 50; + ray_t* fc = ray_vec_new(RAY_F64, N); fc->len = N; + double* fd = (double*)ray_data(fc); + for (int64_t i = 0; i < N; i++) fd[i] = (double)i; + ray_t* sc = ray_vec_new(RAY_I64, N); sc->len = N; + int64_t* sd = (int64_t*)ray_data(sc); + for (int64_t i = 0; i < N; i++) sd[i] = i; + int64_t s_f = ray_sym_intern("f", 1); + int64_t s_sel = ray_sym_intern("sel", 3); + ray_t* tbl = ray_table_new(2); + tbl = ray_table_add_col(tbl, s_f, fc); ray_release(fc); + tbl = ray_table_add_col(tbl, s_sel, sc); ray_release(sc); + + ray_t* where_expr = ray_parse("(>= sel 0)"); + int64_t sort_keys[1] = { s_f }; /* F64 → reject */ + uint8_t sort_descs[1] = { 0 }; + int64_t out_syms[1] = { s_sel }; + ray_t* res = ray_fused_topk_select(tbl, where_expr, + sort_keys, sort_descs, 1, 5, + out_syms, NULL, 1); + TEST_ASSERT_NULL(res); + + ray_release(where_expr); ray_release(tbl); + PASS(); +} + +static test_result_t test_topk_gate_n_out_zero(void) { + int64_t N = 50; + ray_t* tbl = make_i64_table(N); + ray_t* where_expr = ray_parse("(>= v 0)"); + int64_t s_v = ray_sym_intern("v", 1); + int64_t sort_keys[1] = { s_v }; + uint8_t sort_descs[1] = { 0 }; + /* n_out == 0 — first-line gate. */ + ray_t* res = ray_fused_topk_select(tbl, where_expr, + sort_keys, sort_descs, 1, 5, + NULL, NULL, 0); + TEST_ASSERT_NULL(res); + ray_release(where_expr); ray_release(tbl); + PASS(); +} + +static test_result_t test_topk_gate_too_many_sort_keys(void) { + int64_t N = 50; + ray_t* tbl = make_i64_table(N); + ray_t* where_expr = ray_parse("(>= v 0)"); + int64_t s_v = ray_sym_intern("v", 1); + /* FPK_MAX_KEYS == 16; pass 17 keys to trip the gate. 
*/ + int64_t sort_keys[17]; + uint8_t sort_descs[17]; + for (int i = 0; i < 17; i++) { sort_keys[i] = s_v; sort_descs[i] = 0; } + int64_t out_syms[1] = { s_v }; + ray_t* res = ray_fused_topk_select(tbl, where_expr, + sort_keys, sort_descs, 17, 5, + out_syms, NULL, 1); + TEST_ASSERT_NULL(res); + ray_release(where_expr); ray_release(tbl); + PASS(); +} + +static test_result_t test_topk_gate_negative_k(void) { + int64_t N = 50; + ray_t* tbl = make_i64_table(N); + ray_t* where_expr = ray_parse("(>= v 0)"); + int64_t s_v = ray_sym_intern("v", 1); + int64_t sort_keys[1] = { s_v }; + uint8_t sort_descs[1] = { 0 }; + int64_t out_syms[1] = { s_v }; + /* k <= 0 first-line gate. */ + ray_t* res = ray_fused_topk_select(tbl, where_expr, + sort_keys, sort_descs, 1, -3, + out_syms, NULL, 1); + TEST_ASSERT_NULL(res); + /* k = 0 also rejected. */ + res = ray_fused_topk_select(tbl, where_expr, + sort_keys, sort_descs, 1, 0, + out_syms, NULL, 1); + TEST_ASSERT_NULL(res); + ray_release(where_expr); ray_release(tbl); + PASS(); +} + +/* End-to-end fused topk select with simple I64 sort key. Ensures the + * happy path through fpk_par_fn → final merge → materialize works for + * the smallest non-trivial input. Also exercises the n_out=multi + * gather path (lines 412-440) including ray_vec_set_null short-circuit + * (no nulls in source). */ +static test_result_t test_topk_basic_i64_asc(void) { + int64_t N = 100; + ray_t* tbl = make_i64_table(N); + ray_t* where_expr = ray_parse("(>= v 0)"); + int64_t s_v = ray_sym_intern("v", 1); + int64_t s_g = ray_sym_intern("g", 1); + int64_t sort_keys[1] = { s_v }; + uint8_t sort_descs[1] = { 0 }; + int64_t out_syms[2] = { s_v, s_g }; + int64_t k_pick = 3; + ray_t* res = ray_fused_topk_select(tbl, where_expr, + sort_keys, sort_descs, 1, k_pick, + out_syms, NULL, 2); + TEST_ASSERT_NOT_NULL(res); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_table_nrows(res), k_pick); + /* Smallest 3: v = 0, 1, 2. 
*/ + ray_t* v_col = ray_table_get_col(res, s_v); + for (int64_t i = 0; i < k_pick; i++) + TEST_ASSERT_EQ_I(((int64_t*)ray_data(v_col))[i], i); + + ray_release(res); ray_release(where_expr); ray_release(tbl); + PASS(); +} + +/* Aliased output columns — alias array non-NULL. Exercises line 414 + * `int64_t alias = out_alias_syms ? out_alias_syms[c] : cs;`. */ +static test_result_t test_topk_aliased_out(void) { + int64_t N = 50; + ray_t* tbl = make_i64_table(N); + ray_t* where_expr = ray_parse("(>= v 0)"); + int64_t s_v = ray_sym_intern("v", 1); + int64_t s_alias = ray_sym_intern("vv", 2); + int64_t sort_keys[1] = { s_v }; + uint8_t sort_descs[1] = { 0 }; + int64_t out_syms[1] = { s_v }; + int64_t out_aliases[1] = { s_alias }; + ray_t* res = ray_fused_topk_select(tbl, where_expr, + sort_keys, sort_descs, 1, 3, + out_syms, out_aliases, 1); + TEST_ASSERT_NOT_NULL(res); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + /* Result column must be published under the alias name. */ + ray_t* col = ray_table_get_col(res, s_alias); + TEST_ASSERT_NOT_NULL(col); + TEST_ASSERT_EQ_I(col->len, 3); + + ray_release(res); ray_release(where_expr); ray_release(tbl); + PASS(); +} + +/* Source nullmap propagated to result. Exercises lines 432-437. */ +static test_result_t test_topk_propagates_nullmap(void) { + int64_t N = 20; + ray_t* gc = ray_vec_new(RAY_I64, N); gc->len = N; + ray_t* vc = ray_vec_new(RAY_I64, N); vc->len = N; + int64_t* gd = (int64_t*)ray_data(gc); + int64_t* vd = (int64_t*)ray_data(vc); + for (int64_t i = 0; i < N; i++) { gd[i] = i % 4; vd[i] = i; } + /* Mark some g rows as null — these will be carried into the + * result via the nullmap propagation block. 
*/ + ray_vec_set_null(gc, 1, true); + ray_vec_set_null(gc, 2, true); + int64_t s_g = ray_sym_intern("g", 1); + int64_t s_v = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(2); + tbl = ray_table_add_col(tbl, s_g, gc); ray_release(gc); + tbl = ray_table_add_col(tbl, s_v, vc); ray_release(vc); + + ray_t* where_expr = ray_parse("(>= v 0)"); + int64_t sort_keys[1] = { s_v }; + uint8_t sort_descs[1] = { 0 }; + int64_t out_syms[2] = { s_v, s_g }; + int64_t k_pick = 5; + ray_t* res = ray_fused_topk_select(tbl, where_expr, + sort_keys, sort_descs, 1, k_pick, + out_syms, NULL, 2); + TEST_ASSERT_NOT_NULL(res); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + /* Top 5 by v ASC = rows 0..4. g[1] and g[2] are null; result + * positions 1 and 2 must be marked null. */ + ray_t* g_col = ray_table_get_col(res, s_g); + TEST_ASSERT_NOT_NULL(g_col); + TEST_ASSERT_FALSE(ray_vec_is_null(g_col, 0)); + TEST_ASSERT_TRUE(ray_vec_is_null(g_col, 1)); + TEST_ASSERT_TRUE(ray_vec_is_null(g_col, 2)); + TEST_ASSERT_FALSE(ray_vec_is_null(g_col, 3)); + TEST_ASSERT_FALSE(ray_vec_is_null(g_col, 4)); + + ray_release(res); ray_release(where_expr); ray_release(tbl); + PASS(); +} + +/* Multi-key path that crosses the 64-bit composite key boundary — + * three I32 keys at offsets 0/32/64 (96 bits total). Exercises the + * inner mk-style key-resolution path in fpk_cmp via a tall enough + * table that fpk_par_fn uses multiple workers and the global merge + * runs. (Note: fused_topk does not pack composite keys; it loops + * over n_keys per compare, so this hits the multi-key for-loop in + * fpk_cmp.) 
*/ +static test_result_t test_topk_three_keys(void) { + int64_t N = 1000; + ray_t* k1c = ray_vec_new(RAY_I32, N); k1c->len = N; + ray_t* k2c = ray_vec_new(RAY_I32, N); k2c->len = N; + ray_t* k3c = ray_vec_new(RAY_I32, N); k3c->len = N; + ray_t* sc = ray_vec_new(RAY_I64, N); sc->len = N; + int32_t* k1 = (int32_t*)ray_data(k1c); + int32_t* k2 = (int32_t*)ray_data(k2c); + int32_t* k3 = (int32_t*)ray_data(k3c); + int64_t* sd = (int64_t*)ray_data(sc); + for (int64_t i = 0; i < N; i++) { + k1[i] = (int32_t)(i % 7); + k2[i] = (int32_t)(i % 11); + k3[i] = (int32_t)(i % 13); + sd[i] = i; + } + int64_t s_k1 = ray_sym_intern("k1", 2); + int64_t s_k2 = ray_sym_intern("k2", 2); + int64_t s_k3 = ray_sym_intern("k3", 2); + int64_t s_sel = ray_sym_intern("sel", 3); + ray_t* tbl = ray_table_new(4); + tbl = ray_table_add_col(tbl, s_k1, k1c); ray_release(k1c); + tbl = ray_table_add_col(tbl, s_k2, k2c); ray_release(k2c); + tbl = ray_table_add_col(tbl, s_k3, k3c); ray_release(k3c); + tbl = ray_table_add_col(tbl, s_sel, sc); ray_release(sc); + + ray_t* where_expr = ray_parse("(>= sel 0)"); + int64_t sort_keys[3] = { s_k1, s_k2, s_k3 }; + uint8_t sort_descs[3] = { 0, 1, 0 }; /* mixed asc/desc */ + int64_t out_syms[1] = { s_sel }; + int64_t k_pick = 10; + ray_t* res = ray_fused_topk_select(tbl, where_expr, + sort_keys, sort_descs, 3, k_pick, + out_syms, NULL, 1); + TEST_ASSERT_NOT_NULL(res); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_table_nrows(res), k_pick); + + ray_release(res); ray_release(where_expr); ray_release(tbl); + PASS(); +} + +/* ─── Entry table ──────────────────────────────────────────────────── */ + +const test_entry_t fused_topk_entries[] = { + /* Chunk 10 — heap-reject-fast path */ + { "fused_topk/heap_reject_fast", test_topk_heap_reject_fast, topk_setup, topk_teardown }, + /* Chunk 9 — multi-key compare paths */ + { "fused_topk/multi_key_desc", test_topk_multi_key_desc, topk_setup, topk_teardown }, + { "fused_topk/sym_key_w32", test_topk_sym_key, 
topk_setup, topk_teardown }, + { "fused_topk/null_sort_key_asc", test_topk_null_sort_key_asc, topk_setup, topk_teardown }, + { "fused_topk/null_sort_key_desc", test_topk_null_sort_key_desc, topk_setup, topk_teardown }, + /* Chunk 11 — gate rejections */ + { "fused_topk/gate_null_tbl", test_topk_gate_null_tbl, topk_setup, topk_teardown }, + { "fused_topk/gate_k_too_large", test_topk_gate_k_too_large, topk_setup, topk_teardown }, + { "fused_topk/gate_zero_sort_keys", test_topk_gate_zero_sort_keys, topk_setup, topk_teardown }, + { "fused_topk/gate_k_ge_nrows", test_topk_gate_k_ge_nrows, topk_setup, topk_teardown }, + { "fused_topk/gate_unsupported_out_col", test_topk_gate_unsupported_out_col_type, topk_setup, topk_teardown }, + { "fused_topk/gate_unsupported_sort_key", test_topk_gate_unsupported_sort_key_type, topk_setup, topk_teardown }, + { "fused_topk/gate_n_out_zero", test_topk_gate_n_out_zero, topk_setup, topk_teardown }, + { "fused_topk/gate_too_many_sort_keys", test_topk_gate_too_many_sort_keys, topk_setup, topk_teardown }, + { "fused_topk/gate_negative_k", test_topk_gate_negative_k, topk_setup, topk_teardown }, + /* Happy paths */ + { "fused_topk/basic_i64_asc", test_topk_basic_i64_asc, topk_setup, topk_teardown }, + { "fused_topk/aliased_out", test_topk_aliased_out, topk_setup, topk_teardown }, + { "fused_topk/propagates_nullmap", test_topk_propagates_nullmap, topk_setup, topk_teardown }, + { "fused_topk/three_keys", test_topk_three_keys, topk_setup, topk_teardown }, + { NULL, NULL, NULL, NULL }, +}; diff --git a/test/test_group_extra.c b/test/test_group_extra.c index 1ad9c4d0..729fb1d9 100644 --- a/test/test_group_extra.c +++ b/test/test_group_extra.c @@ -45,6 +45,7 @@ #include #include "mem/heap.h" #include "ops/ops.h" +#include "ops/internal.h" #include "table/sym.h" #include #include @@ -818,6 +819,227 @@ static test_result_t test_reduction_var_i64_parallel(void) { PASS(); } +/* -------------------------------------------------------------------------- + 
* Test 13: count_distinct parallel path runs on every flat numeric type + * + * exec_count_distinct's parallel kernel (group.c L490+, len >= 65536) + * dispatches cd_hist_fn / cd_scatter_fn / cd_part_dedup_fn over every + * flat numeric type. These per-type arms are the focus of chunk 1 in + * the coverage plan. + * + * After the task-keyed cursor fix, exact distinct counts are stable + * across runs. + * -------------------------------------------------------------------------- */ +static test_result_t test_count_distinct_parallel_types(void) { + ray_heap_init(); + (void)ray_sym_init(); + + int64_t name_v = ray_sym_intern("v", 1); + + /* I64: 70000 rows, ascending values → 70000 distinct. */ + { + ray_t* vec = ray_vec_new(RAY_I64, N); + TEST_ASSERT_NOT_NULL(vec); + vec->len = N; + int64_t* p = (int64_t*)ray_data(vec); + for (int64_t i = 0; i < N; i++) p[i] = i; + ray_t* tbl = ray_table_new(1); + tbl = ray_table_add_col(tbl, name_v, vec); + ray_release(vec); + + ray_graph_t* g = ray_graph_new(tbl); + TEST_ASSERT_NOT_NULL(g); + ray_op_t* cd = ray_count_distinct(g, ray_scan(g, "v")); + ray_t* res = ray_execute(g, cd); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT(res->type == -RAY_I64, "count_distinct returns I64 atom"); + TEST_ASSERT_EQ_I(res->i64, N); + ray_release(res); + ray_graph_free(g); + ray_release(tbl); + } + + /* F64: 70000 rows → 70000 distinct. */ + { + ray_t* vec = ray_vec_new(RAY_F64, N); + TEST_ASSERT_NOT_NULL(vec); + vec->len = N; + double* p = (double*)ray_data(vec); + for (int64_t i = 0; i < N; i++) p[i] = (double)i; + ray_t* tbl = ray_table_new(1); + tbl = ray_table_add_col(tbl, name_v, vec); + ray_release(vec); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* cd = ray_count_distinct(g, ray_scan(g, "v")); + ray_t* res = ray_execute(g, cd); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(res->i64, N); + ray_release(res); + ray_graph_free(g); + ray_release(tbl); + } + + /* I32 mod 1000 → exactly 1000 distinct. 
*/ + { + ray_t* vec = ray_vec_new(RAY_I32, N); + TEST_ASSERT_NOT_NULL(vec); + vec->len = N; + int32_t* p = (int32_t*)ray_data(vec); + for (int64_t i = 0; i < N; i++) p[i] = (int32_t)(i % 1000); + ray_t* tbl = ray_table_new(1); + tbl = ray_table_add_col(tbl, name_v, vec); + ray_release(vec); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* cd = ray_count_distinct(g, ray_scan(g, "v")); + ray_t* res = ray_execute(g, cd); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(res->i64, 1000); + ray_release(res); + ray_graph_free(g); + ray_release(tbl); + } + + /* I16 mod 250 → 250 distinct. */ + { + ray_t* vec = ray_vec_new(RAY_I16, N); + TEST_ASSERT_NOT_NULL(vec); + vec->len = N; + int16_t* p = (int16_t*)ray_data(vec); + for (int64_t i = 0; i < N; i++) p[i] = (int16_t)(i % 250); + ray_t* tbl = ray_table_new(1); + tbl = ray_table_add_col(tbl, name_v, vec); + ray_release(vec); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* cd = ray_count_distinct(g, ray_scan(g, "v")); + ray_t* res = ray_execute(g, cd); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(res->i64, 250); + ray_release(res); + ray_graph_free(g); + ray_release(tbl); + } + + /* U8 mod 200 → 200 distinct. */ + { + ray_t* vec = ray_vec_new(RAY_U8, N); + TEST_ASSERT_NOT_NULL(vec); + vec->len = N; + uint8_t* p = (uint8_t*)ray_data(vec); + for (int64_t i = 0; i < N; i++) p[i] = (uint8_t)(i % 200); + ray_t* tbl = ray_table_new(1); + tbl = ray_table_add_col(tbl, name_v, vec); + ray_release(vec); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* cd = ray_count_distinct(g, ray_scan(g, "v")); + ray_t* res = ray_execute(g, cd); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(res->i64, 200); + ray_release(res); + ray_graph_free(g); + ray_release(tbl); + } + + /* BOOL alternating → exactly 2 distinct. 
*/ + { + ray_t* vec = ray_vec_new(RAY_BOOL, N); + TEST_ASSERT_NOT_NULL(vec); + vec->len = N; + uint8_t* p = (uint8_t*)ray_data(vec); + for (int64_t i = 0; i < N; i++) p[i] = (uint8_t)(i & 1); + ray_t* tbl = ray_table_new(1); + tbl = ray_table_add_col(tbl, name_v, vec); + ray_release(vec); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* cd = ray_count_distinct(g, ray_scan(g, "v")); + ray_t* res = ray_execute(g, cd); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(res->i64, 2); + ray_release(res); + ray_graph_free(g); + ray_release(tbl); + } + + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* -------------------------------------------------------------------------- + * Test 14: ray_count_distinct_per_group parallel path (chunk 2) + * + * Direct C invocation of ray_count_distinct_per_group with n_rows >= + * 200000 to reach the parallel branch (group.c:991-997 → + * count_distinct_per_group_parallel L840-949). This path is otherwise + * gated by query.c's n_groups > 50000 check and the >=200000 row count. + * + * Bypasses the rfl pipeline, so any planner / select fast-path + * optimisations don't kick in. Uses the exact API entry point that + * production code calls when the `(count (distinct col)) by k` shape + * lands on the global-hash kernel. + * + * Verifies that the kernel returns a non-error I64 vec of length + * n_groups and that every entry equals the expected per-group distinct + * count. The 200000-row dataset uses gid = i % 51000 and val = i % 16, + * so each group sees ⌈200000/51000⌉ ≤ 4 rows and at most 4 distinct vals. 
+ * -------------------------------------------------------------------------- */
+static test_result_t test_count_distinct_per_group_parallel(void) {
+    ray_heap_init();
+    (void)ray_sym_init();
+
+    const int64_t NROWS = 200000;
+    const int64_t NGROUPS = 51000;
+
+    ray_t* vec = ray_vec_new(RAY_I64, NROWS);
+    TEST_ASSERT_NOT_NULL(vec);
+    vec->len = NROWS;
+    int64_t* p = (int64_t*)ray_data(vec);
+    for (int64_t i = 0; i < NROWS; i++) p[i] = i % 16;
+
+    ray_t* gids = ray_vec_new(RAY_I64, NROWS);
+    TEST_ASSERT_NOT_NULL(gids);
+    gids->len = NROWS;
+    int64_t* gp = (int64_t*)ray_data(gids);
+    for (int64_t i = 0; i < NROWS; i++) gp[i] = i % NGROUPS;
+
+    ray_t* out = ray_count_distinct_per_group(vec, gp, NROWS, NGROUPS);
+    TEST_ASSERT_NOT_NULL(out);
+    TEST_ASSERT_FALSE(RAY_IS_ERR(out));
+    TEST_ASSERT_EQ_I(out->type, RAY_I64);
+    TEST_ASSERT_EQ_I(out->len, NGROUPS);
+
+    /* Group g contains the rows i = g, g + 51000, g + 2*51000, g + 3*51000
+     * (those with i < NROWS) — i.e. up to ⌈(NROWS-g)/NGROUPS⌉ rows. Within
+     * those rows the val column is i % 16, so the distinct count per
+     * group equals the number of distinct (i%16) values across the rows
+     * landed in that group. */
+    int64_t* od = (int64_t*)ray_data(out);
+    for (int64_t g = 0; g < NGROUPS; g++) {
+        /* Reproduce the build-side logic: rows i ∈ [0, NROWS) with i%NGROUPS == g.
+         * Their vals are (i % 16); count distinct. 
*/ + uint8_t seen[16] = {0}; + int64_t expected = 0; + for (int64_t i = g; i < NROWS; i += NGROUPS) { + uint8_t v = (uint8_t)(i % 16); + if (!seen[v]) { seen[v] = 1; expected++; } + } + TEST_ASSERT_FMT(od[g] == expected, + "group %lld: got %lld, expected %lld", + (long long)g, (long long)od[g], (long long)expected); + } + + ray_release(out); + ray_release(gids); + ray_release(vec); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + /* -------------------------------------------------------------------------- * Test registry * -------------------------------------------------------------------------- */ @@ -835,5 +1057,7 @@ const test_entry_t group_extra_entries[] = { { "group_extra/count_distinct_small_types", test_count_distinct_small_types, NULL, NULL }, { "group_extra/reduction_prod_parallel", test_reduction_prod_parallel, NULL, NULL }, { "group_extra/reduction_var_i64_parallel", test_reduction_var_i64_parallel, NULL, NULL }, + { "group_extra/count_distinct_parallel_types", test_count_distinct_parallel_types, NULL, NULL }, + { "group_extra/count_distinct_per_group_parallel", test_count_distinct_per_group_parallel, NULL, NULL }, { NULL, NULL, NULL, NULL }, }; diff --git a/test/test_sym.c b/test/test_sym.c index 56d6fd6a..44712f6f 100644 --- a/test/test_sym.c +++ b/test/test_sym.c @@ -30,6 +30,7 @@ #include "store/col.h" #include "lang/internal.h" #include "ops/hash.h" +#include "ops/glob.h" #include #include @@ -1710,6 +1711,418 @@ static test_result_t test_like_fn_empty_pattern(void) { PASS(); } +/* ══════════════════════════════════════════ + * SYM-LIKE width-matrix coverage (chunks 1, 2, 5) + * + * The dict-cached LIKE path in src/ops/strop.c::ray_like_fn dispatches + * to width-specialised DICT_PASS/ROW_PASS for W8/W16/W32/W64. Default + * `ray_vec_from_raw(RAY_SYM,…)` produces W64 only, leaving the W8/W16/W32 + * cases (lines 317-328) at zero coverage. 
These tests build the column + * directly via ray_sym_vec_new(width, capacity) and drive ray_like_fn + * across each width. + * + * `attrs & RAY_SYM_W_MASK` is asserted post-construction to confirm the + * width bits actually took, otherwise the switch case under test wouldn't + * fire. + * ══════════════════════════════════════════ */ + +/* Helper: build a SYM vector at `width` whose i'th cell is sym_ids[i]. + * Caller-managed lifetime. */ +static ray_t* build_sym_vec(uint8_t width, const int64_t* sym_ids, int64_t n) { + ray_t* v = ray_sym_vec_new(width, n); + if (!v || RAY_IS_ERR(v)) return v; + v->len = n; + void* d = ray_data(v); + for (int64_t i = 0; i < n; i++) + ray_write_sym(d, i, (uint64_t)sym_ids[i], RAY_SYM, width); + return v; +} + +/* --- like_fn: SYM-vec W8 width — DICT_PASS / ROW_PASS u8 case --------- */ +static test_result_t test_like_fn_sym_vec_w8(void) { + int64_t a = ray_sym_intern("alpha", 5); + int64_t b = ray_sym_intern("beta", 4); + int64_t c = ray_sym_intern("gamma", 5); + int64_t ids[6] = { a, b, c, a, b, c }; /* repeats hit the seen-cache */ + ray_t* x = build_sym_vec(RAY_SYM_W8, ids, 6); + TEST_ASSERT_NOT_NULL(x); + TEST_ASSERT_FALSE(RAY_IS_ERR(x)); + /* Verify width actually took (chunk-1/2 contract). 
*/ + TEST_ASSERT_EQ_U(x->attrs & RAY_SYM_W_MASK, RAY_SYM_W8); + + ray_t* pat = ray_str("a*", 2); + ray_t* out = ray_like_fn(x, pat); + TEST_ASSERT_NOT_NULL(out); + TEST_ASSERT_FALSE(RAY_IS_ERR(out)); + TEST_ASSERT_EQ_I(out->len, 6); + uint8_t* d = (uint8_t*)ray_data(out); + TEST_ASSERT_EQ_I(d[0], 1); /* alpha */ + TEST_ASSERT_EQ_I(d[1], 0); /* beta */ + TEST_ASSERT_EQ_I(d[2], 0); /* gamma */ + TEST_ASSERT_EQ_I(d[3], 1); /* alpha (lut[a] cached) */ + TEST_ASSERT_EQ_I(d[4], 0); /* beta */ + TEST_ASSERT_EQ_I(d[5], 0); /* gamma */ + ray_release(out); + ray_release(x); + ray_release(pat); + PASS(); +} + +/* --- like_fn: SYM-vec W16 width — DICT_PASS / ROW_PASS u16 case ------- */ +static test_result_t test_like_fn_sym_vec_w16(void) { + int64_t a = ray_sym_intern("alpha", 5); + int64_t b = ray_sym_intern("beta", 4); + int64_t c = ray_sym_intern("gamma", 5); + int64_t ids[5] = { a, b, c, a, b }; + ray_t* x = build_sym_vec(RAY_SYM_W16, ids, 5); + TEST_ASSERT_NOT_NULL(x); + TEST_ASSERT_FALSE(RAY_IS_ERR(x)); + TEST_ASSERT_EQ_U(x->attrs & RAY_SYM_W_MASK, RAY_SYM_W16); + + ray_t* pat = ray_str("*a", 2); + ray_t* out = ray_like_fn(x, pat); + TEST_ASSERT_NOT_NULL(out); + TEST_ASSERT_FALSE(RAY_IS_ERR(out)); + uint8_t* d = (uint8_t*)ray_data(out); + TEST_ASSERT_EQ_I(d[0], 1); /* alpha */ + TEST_ASSERT_EQ_I(d[1], 1); /* beta */ + TEST_ASSERT_EQ_I(d[2], 1); /* gamma */ + TEST_ASSERT_EQ_I(d[3], 1); + TEST_ASSERT_EQ_I(d[4], 1); + ray_release(out); + ray_release(x); + ray_release(pat); + PASS(); +} + +/* --- like_fn: SYM-vec W32 width — DICT_PASS / ROW_PASS u32 case ------- */ +static test_result_t test_like_fn_sym_vec_w32(void) { + int64_t a = ray_sym_intern("alpha", 5); + int64_t b = ray_sym_intern("beta", 4); + int64_t c = ray_sym_intern("gamma", 5); + int64_t ids[4] = { a, b, c, a }; + ray_t* x = build_sym_vec(RAY_SYM_W32, ids, 4); + TEST_ASSERT_NOT_NULL(x); + TEST_ASSERT_FALSE(RAY_IS_ERR(x)); + TEST_ASSERT_EQ_U(x->attrs & RAY_SYM_W_MASK, RAY_SYM_W32); + + ray_t* pat = 
ray_str("*am*", 4); + ray_t* out = ray_like_fn(x, pat); + TEST_ASSERT_NOT_NULL(out); + TEST_ASSERT_FALSE(RAY_IS_ERR(out)); + uint8_t* d = (uint8_t*)ray_data(out); + TEST_ASSERT_EQ_I(d[0], 0); /* alpha */ + TEST_ASSERT_EQ_I(d[1], 0); /* beta */ + TEST_ASSERT_EQ_I(d[2], 1); /* gamma */ + TEST_ASSERT_EQ_I(d[3], 0); + ray_release(out); + ray_release(x); + ray_release(pat); + PASS(); +} + +/* --- like_fn: SYM-vec W8 with empty-SYM (sym 0) mixed in -------------- + * Chunk 5: sym 0 is reserved as the canonical empty string post-`84d6f4dd`. + * Mixing sid=0 with valid sids exercises the "small string" path inside + * DICT_PASS where lut[0] is computed against the empty string. */ +static test_result_t test_like_fn_sym_vec_w8_empty_sym(void) { + int64_t alpha = ray_sym_intern("alpha", 5); + int64_t ids[4] = { alpha, 0, alpha, 0 }; /* 0 = empty SYM */ + ray_t* x = build_sym_vec(RAY_SYM_W8, ids, 4); + TEST_ASSERT_NOT_NULL(x); + TEST_ASSERT_FALSE(RAY_IS_ERR(x)); + TEST_ASSERT_EQ_U(x->attrs & RAY_SYM_W_MASK, RAY_SYM_W8); + + /* "*" matches anything including empty → all rows true */ + ray_t* pat_any = ray_str("*", 1); + ray_t* out_any = ray_like_fn(x, pat_any); + TEST_ASSERT_NOT_NULL(out_any); + uint8_t* da = (uint8_t*)ray_data(out_any); + TEST_ASSERT_EQ_I(da[0], 1); + TEST_ASSERT_EQ_I(da[1], 1); + TEST_ASSERT_EQ_I(da[2], 1); + TEST_ASSERT_EQ_I(da[3], 1); + ray_release(out_any); + ray_release(pat_any); + + /* "" pattern (SHAPE_EXACT, lit_len=0) matches only the empty string */ + ray_t* pat_empty = ray_str("", 0); + ray_t* out_empty = ray_like_fn(x, pat_empty); + TEST_ASSERT_NOT_NULL(out_empty); + uint8_t* de = (uint8_t*)ray_data(out_empty); + TEST_ASSERT_EQ_I(de[0], 0); /* alpha */ + TEST_ASSERT_EQ_I(de[1], 1); /* "" sym */ + TEST_ASSERT_EQ_I(de[2], 0); + TEST_ASSERT_EQ_I(de[3], 1); + ray_release(out_empty); + ray_release(pat_empty); + + ray_release(x); + PASS(); +} + +/* --- like_fn: SYM-atom — sym 0 (empty) match against "*" -------------- + * The atom path at 
strop.c:217-223 reads sym_str(0) which now returns the + * empty interned string instead of NULL (post-84d6f4dd). */ +static test_result_t test_like_fn_sym_atom_empty(void) { + ray_t* x_empty = ray_sym(0); /* empty SYM, valid atom */ + ray_t* pat_any = ray_str("*", 1); + ray_t* out = ray_like_fn(x_empty, pat_any); + TEST_ASSERT_NOT_NULL(out); + TEST_ASSERT_FALSE(RAY_IS_ERR(out)); + TEST_ASSERT_EQ_I(out->i64, 1); /* "*" matches "" */ + ray_release(out); + ray_release(pat_any); + + /* Empty pattern also matches empty atom */ + ray_t* pat_empty = ray_str("", 0); + ray_t* out2 = ray_like_fn(x_empty, pat_empty); + TEST_ASSERT_EQ_I(out2->i64, 1); + ray_release(out2); + ray_release(pat_empty); + + /* Non-empty pattern fails */ + ray_t* pat_x = ray_str("x", 1); + ray_t* out3 = ray_like_fn(x_empty, pat_x); + TEST_ASSERT_EQ_I(out3->i64, 0); + ray_release(out3); + ray_release(pat_x); + + ray_release(x_empty); + PASS(); +} + +/* --- like_fn: SYM-vec W64 with sym 0 mixed in (formerly null_sym_vec) -- + * Re-exercises the rewritten W64 case where sid=0 is now valid (sym table + * always returns a non-NULL string for sid=0 since b1de30cd). 
*/ +static test_result_t test_like_fn_sym_vec_w64_zero(void) { + int64_t alpha = ray_sym_intern("alpha", 5); + int64_t beta = ray_sym_intern("beta", 4); + int64_t ids[5] = { alpha, 0, beta, 0, alpha }; + ray_t* x = ray_vec_from_raw(RAY_SYM, ids, 5); /* default W64 */ + TEST_ASSERT_NOT_NULL(x); + TEST_ASSERT_FALSE(RAY_IS_ERR(x)); + TEST_ASSERT_EQ_U(x->attrs & RAY_SYM_W_MASK, RAY_SYM_W64); + + /* "a*" matches alpha, not "" or beta */ + ray_t* pat = ray_str("a*", 2); + ray_t* out = ray_like_fn(x, pat); + TEST_ASSERT_NOT_NULL(out); + TEST_ASSERT_FALSE(RAY_IS_ERR(out)); + uint8_t* d = (uint8_t*)ray_data(out); + TEST_ASSERT_EQ_I(d[0], 1); /* alpha */ + TEST_ASSERT_EQ_I(d[1], 0); /* "" */ + TEST_ASSERT_EQ_I(d[2], 0); /* beta */ + TEST_ASSERT_EQ_I(d[3], 0); /* "" */ + TEST_ASSERT_EQ_I(d[4], 1); /* alpha (cached) */ + ray_release(out); + ray_release(x); + ray_release(pat); + PASS(); +} + +/* --- like_fn: SYM-vec width matrix with out-of-range sid ------------- + * Forces the out-of-range branch in DICT_PASS (line 297) and ROW_PASS + * (line 312, falling through to empty_match) for W8 (and by symmetry + * the W16/W32/W64 paths via the macro expansion). Out-of-range for W8 + * means sid >= dict_n in the global sym table; we set a sentinel byte + * value 254 (255 reserved by some platforms; 254 is safely > all + * interned ids in this test's setup). */ +static test_result_t test_like_fn_sym_vec_w8_out_of_range(void) { + int64_t a = ray_sym_intern("a", 1); + /* Build manually with a raw out-of-range byte sid (254). */ + ray_t* x = ray_sym_vec_new(RAY_SYM_W8, 3); + TEST_ASSERT_NOT_NULL(x); + TEST_ASSERT_FALSE(RAY_IS_ERR(x)); + x->len = 3; + uint8_t* d = (uint8_t*)ray_data(x); + d[0] = (uint8_t)a; + d[1] = 254; /* sid >= dict_n */ + d[2] = (uint8_t)a; + TEST_ASSERT_EQ_U(x->attrs & RAY_SYM_W_MASK, RAY_SYM_W8); + /* Sanity: 254 must be out of dict range to drive the branch. 
*/ + TEST_ASSERT((uint64_t)ray_sym_count() < 254ULL, + "dict_n must be < 254 to drive the OOR branch"); + + /* "*" matches anything → empty_match==1, OOR row falls through to + * empty_match (line 314 in strop.c). */ + ray_t* pat_any = ray_str("*", 1); + ray_t* out = ray_like_fn(x, pat_any); + TEST_ASSERT_NOT_NULL(out); + uint8_t* o = (uint8_t*)ray_data(out); + TEST_ASSERT_EQ_I(o[0], 1); + TEST_ASSERT_EQ_I(o[1], 1); /* OOR → empty_match=1 since "*" matches "" */ + TEST_ASSERT_EQ_I(o[2], 1); + ray_release(out); + ray_release(pat_any); + + /* Pattern that does NOT match empty: empty_match==0 → OOR row=0. */ + ray_t* pat_a = ray_str("a", 1); + ray_t* out2 = ray_like_fn(x, pat_a); + TEST_ASSERT_NOT_NULL(out2); + uint8_t* o2 = (uint8_t*)ray_data(out2); + TEST_ASSERT_EQ_I(o2[0], 1); /* "a" matches "a" */ + TEST_ASSERT_EQ_I(o2[1], 0); /* OOR → empty_match=0 since "a"!="" */ + TEST_ASSERT_EQ_I(o2[2], 1); + ray_release(out2); + ray_release(pat_a); + + ray_release(x); + PASS(); +} + +/* --- like_fn: SYM-vec — long pattern forces general matcher (use_simple=false) + * Triggers the `ray_glob_match` arm of DICT_PASS / ROW_PASS rather than + * the compiled fast path. Pattern with an interior wildcard (`a*b*c`) has + * SHAPE_NONE → use_simple=false. */ +static test_result_t test_like_fn_sym_vec_general_matcher(void) { + int64_t s1 = ray_sym_intern("axbyc", 5); + int64_t s2 = ray_sym_intern("a-b-c", 5); + int64_t s3 = ray_sym_intern("nope", 4); + int64_t ids[4] = { s1, s2, s3, s1 }; + ray_t* x = build_sym_vec(RAY_SYM_W8, ids, 4); + TEST_ASSERT_NOT_NULL(x); + TEST_ASSERT_FALSE(RAY_IS_ERR(x)); + TEST_ASSERT_EQ_U(x->attrs & RAY_SYM_W_MASK, RAY_SYM_W8); + + /* Interior `*` between literal a/b/c → SHAPE_NONE, exercises + * ray_glob_match (general matcher) inside DICT_PASS. 
*/ + ray_t* pat = ray_str("a*b*c", 5); + ray_t* out = ray_like_fn(x, pat); + TEST_ASSERT_NOT_NULL(out); + TEST_ASSERT_FALSE(RAY_IS_ERR(out)); + uint8_t* d = (uint8_t*)ray_data(out); + TEST_ASSERT_EQ_I(d[0], 1); + TEST_ASSERT_EQ_I(d[1], 1); + TEST_ASSERT_EQ_I(d[2], 0); + TEST_ASSERT_EQ_I(d[3], 1); + ray_release(out); + ray_release(x); + ray_release(pat); + PASS(); +} + +/* ══════════════════════════════════════════ + * src/ops/glob.[ch] direct coverage — chunks 7-9 + * + * These tests poke ray_glob_match{,_ci} and ray_glob_compile directly, + * driving cases that the higher-level rfl tests cannot reach precisely: + * • RAY_GLOB_SHAPE_ANY through ray_glob_match_compiled + * • RAY_GLOB_SHAPE_NONE default fall-through (caller-contract guard) + * • match_class CI branches (line 36 ci=true) + * ══════════════════════════════════════════ */ + +/* --- glob_match_compiled: SHAPE_ANY direct hit ----------------------- + * Note: `*` alone compiles to SHAPE_SUFFIX (the trailing-star flag is + * suppressed when the same `*` is also the leading char — see + * glob.c:121-123). To trigger SHAPE_ANY we need both a leading and + * trailing `*` with empty literal in between, i.e. `**`. */ +static test_result_t test_glob_match_compiled_shape_any(void) { + ray_glob_compiled_t pc = ray_glob_compile("**", 2); + TEST_ASSERT_EQ_I(pc.shape, RAY_GLOB_SHAPE_ANY); + /* SHAPE_ANY: every input matches, including empty. */ + TEST_ASSERT_TRUE(ray_glob_match_compiled(&pc, "anything", 8)); + TEST_ASSERT_TRUE(ray_glob_match_compiled(&pc, "", 0)); + + /* SHAPE_SUFFIX with empty lit (single `*`) hits the lit_len==0 + * branch in case RAY_GLOB_SHAPE_SUFFIX (line 165 of glob.c). 
*/ + ray_glob_compiled_t pc_star = ray_glob_compile("*", 1); + TEST_ASSERT_EQ_I(pc_star.shape, RAY_GLOB_SHAPE_SUFFIX); + TEST_ASSERT_TRUE(ray_glob_match_compiled(&pc_star, "anything", 8)); + TEST_ASSERT_TRUE(ray_glob_match_compiled(&pc_star, "", 0)); + PASS(); +} + +/* --- glob_match_compiled: SHAPE_NONE caller-contract guard ---------- + * Pattern with interior `*` → SHAPE_NONE. Calling + * ray_glob_match_compiled with such a shape is a contract violation; + * the function must fall through to `return false` (line 196), not + * silently match everything. */ +static test_result_t test_glob_match_compiled_shape_none(void) { + ray_glob_compiled_t pc = ray_glob_compile("a*b*c", 5); + TEST_ASSERT_EQ_I(pc.shape, RAY_GLOB_SHAPE_NONE); + TEST_ASSERT_FALSE(ray_glob_match_compiled(&pc, "axbyc", 5)); + TEST_ASSERT_FALSE(ray_glob_match_compiled(&pc, "anything", 8)); + PASS(); +} + +/* --- glob_match_compiled: SHAPE_EXACT empty / non-empty -------------- */ +static test_result_t test_glob_match_compiled_shape_exact(void) { + ray_glob_compiled_t pc_empty = ray_glob_compile("", 0); + TEST_ASSERT_EQ_I(pc_empty.shape, RAY_GLOB_SHAPE_EXACT); + TEST_ASSERT_TRUE(ray_glob_match_compiled(&pc_empty, "", 0)); + TEST_ASSERT_FALSE(ray_glob_match_compiled(&pc_empty, "x", 1)); + + ray_glob_compiled_t pc = ray_glob_compile("hello", 5); + TEST_ASSERT_EQ_I(pc.shape, RAY_GLOB_SHAPE_EXACT); + TEST_ASSERT_TRUE(ray_glob_match_compiled(&pc, "hello", 5)); + TEST_ASSERT_FALSE(ray_glob_match_compiled(&pc, "hellx", 5)); + TEST_ASSERT_FALSE(ray_glob_match_compiled(&pc, "hell", 4)); + PASS(); +} + +/* --- glob_match_ci: case-insensitive class with mixed-case input --- + * Drives match_class with ci=true (line 34/36 in glob.c). The + * ray_glob_match_compiled path skips classes (SHAPE_NONE forces general + * matcher), so we use ray_glob_match_ci which routes through glob_impl + * with ci=true. 
*/
+static test_result_t test_glob_match_ci_class_branches(void) {
+    /* `[A-Z]ello` with ci=true must match lowercase 'h' too. */
+    TEST_ASSERT_TRUE (ray_glob_match_ci("hello", 5, "[A-Z]ello", 9));
+    TEST_ASSERT_TRUE (ray_glob_match_ci("Hello", 5, "[A-Z]ello", 9));
+    TEST_ASSERT_FALSE(ray_glob_match_ci("3ello", 5, "[A-Z]ello", 9));
+
+    /* Negated class with ci. */
+    TEST_ASSERT_TRUE (ray_glob_match_ci("3ello", 5, "[!A-Z]ello", 10));
+    TEST_ASSERT_FALSE(ray_glob_match_ci("hello", 5, "[!A-Z]ello", 10));
+    TEST_ASSERT_FALSE(ray_glob_match_ci("Hello", 5, "[!A-Z]ello", 10));
+
+    /* Single-char class — no range, ci-fold matters. */
+    TEST_ASSERT_TRUE(ray_glob_match_ci("Apple", 5, "[a]pple", 7));
+    TEST_ASSERT_TRUE(ray_glob_match_ci("apple", 5, "[A]pple", 7));
+
+    /* Mixed-case inside `[]` with ci: both cases match either input. */
+    TEST_ASSERT_TRUE(ray_glob_match_ci("Apple", 5, "[Aa]pple", 8));
+    TEST_ASSERT_TRUE(ray_glob_match_ci("apple", 5, "[Aa]pple", 8));
+    PASS();
+}
+
+/* --- glob_match: punctuation / digit / non-ascii classes ----------- */
+static test_result_t test_glob_match_class_edge(void) {
+    /* Digit range. */
+    TEST_ASSERT_TRUE (ray_glob_match("5", 1, "[0-9]", 5));
+    TEST_ASSERT_FALSE(ray_glob_match("a", 1, "[0-9]", 5));
+
+    /* Single-char class containing meta-char. */
+    TEST_ASSERT_TRUE (ray_glob_match("?", 1, "[?]", 3));
+    TEST_ASSERT_TRUE (ray_glob_match("*", 1, "[*]", 3));
+
+    /* Empty class-content: `[` immediately followed by `]`. The class
+     * loop runs while `first || p[i] != ']'`, so on its first iteration
+     * it consumes that `]` as a literal class member, then scans to the
+     * end of the pattern without ever finding a closing `]`.
+     * Documented behaviour: no match. */
+    TEST_ASSERT_FALSE(ray_glob_match("a", 1, "[]", 2));
+
+    /* ']' as first char of class is literal (allowed by spec). 
*/ + TEST_ASSERT_TRUE(ray_glob_match("]", 1, "[]]", 3)); + + /* Hyphen-trailing class `[a-]` — `-` is literal because there is + * no third char. Loop hits the else branch (not a range). */ + TEST_ASSERT_TRUE (ray_glob_match("-", 1, "[a-]", 4)); + TEST_ASSERT_TRUE (ray_glob_match("a", 1, "[a-]", 4)); + TEST_ASSERT_FALSE(ray_glob_match("b", 1, "[a-]", 4)); + + /* Range that does not match — exercises i+=3 with no match. */ + TEST_ASSERT_FALSE(ray_glob_match("9", 1, "[a-z]", 5)); + + /* Unterminated class — implementation accepts the partial class + * up to end-of-pattern. Documenting the behaviour, not enforcing + * a stricter contract. */ + TEST_ASSERT_TRUE(ray_glob_match("a", 1, "[abc", 4)); + PASS(); +} + /* ---- Suite definition -------------------------------------------------- */ @@ -1793,6 +2206,23 @@ const test_entry_t sym_entries[] = { { "sym/like_fn/wrong_type", test_like_fn_wrong_type, sym_setup, sym_teardown }, { "sym/like_fn/empty_pattern", test_like_fn_empty_pattern, sym_setup, sym_teardown }, + /* SYM-LIKE width matrix and empty-SYM (chunks 1, 2, 5) */ + { "sym/like_fn/sym_vec_w8", test_like_fn_sym_vec_w8, sym_setup, sym_teardown }, + { "sym/like_fn/sym_vec_w16", test_like_fn_sym_vec_w16, sym_setup, sym_teardown }, + { "sym/like_fn/sym_vec_w32", test_like_fn_sym_vec_w32, sym_setup, sym_teardown }, + { "sym/like_fn/sym_vec_w8_empty_sym", test_like_fn_sym_vec_w8_empty_sym, sym_setup, sym_teardown }, + { "sym/like_fn/sym_atom_empty", test_like_fn_sym_atom_empty, sym_setup, sym_teardown }, + { "sym/like_fn/sym_vec_w64_zero", test_like_fn_sym_vec_w64_zero, sym_setup, sym_teardown }, + { "sym/like_fn/sym_vec_w8_out_of_range", test_like_fn_sym_vec_w8_out_of_range, sym_setup, sym_teardown }, + { "sym/like_fn/sym_vec_general_matcher", test_like_fn_sym_vec_general_matcher, sym_setup, sym_teardown }, + + /* glob.c direct (chunks 7-9) */ + { "sym/glob/match_compiled_shape_any", test_glob_match_compiled_shape_any, sym_setup, sym_teardown }, + { 
"sym/glob/match_compiled_shape_none", test_glob_match_compiled_shape_none, sym_setup, sym_teardown }, + { "sym/glob/match_compiled_shape_exact", test_glob_match_compiled_shape_exact, sym_setup, sym_teardown }, + { "sym/glob/match_ci_class_branches", test_glob_match_ci_class_branches, sym_setup, sym_teardown }, + { "sym/glob/match_class_edge", test_glob_match_class_edge, sym_setup, sym_teardown }, + { NULL, NULL, NULL, NULL }, };