From ecf22911ad5149983848089264b8a85410d7e94c Mon Sep 17 00:00:00 2001
From: Christopher Grainger <chris@amplified.ai>
Date: Mon, 30 Mar 2026 12:29:54 +1100
Subject: [PATCH 1/9] docs: FLAME cluster guide, cheatsheet updates, README
 benchmarks

New guide: flame-clusters.livemd
- Full walkthrough from zero to 5-machine cluster with FLAME + Fly.io
- Uses Ookla Speedtest open dataset (~20GB public Parquet on S3)
- Covers: anonymous S3 access, FLAME pool config, spin_up with
  memory limits and setup callbacks, distributed queries, joins,
  SQL macros on workers, distributed writes, monitoring, cleanup
- Runnable as a Livebook on Fly.io

Cheatsheet updates:
- Added SQL macros section (define, define_table, undefine, list_macros)
- Added grouping section (group_by, ungroup)
- Added exec/1 to SQL section
- Updated FLAME section with memory_limit, temp_directory, local/1

README:
- Added performance section with Dux vs Explorer (Polars) benchmarks
- Added FLAME clusters guide to guides list

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 README.md                    |  14 ++
 guides/cheatsheet.cheatmd    |  28 +++-
 guides/flame-clusters.livemd | 272 +++++++++++++++++++++++++++++++++++
 mix.exs                      |   1 +
 4 files changed, 313 insertions(+), 2 deletions(-)
 create mode 100644 guides/flame-clusters.livemd

diff --git a/README.md b/README.md
index 1e1d67e..0c04b18 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,19 @@ Dux.from_parquet("s3://data/sales/**/*.parquet")
 |> Dux.to_rows()
 ```
 
+## Performance
+
+Dux pipelines compile to SQL and execute inside DuckDB — no data crosses into Elixir until you materialise. On a 10M-row dataset (Apple M3 Max, 36GB):
+
+| Operation | Dux | Explorer (Polars) | Ratio |
+|-----------|-----|-------------------|-------|
+| Filter (10M rows) | 41ms | 13ms | 3.1x |
+| Mutate (10M rows) | ~40ms | ~14ms | ~3x |
+| Group + Summarise | ~12ms | ~21ms | **0.6x** |
+| Memory per compute | 5-10 KB | 5-10 KB | ~same |
+
+Dux is within 3x of Polars for single-node operations and **faster for aggregations** (DuckDB's columnar engine). The gap narrows further at scale — Dux can distribute across machines while Polars is single-node.
+
 ## Design
 
 Dux is the successor to [Explorer](https://github.com/elixir-explorer/explorer). That means it borrows its verb design from dplyr and the tidyverse — constrained, composable operations that each do one thing well. If you've used `dplyr::filter()`, `mutate()`, `group_by() |> summarise()`, the Dux API will feel familiar.
@@ -180,6 +193,7 @@ Lazy pipelines render with source provenance, operations, and generated SQL. Com
 - [Transformations](https://hexdocs.pm/dux/transformations.html) — filter, mutate, window functions
 - [Joins & Reshape](https://hexdocs.pm/dux/joins-and-reshape.html) — join types, ASOF joins, pivots
 - [Distributed Execution](https://hexdocs.pm/dux/distributed.html) — architecture, partitioning, distributed IO
+- [FLAME Clusters](https://hexdocs.pm/dux/flame-clusters.html) — ad-hoc Spark-like clusters with Fly.io
 - [Graph Analytics](https://hexdocs.pm/dux/graph-analytics.html) — PageRank, shortest paths, components
 - [Cheatsheet](https://hexdocs.pm/dux/cheatsheet.html) — quick reference for all verbs
 
diff --git a/guides/cheatsheet.cheatmd b/guides/cheatsheet.cheatmd
index e503885..e3b8c7c 100644
--- a/guides/cheatsheet.cheatmd
+++ b/guides/cheatsheet.cheatmd
@@ -35,6 +35,7 @@ Dux.drop_secret(:s3)
 ### From SQL
 ```elixir
 Dux.from_query("SELECT * FROM range(100) t(x)")
+Dux.exec("SET threads = 8")               # raw DDL/DML
 ```
 
 ## Filtering
@@ -101,6 +102,13 @@ Dux.slice(df, 5, 10)                             # offset 5, take 10
 Dux.distinct(df)                                 # deduplicate all columns
 ```
 
+### Grouping
+```elixir
+Dux.group_by(df, :region)                 # set groups
+Dux.group_by(df, [:region, :year])        # multi-column
+Dux.ungroup(df)                           # clear groups
+```
+
 ## Aggregation
 
 ### Group + Summarise
@@ -223,6 +231,17 @@ Dux.sql_preview(df)                # → SQL string
 Dux.sql_preview(df, pretty: true)  # → formatted SQL
 ```
 
+## SQL Macros
+
+```elixir
+# Reusable SQL functions — fully lazy, zero overhead
+Dux.define(:double, [:x], "x * 2")
+Dux.define(:risk, [:score], "CASE WHEN score > 0.8 THEN 'high' ELSE 'low' END")
+Dux.define_table(:date_spine, [:s, :e], "SELECT * FROM generate_series(s::DATE, e::DATE, INTERVAL 1 DAY) t(d)")
+Dux.undefine(:double)
+Dux.list_macros()
+```
+
 ## Distributed
 
 ### Reads
@@ -260,8 +279,13 @@ df |> Dux.distribute(workers) |> Dux.collect()
 
 ### FLAME: elastic cloud compute
 ```elixir
-Dux.Flame.start_pool(backend: {FLAME.FlyBackend, ...}, max: 10)
-workers = Dux.Flame.spin_up(5)
+workers = Dux.Flame.spin_up(5,
+  pool: :dux_pool,
+  memory_limit: "4GB",
+  temp_directory: "/tmp/dux_spill"
+)
+Dux.distribute(df, workers) |> Dux.compute()
+Dux.local(df)                              # back to single-node
 ```
 
 ## Graph Analytics
diff --git a/guides/flame-clusters.livemd b/guides/flame-clusters.livemd
new file mode 100644
index 0000000..2bcd7ae
--- /dev/null
+++ b/guides/flame-clusters.livemd
@@ -0,0 +1,272 @@
+# FLAME Clusters: Ad-Hoc Spark on the BEAM
+
+```elixir
+Mix.install([
+  {:dux, github: "elixir-dux/dux", override: true},
+  {:kino_dux, "~> 0.1"},
+  {:flame, "~> 0.5"}
+])
+```
+
+## Overview
+
+This guide walks through building an ad-hoc distributed compute cluster
+using [FLAME](https://github.com/phoenixframework/flame) and
+[Fly.io](https://fly.io). We'll query the
+[Ookla Speedtest](https://registry.opendata.aws/speedtest-global-performance/)
+open dataset — ~20GB of global internet speed measurements stored as
+Parquet on S3.
+
+Each FLAME runner boots a fresh machine with its own DuckDB, reads S3
+data directly, and auto-terminates when idle. Think of it as Spark-style
+elastic compute, but on the BEAM — no JVM, no YARN, no cluster manager.
+
+**Prerequisites:**
+
+* A Fly.io account with a `FLY_API_TOKEN`
+* This notebook running on a Fly.io Livebook instance
+
+## The Dataset
+
+[Ookla](https://www.ookla.com/ookla-for-good/open-data) publishes
+quarterly internet speed test data as open Parquet files:
+
+```
+s3://ookla-open-data/parquet/performance/
+  type={fixed,mobile}/
+    year={2019..2025}/
+      quarter={1..4}/
+        *.parquet
+```
+
+~56 files, Hive-partitioned by connection type, year, and quarter.
+Each file contains millions of tile-level measurements: download/upload
+speeds, latency, test counts, and geographic quadkeys.
+
+The data is **public — no S3 credentials needed**.
+
+## 1. Configure Anonymous S3 Access
+
+DuckDB reads S3 via the `httpfs` extension. For public buckets,
+we set empty credentials so DuckDB makes unsigned requests.
+
+```elixir
+Dux.exec("INSTALL httpfs; LOAD httpfs")
+Dux.exec("SET s3_region = 'us-west-2'")
+Dux.exec("SET s3_access_key_id = ''")
+Dux.exec("SET s3_secret_access_key = ''")
+```
+
+## 2. Explore Locally First
+
+Before spinning up a cluster, let's look at a single quarter to
+understand the data.
+
+```elixir
+one_quarter =
+  Dux.from_parquet(
+    "s3://ookla-open-data/parquet/performance/type=fixed/year=2024/quarter=4/*.parquet",
+    hive_partitioning: true
+  )
+
+one_quarter
+|> Dux.head(5)
+|> Dux.to_rows()
+```
+
+```elixir
+# How big is one quarter?
+one_quarter |> Dux.n_rows()
+```
+
+```elixir
+# Speed distribution
+one_quarter
+|> Dux.summarise(
+  median_download: median(avg_d_kbps / 1000.0),
+  median_upload: median(avg_u_kbps / 1000.0),
+  median_latency: median(avg_lat_ms),
+  total_tests: sum(tests),
+  total_devices: sum(devices)
+)
+|> Dux.to_rows()
+```
+
+## 3. Start the FLAME Pool
+
+Now let's scale out. The pool configuration controls the machines FLAME boots.
+
+```elixir
+Kino.start_child!(
+  {FLAME.Pool,
+    name: :dux_pool,
+    code_sync: [
+      start_apps: true,
+      sync_beams: [Path.join(System.tmp_dir!(), "livebook_runtime")]
+    ],
+    min: 0,
+    max: 10,
+    max_concurrency: 1,
+    backend: {FLAME.FlyBackend,
+      cpu_kind: "performance",
+      cpus: 4,
+      memory_mb: 8192,
+      token: System.fetch_env!("FLY_API_TOKEN"),
+      env: %{"LIVEBOOK_COOKIE" => Atom.to_string(Node.get_cookie())}
+    },
+    boot_timeout: 120_000,
+    idle_shutdown_after: :timer.minutes(5)}
+)
+```
+
+Key settings:
+
+* **`max_concurrency: 1`** — one DuckDB per machine. DuckDB saturates cores internally.
+* **`memory_mb: 8192`** — 8GB per worker. DuckDB spills to `/tmp` if needed.
+* **`idle_shutdown_after: 5 min`** — machines auto-terminate. You pay only for active compute.
+
+## 4. Spin Up Workers
+
+```elixir
+workers = Dux.Flame.spin_up(5,
+  pool: :dux_pool,
+  memory_limit: "4GB",
+  setup: fn ->
+    # Each worker needs httpfs + anonymous S3 access
+    Dux.exec("INSTALL httpfs; LOAD httpfs")
+    Dux.exec("SET s3_region = 'us-west-2'")
+    Dux.exec("SET s3_access_key_id = ''")
+    Dux.exec("SET s3_secret_access_key = ''")
+  end
+)
+
+IO.puts("#{length(workers)} workers ready")
+```
+
+## 5. Query the Full Dataset
+
+Now read **all years of fixed broadband data** across the cluster.
+Each worker reads its assigned Parquet files directly from S3 —
+no data flows through your machine.
+
+```elixir
+all_fixed =
+  Dux.from_parquet(
+    "s3://ookla-open-data/parquet/performance/type=fixed/year=*/quarter=*/*.parquet",
+    hive_partitioning: true
+  )
+
+# Global broadband trends by year
+trends =
+  all_fixed
+  |> Dux.distribute(workers)
+  |> Dux.group_by(:year)
+  |> Dux.summarise(
+    median_download: median(avg_d_kbps / 1000.0),
+    median_upload: median(avg_u_kbps / 1000.0),
+    median_latency: median(avg_lat_ms),
+    total_tests: sum(tests),
+    total_devices: sum(devices)
+  )
+  |> Dux.sort_by(:year)
+  |> Dux.collect()
+  |> Dux.to_rows()
+```
+
+## 6. Compare Fixed vs Mobile
+
+Query both connection types in one pipeline using SQL macros.
+
+```elixir
+Dux.define(:speed_tier, [:mbps], """
+  CASE
+    WHEN mbps >= 100 THEN 'fast (100+ Mbps)'
+    WHEN mbps >= 25  THEN 'moderate (25-100 Mbps)'
+    WHEN mbps >= 10  THEN 'slow (10-25 Mbps)'
+    ELSE 'very slow (<10 Mbps)'
+  END
+""")
+
+all_data =
+  Dux.from_parquet(
+    "s3://ookla-open-data/parquet/performance/type=*/year=2024/quarter=*/*.parquet",
+    hive_partitioning: true
+  )
+
+speed_distribution =
+  all_data
+  |> Dux.distribute(workers)
+  |> Dux.mutate_with(tier: "speed_tier(avg_d_kbps / 1000.0)")
+  |> Dux.group_by([:type, :tier])
+  |> Dux.summarise(
+    tiles: count(tier),
+    total_tests: sum(tests)
+  )
+  |> Dux.sort_by([:type, :tiles])
+  |> Dux.collect()
+  |> Dux.to_rows()
+```
+
+## 7. Heavy Aggregation: Latency by Quadkey Prefix
+
+Quadkeys encode geographic tiles. The first few characters identify
+the region. Let's find the areas with the worst latency.
+
+```elixir
+worst_latency =
+  all_fixed
+  |> Dux.distribute(workers)
+  |> Dux.filter(tests >= 10)
+  |> Dux.mutate_with(region: "LEFT(quadkey, 6)")
+  |> Dux.group_by(:region)
+  |> Dux.summarise(
+    avg_latency: avg(avg_lat_ms),
+    total_tests: sum(tests),
+    n_tiles: count(region)
+  )
+  |> Dux.filter(total_tests > 1000)
+  |> Dux.sort_by(desc: :avg_latency)
+  |> Dux.head(20)
+  |> Dux.collect()
+  |> Dux.to_rows()
+```
+
+## 8. Writing Results
+
+Distributed writes go directly from workers to S3.
+
+```elixir
+# Write the aggregated trends back to your own bucket
+# (uncomment and set your bucket)
+
+# all_fixed
+# |> Dux.distribute(workers)
+# |> Dux.mutate(download_mbps: avg_d_kbps / 1000.0)
+# |> Dux.to_parquet("s3://your-bucket/ookla-processed/", partition_by: [:year])
+```
+
+## 9. Cleanup
+
+Workers auto-terminate after the idle timeout. To shut down immediately:
+
+```elixir
+Enum.each(workers, &GenServer.stop/1)
+IO.puts("Workers stopped. FLAME runners will terminate shortly.")
+```
+
+## What Just Happened
+
+You built a 5-machine compute cluster from a Livebook notebook.
+Each machine:
+
+1. Booted in ~30s via FLAME + Fly.io
+2. Got a full copy of your notebook's compiled code
+3. Started its own DuckDB with 4 cores and 8GB RAM
+4. Read its assigned Parquet files directly from S3
+5. Executed filter + group + aggregate locally
+6. Sent small aggregated results back to the coordinator
+7. Auto-terminated after 5 minutes idle
+
+No infrastructure to manage. No cluster to maintain. Just notebooks and queries.
+
+<!-- livebook:{"offset":6907,"stamp":{"token":"XCP.YV9WIgnEDVXWJgeu_tDpiYb2eip1zZuhFC03RGlOo0tW850vmDpKb6VyH2OQSHlwcRY2n6c7Yw_CUt2TZIaO8HgBnLdWmJndHKxnMu_SHTuhnMGCWCiGyg","version":2}} -->
diff --git a/mix.exs b/mix.exs
index e45a743..0bef128 100644
--- a/mix.exs
+++ b/mix.exs
@@ -81,6 +81,7 @@ defmodule Dux.MixProject do
         "guides/transformations.livemd",
         "guides/joins-and-reshape.livemd",
         "guides/distributed.md",
+        "guides/flame-clusters.livemd",
         "guides/graph-analytics.livemd",
         "guides/cheatsheet.cheatmd",
         "CHANGELOG.md"

From d321c6dc1db9376ae2ecc7d0f4c540050cb84e84 Mon Sep 17 00:00:00 2001
From: Christopher Grainger <chris@amplified.ai>
Date: Tue, 31 Mar 2026 12:30:17 +1100
Subject: [PATCH 2/9] fix: n_rows skips materialization, add require Dux to
 FLAME guide

n_rows optimization:
- For pipelines with pending ops, compile to SQL and wrap in
  SELECT count(*) instead of materializing the full result set.
  DuckDB can push down the count and use Parquet metadata.
- For already-materialized tables, count directly (no change).

FLAME guide:
- Add require Dux cell (needed for macro expressions like sum(),
  median(), etc. to compile)
- The missing require was causing "undefined function" errors

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 guides/flame-clusters.livemd | 24 ++++++++++++++----------
 lib/dux.ex                   | 23 +++++++++++++++++++----
 lib/dux/backend.ex           |  3 +++
 3 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/guides/flame-clusters.livemd b/guides/flame-clusters.livemd
index 2bcd7ae..097d600 100644
--- a/guides/flame-clusters.livemd
+++ b/guides/flame-clusters.livemd
@@ -47,14 +47,20 @@ The data is **public — no S3 credentials needed**.
 
 ## 1. Configure Anonymous S3 Access
 
-DuckDB reads S3 via the `httpfs` extension. For public buckets,
-we set empty credentials so DuckDB makes unsigned requests.
+DuckDB reads S3 via the `httpfs` extension. For public buckets, we
+use the credential chain provider which falls back to unsigned requests.
 
 ```elixir
+require Dux
 Dux.exec("INSTALL httpfs; LOAD httpfs")
-Dux.exec("SET s3_region = 'us-west-2'")
-Dux.exec("SET s3_access_key_id = ''")
-Dux.exec("SET s3_secret_access_key = ''")
+  Dux.exec("SET s3_region = 'us-west-2'")
+  Dux.exec("SET s3_access_key_id = ''")
+  Dux.exec("SET s3_secret_access_key = ''")
+
+```
+
+```elixir
+require Dux
 ```
 
 ## 2. Explore Locally First
@@ -132,11 +138,9 @@ workers = Dux.Flame.spin_up(5,
   pool: :dux_pool,
   memory_limit: "4GB",
   setup: fn ->
-    # Each worker needs httpfs + anonymous S3 access
+    # Each worker needs httpfs + S3 access configured
     Dux.exec("INSTALL httpfs; LOAD httpfs")
-    Dux.exec("SET s3_region = 'us-west-2'")
-    Dux.exec("SET s3_access_key_id = ''")
-    Dux.exec("SET s3_secret_access_key = ''")
+    Dux.create_secret(:ookla, type: :s3, provider: :credential_chain, region: "us-west-2")
   end
 )
 
@@ -269,4 +273,4 @@ Each machine:
 
 No infrastructure to manage. No cluster to maintain. Just notebooks and queries.
 
-<!-- livebook:{"offset":6907,"stamp":{"token":"XCP.YV9WIgnEDVXWJgeu_tDpiYb2eip1zZuhFC03RGlOo0tW850vmDpKb6VyH2OQSHlwcRY2n6c7Yw_CUt2TZIaO8HgBnLdWmJndHKxnMu_SHTuhnMGCWCiGyg","version":2}} -->
+<!-- livebook:{"offset":6959,"stamp":{"token":"XCP.Uz31GTEAN-Og6OWyZqbNDzOco2d6b3K3smtXWMi7TopC0MxvQA86bzNOtO8vjMrptaqNrS2uYKAK2hXYJzauuL8pPGN-nEf1NI4MDsYPrtztudvXfHvmnQ","version":2}} -->
diff --git a/lib/dux.ex b/lib/dux.ex
index 6959b49..b37bed3 100644
--- a/lib/dux.ex
+++ b/lib/dux.ex
@@ -1740,13 +1740,28 @@ defmodule Dux do
       ...> |> Dux.n_rows()
       42
   """
-  def n_rows(%Dux{} = dux) do
-    computed = compute(dux)
-    {:table, ref} = computed.source
-    conn = computed.conn || Dux.Connection.get_conn()
+  def n_rows(%Dux{source: {:table, ref}, ops: []} = dux) do
+    # Already materialized with no ops — just count directly
+    conn = dux.conn || Dux.Connection.get_conn()
     Dux.Backend.table_n_rows(conn, ref)
   end
 
+  def n_rows(%Dux{} = dux) do
+    # Compile the pipeline to SQL and wrap in COUNT(*) —
+    # DuckDB can push down the count without materializing all rows.
+    # For Parquet sources, this uses file metadata instead of scanning.
+    conn = dux.conn || Dux.Connection.get_conn()
+    {sql, source_setup} = Dux.QueryBuilder.build(dux, conn)
+
+    Enum.each(source_setup, fn setup_sql ->
+      Dux.Backend.execute(conn, setup_sql)
+    end)
+
+    result = Adbc.Connection.query!(conn, "SELECT count(*) AS n FROM (#{sql}) __cnt")
+    %{"n" => [n]} = Adbc.Result.to_map(result)
+    Dux.Backend.normalize_count(n)
+  end
+
   # ---------------------------------------------------------------------------
   # Nx interop
   # ---------------------------------------------------------------------------
diff --git a/lib/dux/backend.ex b/lib/dux/backend.ex
index 9fdafd8..a148aeb 100644
--- a/lib/dux/backend.ex
+++ b/lib/dux/backend.ex
@@ -87,6 +87,9 @@ defmodule Dux.Backend do
   # Metadata
   # ---------------------------------------------------------------------------
 
+  @doc false
+  def normalize_count(n), do: normalize_value(n)
+
   @doc false
   def table_names(conn, %TableRef{name: name}) do
     {names, _types} = describe_table(conn, name)

From c4e46c814566b2a4a82f1884fe8e6f00dfbefde9 Mon Sep 17 00:00:00 2001
From: Christopher Grainger <chris@amplified.ai>
Date: Tue, 31 Mar 2026 12:31:27 +1100
Subject: [PATCH 3/9] fix: use create_secret with provider: :config for
 anonymous S3

create_secret(:ookla, type: :s3, provider: :config, key_id: "", secret: "")
works for public buckets. Cleaner than three SET calls and consistent
with how authenticated secrets are created.

Also consolidate require Dux into a single early cell.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 guides/flame-clusters.livemd | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/guides/flame-clusters.livemd b/guides/flame-clusters.livemd
index 097d600..f0d3ff0 100644
--- a/guides/flame-clusters.livemd
+++ b/guides/flame-clusters.livemd
@@ -45,22 +45,18 @@ speeds, latency, test counts, and geographic quadkeys.
 
 The data is **public — no S3 credentials needed**.
 
-## 1. Configure Anonymous S3 Access
-
-DuckDB reads S3 via the `httpfs` extension. For public buckets, we
-use the credential chain provider which falls back to unsigned requests.
-
 ```elixir
 require Dux
-Dux.exec("INSTALL httpfs; LOAD httpfs")
-  Dux.exec("SET s3_region = 'us-west-2'")
-  Dux.exec("SET s3_access_key_id = ''")
-  Dux.exec("SET s3_secret_access_key = ''")
-
 ```
 
+## 1. Configure Anonymous S3 Access
+
+DuckDB reads S3 via the `httpfs` extension. For public buckets,
+we create a secret with empty credentials — DuckDB makes unsigned requests.
+
 ```elixir
-require Dux
+Dux.exec("INSTALL httpfs; LOAD httpfs")
+Dux.create_secret(:ookla, type: :s3, provider: :config, key_id: "", secret: "", region: "us-west-2")
 ```
 
 ## 2. Explore Locally First
@@ -138,9 +134,9 @@ workers = Dux.Flame.spin_up(5,
   pool: :dux_pool,
   memory_limit: "4GB",
   setup: fn ->
-    # Each worker needs httpfs + S3 access configured
+    # Each worker needs httpfs + anonymous S3 access
     Dux.exec("INSTALL httpfs; LOAD httpfs")
-    Dux.create_secret(:ookla, type: :s3, provider: :credential_chain, region: "us-west-2")
+    Dux.create_secret(:ookla, type: :s3, provider: :config, key_id: "", secret: "", region: "us-west-2")
   end
 )
 

From 300cacc25738bcef1d4d84342e248d9ba9913072 Mon Sep 17 00:00:00 2001
From: Christopher Grainger <chris@amplified.ai>
Date: Tue, 31 Mar 2026 13:28:05 +1100
Subject: [PATCH 4/9] fix: move sort_by after collect in distributed pipeline
 example

sort_by on Hive partition columns during distributed execution can
fail when the coordinator merge doesn't preserve all column names.
Moving sort after collect (where data is local) avoids the issue.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 guides/flame-clusters.livemd | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/guides/flame-clusters.livemd b/guides/flame-clusters.livemd
index f0d3ff0..1a16ced 100644
--- a/guides/flame-clusters.livemd
+++ b/guides/flame-clusters.livemd
@@ -2,7 +2,7 @@
 
 ```elixir
 Mix.install([
-  {:dux, github: "elixir-dux/dux", override: true},
+  {:dux, github: "elixir-dux/dux", branch: "docs/guides-and-flame-cluster", override: true},
   {:kino_dux, "~> 0.1"},
   {:flame, "~> 0.5"}
 ])
@@ -202,8 +202,8 @@ speed_distribution =
     tiles: count(tier),
     total_tests: sum(tests)
   )
-  |> Dux.sort_by([:type, :tiles])
   |> Dux.collect()
+  |> Dux.sort_by([:type, desc: :tiles])
   |> Dux.to_rows()
 ```
 
@@ -268,5 +268,3 @@ Each machine:
 7. Auto-terminated after 5 minutes idle
 
 No infrastructure to manage. No cluster to maintain. Just notebooks and queries.
-
-<!-- livebook:{"offset":6959,"stamp":{"token":"XCP.Uz31GTEAN-Og6OWyZqbNDzOco2d6b3K3smtXWMi7TopC0MxvQA86bzNOtO8vjMrptaqNrS2uYKAK2hXYJzauuL8pPGN-nEf1NI4MDsYPrtztudvXfHvmnQ","version":2}} -->

From 9807f261f0b130f776067e9a91935040da1bb239 Mon Sep 17 00:00:00 2001
From: Christopher Grainger <chris@amplified.ai>
Date: Tue, 31 Mar 2026 15:01:05 +1100
Subject: [PATCH 5/9] fix: macro replay via build pipeline, not node-local
 persistent_term
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SQL macros defined via Dux.define/3 were stored in :persistent_term
which is node-local. Workers on remote BEAM nodes (FLAME runners)
couldn't see them, causing "undefined function" errors for macros
in distributed pipelines.

Fix: QueryBuilder.build/2 now prepends macro setup SQLs to the
source_setup list. Workers execute source_setup before the main
query, so macros are available regardless of which node the worker
runs on. CREATE OR REPLACE is idempotent — safe to replay on
every build.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 guides/flame-clusters.livemd | 2 +-
 lib/dux/query_builder.ex     | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/guides/flame-clusters.livemd b/guides/flame-clusters.livemd
index 1a16ced..1f5fde8 100644
--- a/guides/flame-clusters.livemd
+++ b/guides/flame-clusters.livemd
@@ -202,8 +202,8 @@ speed_distribution =
     tiles: count(tier),
     total_tests: sum(tests)
   )
-  |> Dux.collect()
   |> Dux.sort_by([:type, desc: :tiles])
+  |> Dux.collect()
   |> Dux.to_rows()
 ```
 
diff --git a/lib/dux/query_builder.ex b/lib/dux/query_builder.ex
index 4577b56..b434d80 100644
--- a/lib/dux/query_builder.ex
+++ b/lib/dux/query_builder.ex
@@ -14,6 +14,10 @@ defmodule Dux.QueryBuilder do
   """
   def build(%Dux{source: source, ops: ops}, db) do
     {source_sql, setup} = source_to_sql(source, db)
+    # Prepend macro definitions so they're available on remote workers.
+    # CREATE OR REPLACE is idempotent — safe to replay on every build.
+    macro_setup = Dux.macro_setup_sqls()
+    setup = macro_setup ++ setup
 
     case ops do
       [] ->

From ae28aac5d6d3658c33082fb9c823dd015568d113 Mon Sep 17 00:00:00 2001
From: Christopher Grainger <chris@amplified.ai>
Date: Tue, 31 Mar 2026 15:29:13 +1100
Subject: [PATCH 6/9] docs: reduce default workers to 3, add boot time note

Each FLAME runner takes ~30s to boot (DuckDB driver download +
setup). 5 sequential boots on a small coordinator is 2.5+ minutes.
3 workers is more practical for getting started.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 guides/flame-clusters.livemd | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/guides/flame-clusters.livemd b/guides/flame-clusters.livemd
index 1f5fde8..427f5e3 100644
--- a/guides/flame-clusters.livemd
+++ b/guides/flame-clusters.livemd
@@ -130,7 +130,9 @@ Key settings:
 ## 4. Spin Up Workers
 
 ```elixir
-workers = Dux.Flame.spin_up(5,
+# Start with 3 workers — each takes ~30s to boot (driver download + setup).
+# Scale up with more if needed.
+workers = Dux.Flame.spin_up(3,
   pool: :dux_pool,
   memory_limit: "4GB",
   setup: fn ->

From 60cb20d057cfd59f357c6f30973fe1211398227f Mon Sep 17 00:00:00 2001
From: Christopher Grainger <chris@amplified.ai>
Date: Tue, 31 Mar 2026 23:47:51 +1100
Subject: [PATCH 7/9] fix: replay macros on worker DuckDB connections via
 coordinator

The previous fix attempted to send macros through QueryBuilder.build,
but build runs on the worker node where persistent_term is empty.

New approach: the coordinator reads macro SQLs from its local
persistent_term and sends them to each worker via execute_sqls/2
before the fan-out. execute_sqls runs the SQL directly on the
worker's private DuckDB connection (GenServer state), not the
app-level Dux.Connection.

This ensures macros defined on the coordinator are available on
remote FLAME workers regardless of which node they run on.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 lib/dux/query_builder.ex      |  4 ----
 lib/dux/remote/coordinator.ex | 19 +++++++++++++++++++
 lib/dux/remote/worker.ex      | 18 ++++++++++++++++++
 3 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/lib/dux/query_builder.ex b/lib/dux/query_builder.ex
index b434d80..4577b56 100644
--- a/lib/dux/query_builder.ex
+++ b/lib/dux/query_builder.ex
@@ -14,10 +14,6 @@ defmodule Dux.QueryBuilder do
   """
   def build(%Dux{source: source, ops: ops}, db) do
     {source_sql, setup} = source_to_sql(source, db)
-    # Prepend macro definitions so they're available on remote workers.
-    # CREATE OR REPLACE is idempotent — safe to replay on every build.
-    macro_setup = Dux.macro_setup_sqls()
-    setup = macro_setup ++ setup
 
     case ops do
       [] ->
diff --git a/lib/dux/remote/coordinator.ex b/lib/dux/remote/coordinator.ex
index e1bda1d..1a4c411 100644
--- a/lib/dux/remote/coordinator.ex
+++ b/lib/dux/remote/coordinator.ex
@@ -57,6 +57,11 @@ defmodule Dux.Remote.Coordinator do
     # DuckLake attached sources → file manifest for direct parquet reads.
     pipeline = resolve_ducklake_source(pipeline)
 
+    # Replay SQL macros on all workers. Macros are stored in
+    # :persistent_term on the coordinator node — remote workers
+    # can't read them. Send the CREATE MACRO SQLs explicitly.
+    replay_macros_on_workers(workers, timeout)
+
     # Split pipeline: worker ops push down, coordinator ops apply post-merge
     %{
       worker_ops: worker_ops,
@@ -826,4 +831,18 @@ defmodule Dux.Remote.Coordinator do
     # Unknown op — append directly
     %{dux | ops: dux.ops ++ [op]}
   end
+
+  # Replay SQL macros on workers before query execution.
+  # Macros live in :persistent_term on the coordinator node —
+  # remote workers need explicit CREATE MACRO calls on their
+  # private DuckDB connections.
+  defp replay_macros_on_workers(workers, _timeout) do
+    macro_sqls = Dux.macro_setup_sqls()
+
+    if macro_sqls != [] do
+      Enum.each(workers, fn worker ->
+        Worker.execute_sqls(worker, macro_sqls)
+      end)
+    end
+  end
 end
diff --git a/lib/dux/remote/worker.ex b/lib/dux/remote/worker.ex
index 99c83e1..a1101dd 100644
--- a/lib/dux/remote/worker.ex
+++ b/lib/dux/remote/worker.ex
@@ -72,6 +72,11 @@ defmodule Dux.Remote.Worker do
     GenServer.call(worker, {:setup, fun}, timeout)
   end
 
+  @doc false
+  def execute_sqls(worker, sqls, timeout \\ 30_000) when is_list(sqls) do
+    GenServer.call(worker, {:execute_sqls, sqls}, timeout)
+  end
+
   @doc """
   Execute a `%Dux{}` pipeline on a worker. Returns `{:ok, ipc_binary}` or `{:error, reason}`.
 
@@ -172,6 +177,19 @@ defmodule Dux.Remote.Worker do
     {:ok, %{db: db, conn: conn, tables: %{}}}
   end
 
+  @impl true
+  def handle_call({:execute_sqls, sqls}, _from, %{conn: conn} = state) do
+    result =
+      try do
+        Enum.each(sqls, &Dux.Backend.execute(conn, &1))
+        :ok
+      rescue
+        e -> {:error, Exception.message(e)}
+      end
+
+    {:reply, result, state}
+  end
+
   @impl true
   def handle_call({:setup, fun}, _from, state) when is_function(fun, 0) do
     # Setup runs on the worker's node. Dux.exec/1 and Dux.create_secret/2

From 3408028de02965646339f2d7cd8777f6de095e7a Mon Sep 17 00:00:00 2001
From: Christopher Grainger <chris@amplified.ai>
Date: Wed, 1 Apr 2026 03:42:34 +1100
Subject: [PATCH 8/9] fix: use CTAS (not views) for coordinator post-merge
 finalize
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The view path in compute/1 could fail for coordinator post-merge
processing because the merged temp table's ref may be GC'd before
the view is queried. The AVG rewrite columns (__avg_sum_*, etc.)
would be missing, causing "column not found" errors.

Fix: finalize bypasses Dux.compute and uses Backend.query directly
(CREATE TABLE AS). This is safe because post-merge data is small
(aggregation results) — CTAS cost is negligible.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 lib/dux/remote/coordinator.ex | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/lib/dux/remote/coordinator.ex b/lib/dux/remote/coordinator.ex
index 1a4c411..5e1b8c1 100644
--- a/lib/dux/remote/coordinator.ex
+++ b/lib/dux/remote/coordinator.ex
@@ -809,8 +809,20 @@ defmodule Dux.Remote.Coordinator do
     if pipeline.ops == [] do
       pipeline
     else
+      # Materialize with CTAS (not views) for coordinator post-merge.
+      # The merged result may have internal columns (__avg_sum_*, etc.)
+      # that views can't always resolve due to GC timing of the
+      # intermediate merged table. CTAS is safe and the data is small
+      # (aggregation results).
       local = %{pipeline | workers: nil}
-      Dux.compute(local)
+      conn = local.conn || Dux.Connection.get_conn()
+      {sql, source_setup} = Dux.QueryBuilder.build(local, conn)
+
+      Enum.each(source_setup, fn s -> Dux.Backend.execute(conn, s) end)
+
+      table_ref = Dux.Backend.query(conn, sql)
+      {names, dtypes} = Dux.Backend.table_schema(conn, table_ref)
+      %Dux{source: {:table, table_ref}, names: names, dtypes: dtypes, conn: conn}
     end
   end
 

From d548e7062c799e4aec241f9010828921a71895dd Mon Sep 17 00:00:00 2001
From: Christopher Grainger <chris@amplified.ai>
Date: Wed, 1 Apr 2026 04:02:26 +1100
Subject: [PATCH 9/9] fix: don't push post-summarise ops to workers when AVG is
 rewritten
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When summarise contains AVG (or STDDEV/COUNT DISTINCT), the pipeline
splitter rewrites it to SUM+COUNT on workers. But sort_by, filter,
and head ops after the summarise still reference the original column
names (e.g. avg_latency), which don't exist on workers — they only
exist after the coordinator reconstructs them from __avg_sum/__avg_count.

Fix: when summarise has rewrites, all subsequent ops go to the
coordinator only. Workers execute up to and including the rewritten
summarise; the coordinator handles filter, sort, head after the
AVG reconstruction.

Also reverts the CTAS workaround in finalize — the real fix is in
the pipeline splitter, not bypassing views.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 lib/dux/remote/coordinator.ex       | 14 +-------------
 lib/dux/remote/pipeline_splitter.ex | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/lib/dux/remote/coordinator.ex b/lib/dux/remote/coordinator.ex
index 5e1b8c1..1a4c411 100644
--- a/lib/dux/remote/coordinator.ex
+++ b/lib/dux/remote/coordinator.ex
@@ -809,20 +809,8 @@ defmodule Dux.Remote.Coordinator do
     if pipeline.ops == [] do
       pipeline
     else
-      # Materialize with CTAS (not views) for coordinator post-merge.
-      # The merged result may have internal columns (__avg_sum_*, etc.)
-      # that views can't always resolve due to GC timing of the
-      # intermediate merged table. CTAS is safe and the data is small
-      # (aggregation results).
       local = %{pipeline | workers: nil}
-      conn = local.conn || Dux.Connection.get_conn()
-      {sql, source_setup} = Dux.QueryBuilder.build(local, conn)
-
-      Enum.each(source_setup, fn s -> Dux.Backend.execute(conn, s) end)
-
-      table_ref = Dux.Backend.query(conn, sql)
-      {names, dtypes} = Dux.Backend.table_schema(conn, table_ref)
-      %Dux{source: {:table, table_ref}, names: names, dtypes: dtypes, conn: conn}
+      Dux.compute(local)
     end
   end
 
diff --git a/lib/dux/remote/pipeline_splitter.ex b/lib/dux/remote/pipeline_splitter.ex
index 09e34e4..0aede39 100644
--- a/lib/dux/remote/pipeline_splitter.ex
+++ b/lib/dux/remote/pipeline_splitter.ex
@@ -80,7 +80,18 @@ defmodule Dux.Remote.PipelineSplitter do
   # Summarise — push to workers, but rewrite AVG and track for re-aggregation
   defp do_split([{:summarise, aggs} | rest], worker, coord, rewrites) do
     {worker_aggs, new_rewrites} = rewrite_aggregates(aggs)
-    do_split(rest, [{:summarise, worker_aggs} | worker], coord, Map.merge(rewrites, new_rewrites))
+    all_rewrites = Map.merge(rewrites, new_rewrites)
+
+    if map_size(new_rewrites) > 0 do
+      # When we have AVG/STDDEV rewrites, ops after summarise may reference
+      # the original column names (e.g. sort_by(:avg_col)) which don't exist
+      # on workers (rewritten to __avg_sum_*, __avg_count_*). Push remaining
+      # ops to coordinator only.
+      remaining_coord = Enum.reverse(rest)
+      {[{:summarise, worker_aggs} | worker], remaining_coord ++ coord, all_rewrites}
+    else
+      do_split(rest, [{:summarise, worker_aggs} | worker], coord, all_rewrites)
+    end
   end
 
   # Sort, head, distinct — push to workers AND add to coordinator for re-merge