From 766a0cca1d24dae6c4ad6373a5d9fda09db2b85b Mon Sep 17 00:00:00 2001
From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com>
Date: Thu, 14 May 2026 09:40:29 +0200
Subject: [PATCH 1/2] =?UTF-8?q?Fix=20#933=20=E2=80=94=20detect=20compactio?=
 =?UTF-8?q?n=20exclude-columns=20per=20merge=20step?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CompactParquetFiles detected CompactionExcludeColumns once, globally,
across the union schema of every source file in a group, then applied
that "* EXCLUDE (col)" clause to each pair in the pairwise merge.

query_plan_text was added to query_store_stats in migration v13
(2026-02-23). A reporter's archive contains both pre-v13 files (no
column) and post-v13 files (column present). The global DESCRIBE saw the
column in the newer files, so every merge step ran with
"* EXCLUDE (query_plan_text)" — including the steps that merged two
pre-v13 files, which fail with:

  Binder Error: Column "query_plan_text" in EXCLUDE list not found in
  FROM clause

Extract the schema detection into BuildSelectClause(table, paths) and
call it per merge-set instead of once globally — with the actual pair in
the pairwise path, and with all sources in the small-group path. A pair
that doesn't carry an exclude-column now merges with a plain "*".

Verified against DuckDB CLI v1.5.2: DESCRIBE of an [old, old] pair
correctly omits the column, and "* EXCLUDE (query_plan_text)" on that
pair reproduces the reporter's exact Binder Error. Cost is one extra
DESCRIBE per merge step — parquet footer reads, not data.
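For reference, a minimal DuckDB SQL sketch of that verification. File
names here are hypothetical (old_a/old_b stand in for pre-v13 archives);
the statements mirror what BuildSelectClause and the merge COPY now run:

    -- Per-pair detection: DESCRIBE only the files in this merge step.
    DESCRIBE SELECT * FROM read_parquet(
        ['old_a.parquet', 'old_b.parquet'], union_by_name=true);
    -- no query_plan_text row, so this pair merges with a plain "*"

    -- The old globally-computed clause on the same pre-v13 pair fails:
    SELECT * EXCLUDE (query_plan_text) FROM read_parquet(
        ['old_a.parquet', 'old_b.parquet'], union_by_name=true);
    -- Binder Error: Column "query_plan_text" in EXCLUDE list not found
    -- in FROM clause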
$"* EXCLUDE ({string.Join(", ", colsToExclude)})" + : "*"; + } + /// /// Compacts all per-cycle parquet files into monthly files (YYYYMM_tablename.parquet). /// This keeps the archive directory small (~75 files for 3 months of 25 tables) @@ -371,26 +400,6 @@ parquet files already live on. */ .Select(f => Path.Combine(_archivePath, f).Replace("\\", "/")) .ToList(); - /* Determine column exclusions up front using all source files */ - var selectClause = "*"; - if (CompactionExcludeColumns.TryGetValue(table, out var excludeCols)) - { - using var schemaCon = new DuckDBConnection("DataSource=:memory:"); - schemaCon.Open(); - var allPathList = string.Join(", ", sourcePaths.Select(p => $"'{EscapeSqlPath(p)}'")); - using var schemaCmd = schemaCon.CreateCommand(); - schemaCmd.CommandText = $"SELECT column_name FROM (DESCRIBE SELECT * FROM read_parquet([{allPathList}], union_by_name=true))"; - using var reader = schemaCmd.ExecuteReader(); - var existingCols = new HashSet(StringComparer.OrdinalIgnoreCase); - while (reader.Read()) existingCols.Add(reader.GetString(0)); - - var colsToExclude = excludeCols.Where(c => existingCols.Contains(c)).ToArray(); - if (colsToExclude.Length > 0) - { - selectClause = $"* EXCLUDE ({string.Join(", ", colsToExclude)})"; - } - } - if (sourcePaths.Count <= 2) { /* Small group — single-pass merge. @@ -416,6 +425,7 @@ guide recommendation of 50-60% of system RAM. pragma.ExecuteNonQuery(); } + var selectClause = BuildSelectClause(table, sourcePaths); var pathList = string.Join(", ", sourcePaths.Select(p => $"'{EscapeSqlPath(p)}'")); using var cmd = con.CreateCommand(); cmd.CommandText = $"COPY (SELECT {selectClause} FROM read_parquet([{pathList}], union_by_name=true)) " + @@ -447,6 +457,7 @@ Sort smallest-first so early merges are cheap. */ pragma.ExecuteNonQuery(); } + var selectClause = BuildSelectClause(table, new[] { currentPath, sorted[i] }); var pairList = $"'{EscapeSqlPath(currentPath)}', '{EscapeSqlPath(sorted[i])}'"; using var cmd = con.CreateCommand(); cmd.CommandText = $"COPY (SELECT {selectClause} FROM read_parquet([{pairList}], union_by_name=true)) " + From fd6dd91cfbd7f760f46d73eadb3a8194cb01a789 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Fri, 15 May 2026 10:47:49 -0400 Subject: [PATCH 2/2] =?UTF-8?q?Fix=20#933=20=E2=80=94=20cap=20main=20conne?= =?UTF-8?q?ction=20memory=5Flimit,=20raise=20transiently=20for=20COPY?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #933's titled complaint is "Memory usage on client": Lite holds ~2.7-2.9 GB after 10 minutes with 4 servers. The compaction OOMs everyone has been chasing in this thread are a downstream symptom — by the time compaction runs the app already holds 2.7 GB, leaving little headroom on the reporter's 16 GB / ~1.6 GB-free machine. Root cause: the main DuckDB ConnectionString set no memory_limit, so the buffer pool ran at the DuckDB default of 80% of system RAM (~12.8 GB on a 16 GB box). With archive parquet files accumulating on disk, every UI query over an archive view caches pages and the buffer pool grows freely. The fix has to navigate one wrinkle: parquet COPY in DuckDB v1.5.2 hits a buffer-manager-bypass pre-reservation that needs ~2-4 GB headroom. Capping the main connection at 1 GB statically would break ExportToParquet and the two COPY paths in ArchiveAllAndResetAsync. 

From fd6dd91cfbd7f760f46d73eadb3a8194cb01a789 Mon Sep 17 00:00:00 2001
From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com>
Date: Fri, 15 May 2026 10:47:49 -0400
Subject: [PATCH 2/2] =?UTF-8?q?Fix=20#933=20=E2=80=94=20cap=20main=20conne?=
 =?UTF-8?q?ction=20memory=5Flimit,=20raise=20transiently=20for=20COPY?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

#933's titled complaint is "Memory usage on client": Lite holds
~2.7-2.9 GB after 10 minutes with 4 servers. The compaction OOMs everyone
has been chasing in this thread are a downstream symptom — by the time
compaction runs, the app already holds ~2.7 GB, leaving little headroom
on the reporter's 16 GB / ~1.6 GB-free machine.

Root cause: the main DuckDB ConnectionString set no memory_limit, so the
buffer pool ran at the DuckDB default of 80% of system RAM (~12.8 GB on
a 16 GB box). With archive parquet files accumulating on disk, every UI
query over an archive view caches pages and the buffer pool grows freely.

The fix has to navigate one wrinkle: parquet COPY in DuckDB v1.5.2 hits
a buffer-manager-bypass pre-reservation that needs ~2-4 GB of headroom.
Capping the main connection at 1 GB statically would break
ExportToParquet and the two COPY paths in ArchiveAllAndResetAsync.

So:

- ConnectionString: memory_limit=1GB. Caps the resting buffer pool,
  addressing the actual complaint by stopping the archive-page cache
  from growing unbounded.
- Around each parquet COPY on the main connection: SET
  memory_limit='4GB', run the COPY, SET back to '1GB'. Factored into a
  WithRaisedCopyMemoryLimit helper so the three call sites stay
  consistent (ExportToParquet, and the two COPYs in
  ArchiveAllAndResetAsync).
- Compaction connections (separate :memory: instances) keep their 4 GB
  cap from #952.

Verified against DuckDB CLI v1.5.2 with synthetic query_snapshots-shaped
data:

- COPY table→parquet at 256MB/512MB/1GB caps: OOMs (pre-reservation,
  matching the read_parquet→parquet path we saw in #952 testing).
- COPY table→parquet at 2GB/4GB caps: succeeds, peak RSS well under the
  cap.
- INSERT (Appender) and SELECT (including a GROUP BY across 11k rows)
  work fine at a 256MB cap — confirms collectors and UI queries don't
  hit the pre-reservation behavior and aren't affected by the resting
  cap.

Tradeoff: the resting cap forces buffer-pool eviction of cached archive
parquet pages, so long-range historical UI queries that re-scan many
parquet files will do more disk I/O. Live/recent-data queries against
the hot DB are unaffected (the hot DB easily fits in 1 GB).

The per-merge-step BuildSelectClause from the previous commit separately
fixes the query_store_stats Binder Error on archives that span the v13
schema change.
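For reference, a minimal DuckDB CLI sketch of the verification above.
The table is synthetic (query_snapshots-shaped, as in the testing
notes); the failure and success outcomes are the observed behavior
listed in the bullets:

    -- At the resting cap, the parquet COPY pre-reservation OOMs:
    SET memory_limit = '1GB';
    COPY (SELECT * FROM query_snapshots)
        TO 'out.parquet' (FORMAT PARQUET, COMPRESSION ZSTD);
    -- fails with an out-of-memory error (also at 256MB and 512MB)

    -- At the raised cap, the same COPY succeeds:
    SET memory_limit = '4GB';
    COPY (SELECT * FROM query_snapshots)
        TO 'out.parquet' (FORMAT PARQUET, COMPRESSION ZSTD);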

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 Lite/Database/DuckDbInitializer.cs | 13 ++++--
 Lite/Services/ArchiveService.cs    | 74 ++++++++++++++++++++++++++----
 2 files changed, 74 insertions(+), 13 deletions(-)

diff --git a/Lite/Database/DuckDbInitializer.cs b/Lite/Database/DuckDbInitializer.cs
index e767c6d..a839b1a 100644
--- a/Lite/Database/DuckDbInitializer.cs
+++ b/Lite/Database/DuckDbInitializer.cs
@@ -124,10 +124,17 @@ public DuckDbInitializer(string databasePath, ILogger? logger
 
     /// <summary>
     /// Gets the connection string for the DuckDB database.
-    /// Disables automatic WAL checkpoints to prevent 2-3s stop-the-world stalls
-    /// during collector writes. Manual CHECKPOINT runs between collection cycles instead.
+    /// - checkpoint_threshold=1GB: disables automatic WAL checkpoints to prevent
+    ///   2-3s stop-the-world stalls during collector writes. Manual CHECKPOINT
+    ///   runs between collection cycles instead.
+    /// - memory_limit=1GB: caps the resting buffer pool so it doesn't grow
+    ///   unbounded as the archive directory fills with parquet files (the
+    ///   ".tmp dir caching" path is the actual driver of #933's titled
+    ///   complaint — uncapped, the buffer pool grows toward 80% of system RAM).
+    ///   ArchiveService raises this temporarily for parquet COPY operations,
+    ///   which need more headroom due to a DuckDB pre-reservation behavior.
     /// </summary>
-    public string ConnectionString => $"Data Source={_databasePath};checkpoint_threshold=1GB";
+    public string ConnectionString => $"Data Source={_databasePath};memory_limit=1GB;checkpoint_threshold=1GB";
 
     /// <summary>
     /// Ensures the database exists and all tables are created.

diff --git a/Lite/Services/ArchiveService.cs b/Lite/Services/ArchiveService.cs
index 13bd742..ed1100e 100644
--- a/Lite/Services/ArchiveService.cs
+++ b/Lite/Services/ArchiveService.cs
@@ -187,17 +187,65 @@ private static async Task GetRowCountBeforeCutoff(DuckDBConnection connect
     private static async Task ExportToParquet(DuckDBConnection connection, string table, string timeColumn, DateTime cutoff, string filePath)
     {
-        using var cmd = connection.CreateCommand();
-        cmd.CommandText = $@"
+        await WithRaisedCopyMemoryLimit(connection, async () =>
+        {
+            using var cmd = connection.CreateCommand();
+            cmd.CommandText = $@"
 COPY (
     SELECT *
     FROM {table}
     WHERE {timeColumn} < $1
 ) TO '{EscapeSqlPath(filePath)}' (FORMAT PARQUET, COMPRESSION ZSTD)";
-        cmd.Parameters.Add(new DuckDBParameter { Value = cutoff });
-        await cmd.ExecuteNonQueryAsync();
+            cmd.Parameters.Add(new DuckDBParameter { Value = cutoff });
+            await cmd.ExecuteNonQueryAsync();
+        });
     }
 
     private static string EscapeSqlPath(string path) => DuckDbInitializer.EscapeSqlPath(path);
 
+    /* Resting and COPY memory_limit values for the main DuckDB connection.
+       The resting value is also set in DuckDbInitializer.ConnectionString so
+       newly-opened connections start at the resting cap; the COPY value is
+       applied transiently around parquet COPY operations and restored after.
+       See WithRaisedCopyMemoryLimit and the comment block on ConnectionString. */
+    private const string MainConnectionRestingMemoryLimit = "1GB";
+    private const string MainConnectionCopyMemoryLimit = "4GB";
+
+    /// <summary>
+    /// Runs <paramref name="action"/> with the connection's memory_limit raised
+    /// to <see cref="MainConnectionCopyMemoryLimit"/>, restoring to
+    /// <see cref="MainConnectionRestingMemoryLimit"/> after. Use around parquet
+    /// COPY operations on the main connection — those hit a DuckDB
+    /// pre-reservation behavior that needs more headroom than the resting cap
+    /// (#933). memory_limit is instance-level; concurrent operations briefly
+    /// see the raised cap.
+    /// </summary>
+    private static async Task WithRaisedCopyMemoryLimit(DuckDBConnection connection, Func<Task> action)
+    {
+        using (var raiseCmd = connection.CreateCommand())
+        {
+            raiseCmd.CommandText = $"SET memory_limit = '{MainConnectionCopyMemoryLimit}'";
+            await raiseCmd.ExecuteNonQueryAsync();
+        }
+
+        try
+        {
+            await action();
+        }
+        finally
+        {
+            try
+            {
+                using var restoreCmd = connection.CreateCommand();
+                restoreCmd.CommandText = $"SET memory_limit = '{MainConnectionRestingMemoryLimit}'";
+                await restoreCmd.ExecuteNonQueryAsync();
+            }
+            catch
+            {
+                /* Best-effort restore. If this fails the connection is in a bad
+                   state and will be disposed by the caller's `using` shortly. */
+            }
+        }
+    }
+
     /* Columns to exclude during compaction — dead weight from legacy archives */
     private static readonly Dictionary<string, string[]> CompactionExcludeColumns = new()
     {
@@ -573,9 +621,12 @@ Archive views use glob (*_table.parquet) to pick up all files.
 */
             var parquetPath = Path.Combine(_archivePath, $"{timestamp}_{table}.parquet")
                 .Replace("\\", "/");
-            using var exportCmd = connection.CreateCommand();
-            exportCmd.CommandText = $"COPY (SELECT * FROM {table}) TO '{EscapeSqlPath(parquetPath)}' (FORMAT PARQUET, COMPRESSION ZSTD)";
-            await exportCmd.ExecuteNonQueryAsync();
+            await WithRaisedCopyMemoryLimit(connection, async () =>
+            {
+                using var exportCmd = connection.CreateCommand();
+                exportCmd.CommandText = $"COPY (SELECT * FROM {table}) TO '{EscapeSqlPath(parquetPath)}' (FORMAT PARQUET, COMPRESSION ZSTD)";
+                await exportCmd.ExecuteNonQueryAsync();
+            });
 
             _logger?.LogInformation("Archived {Count} rows from {Table}", rowCount, table);
         }
@@ -598,9 +649,12 @@ Archive views use glob (*_table.parquet) to pick up all files.
 */
             if (rowCount == 0) continue;
 
             var preservePath = Path.Combine(preserveDir, $"{table}.parquet").Replace("\\", "/");
-            using var exportCmd = connection.CreateCommand();
-            exportCmd.CommandText = $"COPY (SELECT * FROM {table}) TO '{EscapeSqlPath(preservePath)}' (FORMAT PARQUET)";
-            await exportCmd.ExecuteNonQueryAsync();
+            await WithRaisedCopyMemoryLimit(connection, async () =>
+            {
+                using var exportCmd = connection.CreateCommand();
+                exportCmd.CommandText = $"COPY (SELECT * FROM {table}) TO '{EscapeSqlPath(preservePath)}' (FORMAT PARQUET)";
+                await exportCmd.ExecuteNonQueryAsync();
+            });
 
             preservedFiles[table] = preservePath;
             _logger?.LogInformation("Preserved {Count} rows from {Table} for restoration after reset", rowCount, table);
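
For reference, the SQL the three wrapped call sites now effectively run
on the main connection. File and table names here are hypothetical; the
limit values come from the constants in the helper above:

    SET memory_limit = '4GB';   -- MainConnectionCopyMemoryLimit, raised first
    COPY (SELECT * FROM query_snapshots)
        TO 'archive/202605_query_snapshots.parquet' (FORMAT PARQUET, COMPRESSION ZSTD);
    SET memory_limit = '1GB';   -- MainConnectionRestingMemoryLimit, restored in finally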