From 766a0cca1d24dae6c4ad6373a5d9fda09db2b85b Mon Sep 17 00:00:00 2001
From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com>
Date: Thu, 14 May 2026 09:40:29 +0200
Subject: [PATCH 1/2] =?UTF-8?q?Fix=20#933=20=E2=80=94=20detect=20compactio?=
 =?UTF-8?q?n=20exclude-columns=20per=20merge=20step?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CompactParquetFiles detected CompactionExcludeColumns once, globally,
across the union schema of every source file in a group, then applied
that "* EXCLUDE (col)" clause to each pair in the pairwise merge.

query_plan_text was added to query_store_stats in migration v13
(2026-02-23). A reporter's archive contains both pre-v13 files (no
column) and post-v13 files (column present). The global DESCRIBE saw the
column in the newer files, so every merge step ran with
"* EXCLUDE (query_plan_text)" — including the steps that merged two
pre-v13 files, which fail with:

  Binder Error: Column "query_plan_text" in EXCLUDE list not found in
  FROM clause

Extract the schema detection into BuildSelectClause(table, paths) and
call it per merge-set instead of once globally — with the actual pair in
the pairwise path, and with all sources in the small-group path. A pair
that doesn't carry an exclude-column now merges with a plain "*".

Verified against DuckDB CLI v1.5.2: DESCRIBE of an [old, old] pair
correctly omits the column, and "* EXCLUDE (query_plan_text)" on that
pair reproduces the reporter's exact Binder Error. Cost is one extra
DESCRIBE per merge step — parquet footer reads, not data.
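For reference, a minimal DuckDB SQL sketch of that verification. File
names here are hypothetical (old_a/old_b stand in for pre-v13 archives);
the statements mirror what BuildSelectClause and the merge COPY now run:

    -- Per-pair detection: DESCRIBE only the files in this merge step.
    DESCRIBE SELECT * FROM read_parquet(
        ['old_a.parquet', 'old_b.parquet'], union_by_name=true);
    -- no query_plan_text row, so this pair merges with a plain "*"

    -- The old globally-computed clause on the same pre-v13 pair fails:
    SELECT * EXCLUDE (query_plan_text) FROM read_parquet(
        ['old_a.parquet', 'old_b.parquet'], union_by_name=true);
    -- Binder Error: Column "query_plan_text" in EXCLUDE list not found
    -- in FROM clause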
$"* EXCLUDE ({string.Join(", ", colsToExclude)})" + : "*"; + } + /// /// Compacts all per-cycle parquet files into monthly files (YYYYMM_tablename.parquet). /// This keeps the archive directory small (~75 files for 3 months of 25 tables) @@ -371,26 +400,6 @@ parquet files already live on. */ .Select(f => Path.Combine(_archivePath, f).Replace("\\", "/")) .ToList(); - /* Determine column exclusions up front using all source files */ - var selectClause = "*"; - if (CompactionExcludeColumns.TryGetValue(table, out var excludeCols)) - { - using var schemaCon = new DuckDBConnection("DataSource=:memory:"); - schemaCon.Open(); - var allPathList = string.Join(", ", sourcePaths.Select(p => $"'{EscapeSqlPath(p)}'")); - using var schemaCmd = schemaCon.CreateCommand(); - schemaCmd.CommandText = $"SELECT column_name FROM (DESCRIBE SELECT * FROM read_parquet([{allPathList}], union_by_name=true))"; - using var reader = schemaCmd.ExecuteReader(); - var existingCols = new HashSet(StringComparer.OrdinalIgnoreCase); - while (reader.Read()) existingCols.Add(reader.GetString(0)); - - var colsToExclude = excludeCols.Where(c => existingCols.Contains(c)).ToArray(); - if (colsToExclude.Length > 0) - { - selectClause = $"* EXCLUDE ({string.Join(", ", colsToExclude)})"; - } - } - if (sourcePaths.Count <= 2) { /* Small group — single-pass merge. @@ -416,6 +425,7 @@ guide recommendation of 50-60% of system RAM. pragma.ExecuteNonQuery(); } + var selectClause = BuildSelectClause(table, sourcePaths); var pathList = string.Join(", ", sourcePaths.Select(p => $"'{EscapeSqlPath(p)}'")); using var cmd = con.CreateCommand(); cmd.CommandText = $"COPY (SELECT {selectClause} FROM read_parquet([{pathList}], union_by_name=true)) " + @@ -447,6 +457,7 @@ Sort smallest-first so early merges are cheap. */ pragma.ExecuteNonQuery(); } + var selectClause = BuildSelectClause(table, new[] { currentPath, sorted[i] }); var pairList = $"'{EscapeSqlPath(currentPath)}', '{EscapeSqlPath(sorted[i])}'"; using var cmd = con.CreateCommand(); cmd.CommandText = $"COPY (SELECT {selectClause} FROM read_parquet([{pairList}], union_by_name=true)) " + From fd6dd91cfbd7f760f46d73eadb3a8194cb01a789 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Fri, 15 May 2026 10:47:49 -0400 Subject: [PATCH 2/2] =?UTF-8?q?Fix=20#933=20=E2=80=94=20cap=20main=20conne?= =?UTF-8?q?ction=20memory=5Flimit,=20raise=20transiently=20for=20COPY?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #933's titled complaint is "Memory usage on client": Lite holds ~2.7-2.9 GB after 10 minutes with 4 servers. The compaction OOMs everyone has been chasing in this thread are a downstream symptom — by the time compaction runs the app already holds 2.7 GB, leaving little headroom on the reporter's 16 GB / ~1.6 GB-free machine. Root cause: the main DuckDB ConnectionString set no memory_limit, so the buffer pool ran at the DuckDB default of 80% of system RAM (~12.8 GB on a 16 GB box). With archive parquet files accumulating on disk, every UI query over an archive view caches pages and the buffer pool grows freely. The fix has to navigate one wrinkle: parquet COPY in DuckDB v1.5.2 hits a buffer-manager-bypass pre-reservation that needs ~2-4 GB headroom. Capping the main connection at 1 GB statically would break ExportToParquet and the two COPY paths in ArchiveAllAndResetAsync. 

From fd6dd91cfbd7f760f46d73eadb3a8194cb01a789 Mon Sep 17 00:00:00 2001
From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com>
Date: Fri, 15 May 2026 10:47:49 -0400
Subject: [PATCH 2/2] =?UTF-8?q?Fix=20#933=20=E2=80=94=20cap=20main=20conne?=
 =?UTF-8?q?ction=20memory=5Flimit,=20raise=20transiently=20for=20COPY?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

#933's titled complaint is "Memory usage on client": Lite holds
~2.7-2.9 GB after 10 minutes with 4 servers. The compaction OOMs everyone
has been chasing in this thread are a downstream symptom — by the time
compaction runs, the app already holds ~2.7 GB, leaving little headroom
on the reporter's 16 GB / ~1.6 GB-free machine.

Root cause: the main DuckDB ConnectionString set no memory_limit, so the
buffer pool ran at the DuckDB default of 80% of system RAM (~12.8 GB on
a 16 GB box). With archive parquet files accumulating on disk, every UI
query over an archive view caches pages and the buffer pool grows freely.

The fix has to navigate one wrinkle: parquet COPY in DuckDB v1.5.2 hits
a buffer-manager-bypass pre-reservation that needs ~2-4 GB of headroom.
Capping the main connection at 1 GB statically would break
ExportToParquet and the two COPY paths in ArchiveAllAndResetAsync.

So:

- ConnectionString: memory_limit=1GB. Caps the resting buffer pool,
  addressing the actual complaint by stopping the archive-page cache
  from growing unbounded.
- Around each parquet COPY on the main connection: SET
  memory_limit='4GB', run the COPY, SET back to '1GB'. Factored into a
  WithRaisedCopyMemoryLimit helper so the three call sites stay
  consistent (ExportToParquet, and the two COPYs in
  ArchiveAllAndResetAsync).
- Compaction connections (separate :memory: instances) keep their 4 GB
  cap from #952.

Verified against DuckDB CLI v1.5.2 with synthetic query_snapshots-shaped
data:

- COPY table→parquet at 256MB/512MB/1GB caps: OOMs (pre-reservation,
  matching the read_parquet→parquet path we saw in #952 testing).
- COPY table→parquet at 2GB/4GB caps: succeeds, peak RSS well under the
  cap.
- INSERT (Appender) and SELECT (including a GROUP BY across 11k rows)
  work fine at a 256MB cap — confirms collectors and UI queries don't
  hit the pre-reservation behavior and aren't affected by the resting
  cap.

Tradeoff: the resting cap forces buffer-pool eviction of cached archive
parquet pages, so long-range historical UI queries that re-scan many
parquet files will do more disk I/O. Live/recent-data queries against
the hot DB are unaffected (the hot DB easily fits in 1 GB).

The per-merge-step BuildSelectClause from the previous commit separately
fixes the query_store_stats Binder Error on archives that span the v13
schema change.
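For reference, a minimal DuckDB CLI sketch of the verification above.
The table is synthetic (query_snapshots-shaped, as in the testing
notes); the failure and success outcomes are the observed behavior
listed in the bullets:

    -- At the resting cap, the parquet COPY pre-reservation OOMs:
    SET memory_limit = '1GB';
    COPY (SELECT * FROM query_snapshots)
        TO 'out.parquet' (FORMAT PARQUET, COMPRESSION ZSTD);
    -- fails with an out-of-memory error (also at 256MB and 512MB)

    -- At the raised cap, the same COPY succeeds:
    SET memory_limit = '4GB';
    COPY (SELECT * FROM query_snapshots)
        TO 'out.parquet' (FORMAT PARQUET, COMPRESSION ZSTD);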

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 Lite/Database/DuckDbInitializer.cs | 13 ++++--
 Lite/Services/ArchiveService.cs    | 74 ++++++++++++++++++++++++++----
 2 files changed, 74 insertions(+), 13 deletions(-)

diff --git a/Lite/Database/DuckDbInitializer.cs b/Lite/Database/DuckDbInitializer.cs
index e767c6d..a839b1a 100644
--- a/Lite/Database/DuckDbInitializer.cs
+++ b/Lite/Database/DuckDbInitializer.cs
@@ -124,10 +124,17 @@ public DuckDbInitializer(string databasePath, ILogger? logger
 
     /// <summary>
     /// Gets the connection string for the DuckDB database.
-    /// Disables automatic WAL checkpoints to prevent 2-3s stop-the-world stalls
-    /// during collector writes. Manual CHECKPOINT runs between collection cycles instead.
+    /// - checkpoint_threshold=1GB: disables automatic WAL checkpoints to prevent
+    ///   2-3s stop-the-world stalls during collector writes. Manual CHECKPOINT
+    ///   runs between collection cycles instead.
+    /// - memory_limit=1GB: caps the resting buffer pool so it doesn't grow
+    ///   unbounded as the archive directory fills with parquet files (the
+    ///   ".tmp dir caching" path is the actual driver of #933's titled
+    ///   complaint — uncapped, the buffer pool grows toward 80% of system RAM).
+    ///   ArchiveService raises this temporarily for parquet COPY operations,
+    ///   which need more headroom due to a DuckDB pre-reservation behavior.
     /// </summary>
-    public string ConnectionString => $"Data Source={_databasePath};checkpoint_threshold=1GB";
+    public string ConnectionString => $"Data Source={_databasePath};memory_limit=1GB;checkpoint_threshold=1GB";
 
     /// <summary>
     /// Ensures the database exists and all tables are created.

diff --git a/Lite/Services/ArchiveService.cs b/Lite/Services/ArchiveService.cs
index 13bd742..ed1100e 100644
--- a/Lite/Services/ArchiveService.cs
+++ b/Lite/Services/ArchiveService.cs
@@ -187,17 +187,65 @@ private static async Task GetRowCountBeforeCutoff(DuckDBConnection connect
     private static async Task ExportToParquet(DuckDBConnection connection, string table, string timeColumn, DateTime cutoff, string filePath)
     {
-        using var cmd = connection.CreateCommand();
-        cmd.CommandText = $@"
+        await WithRaisedCopyMemoryLimit(connection, async () =>
+        {
+            using var cmd = connection.CreateCommand();
+            cmd.CommandText = $@"
 COPY (
     SELECT *
     FROM {table}
     WHERE {timeColumn} < $1
 ) TO '{EscapeSqlPath(filePath)}' (FORMAT PARQUET, COMPRESSION ZSTD)";
-        cmd.Parameters.Add(new DuckDBParameter { Value = cutoff });
-        await cmd.ExecuteNonQueryAsync();
+            cmd.Parameters.Add(new DuckDBParameter { Value = cutoff });
+            await cmd.ExecuteNonQueryAsync();
+        });
     }
 
     private static string EscapeSqlPath(string path) => DuckDbInitializer.EscapeSqlPath(path);
 
+    /* Resting and COPY memory_limit values for the main DuckDB connection.
+       The resting value is also set in DuckDbInitializer.ConnectionString so
+       newly-opened connections start at the resting cap; the COPY value is
+       applied transiently around parquet COPY operations and restored after.
+       See WithRaisedCopyMemoryLimit and the comment block on ConnectionString. */
+    private const string MainConnectionRestingMemoryLimit = "1GB";
+    private const string MainConnectionCopyMemoryLimit = "4GB";
+
+    /// <summary>
+    /// Runs <paramref name="action"/> with the connection's memory_limit raised
+    /// to <see cref="MainConnectionCopyMemoryLimit"/>, restoring to
+    /// <see cref="MainConnectionRestingMemoryLimit"/> after. Use around parquet
+    /// COPY operations on the main connection — those hit a DuckDB
+    /// pre-reservation behavior that needs more headroom than the resting cap
+    /// (#933). memory_limit is instance-level; concurrent operations briefly
+    /// see the raised cap.
+    /// </summary>
+    private static async Task WithRaisedCopyMemoryLimit(DuckDBConnection connection, Func<Task> action)
+    {
+        using (var raiseCmd = connection.CreateCommand())
+        {
+            raiseCmd.CommandText = $"SET memory_limit = '{MainConnectionCopyMemoryLimit}'";
+            await raiseCmd.ExecuteNonQueryAsync();
+        }
+
+        try
+        {
+            await action();
+        }
+        finally
+        {
+            try
+            {
+                using var restoreCmd = connection.CreateCommand();
+                restoreCmd.CommandText = $"SET memory_limit = '{MainConnectionRestingMemoryLimit}'";
+                await restoreCmd.ExecuteNonQueryAsync();
+            }
+            catch
+            {
+                /* Best-effort restore. If this fails the connection is in a bad
+                   state and will be disposed by the caller's `using` shortly. */
+            }
+        }
+    }
+
     /* Columns to exclude during compaction — dead weight from legacy archives */
     private static readonly Dictionary<string, string[]> CompactionExcludeColumns = new()
     {
@@ -573,9 +621,12 @@ Archive views use glob (*_table.parquet) to pick up all files.
 */
             var parquetPath = Path.Combine(_archivePath, $"{timestamp}_{table}.parquet")
                 .Replace("\\", "/");
-            using var exportCmd = connection.CreateCommand();
-            exportCmd.CommandText = $"COPY (SELECT * FROM {table}) TO '{EscapeSqlPath(parquetPath)}' (FORMAT PARQUET, COMPRESSION ZSTD)";
-            await exportCmd.ExecuteNonQueryAsync();
+            await WithRaisedCopyMemoryLimit(connection, async () =>
+            {
+                using var exportCmd = connection.CreateCommand();
+                exportCmd.CommandText = $"COPY (SELECT * FROM {table}) TO '{EscapeSqlPath(parquetPath)}' (FORMAT PARQUET, COMPRESSION ZSTD)";
+                await exportCmd.ExecuteNonQueryAsync();
+            });
 
             _logger?.LogInformation("Archived {Count} rows from {Table}", rowCount, table);
         }
@@ -598,9 +649,12 @@ Archive views use glob (*_table.parquet) to pick up all files.
 */
             if (rowCount == 0) continue;
 
             var preservePath = Path.Combine(preserveDir, $"{table}.parquet").Replace("\\", "/");
-            using var exportCmd = connection.CreateCommand();
-            exportCmd.CommandText = $"COPY (SELECT * FROM {table}) TO '{EscapeSqlPath(preservePath)}' (FORMAT PARQUET)";
-            await exportCmd.ExecuteNonQueryAsync();
+            await WithRaisedCopyMemoryLimit(connection, async () =>
+            {
+                using var exportCmd = connection.CreateCommand();
+                exportCmd.CommandText = $"COPY (SELECT * FROM {table}) TO '{EscapeSqlPath(preservePath)}' (FORMAT PARQUET)";
+                await exportCmd.ExecuteNonQueryAsync();
+            });
 
             preservedFiles[table] = preservePath;
             _logger?.LogInformation("Preserved {Count} rows from {Table} for restoration after reset", rowCount, table);
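
For reference, the SQL the three wrapped call sites now effectively run
on the main connection. File and table names here are hypothetical; the
limit values come from the constants in the helper above:

    SET memory_limit = '4GB';   -- MainConnectionCopyMemoryLimit, raised first
    COPY (SELECT * FROM query_snapshots)
        TO 'archive/202605_query_snapshots.parquet' (FORMAT PARQUET, COMPRESSION ZSTD);
    SET memory_limit = '1GB';   -- MainConnectionRestingMemoryLimit, restored in finally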