From 54df56ce8c00e77fdef647136a7526225f4e799a Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sat, 28 Mar 2026 15:39:46 -0600 Subject: [PATCH 1/3] perf(db): add NativeDatabase napi-rs class for rusqlite connection lifecycle (6.13) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foundation for moving all DB operations to rusqlite on the native engine path. Creates a persistent rusqlite::Connection holder exposed to JS, handling schema migrations and build metadata KV — eliminating redundant per-call connection open/close in the native build pipeline. --- crates/codegraph-core/src/lib.rs | 1 + crates/codegraph-core/src/native_db.rs | 564 ++++++++++++++++++++ src/db/migrations.ts | 2 + src/domain/graph/builder/context.ts | 2 + src/domain/graph/builder/pipeline.ts | 39 +- src/domain/graph/builder/stages/finalize.ts | 49 +- src/types.ts | 16 + 7 files changed, 657 insertions(+), 16 deletions(-) create mode 100644 crates/codegraph-core/src/native_db.rs diff --git a/crates/codegraph-core/src/lib.rs b/crates/codegraph-core/src/lib.rs index 00da9ec7..21f8fe68 100644 --- a/crates/codegraph-core/src/lib.rs +++ b/crates/codegraph-core/src/lib.rs @@ -10,6 +10,7 @@ pub mod extractors; pub mod import_resolution; pub mod incremental; pub mod insert_nodes; +pub mod native_db; pub mod parallel; pub mod parser_registry; pub mod roles_db; diff --git a/crates/codegraph-core/src/native_db.rs b/crates/codegraph-core/src/native_db.rs new file mode 100644 index 00000000..049fff0a --- /dev/null +++ b/crates/codegraph-core/src/native_db.rs @@ -0,0 +1,564 @@ +//! NativeDatabase — persistent rusqlite Connection exposed as a napi-rs class. +//! +//! Phase 6.13: foundation for moving all DB operations to rusqlite on the native +//! engine path. Handles lifecycle (open/close), schema migrations, and build +//! metadata KV operations. +//! +//! IMPORTANT: Migration DDL is mirrored from src/db/migrations.ts. 
+//! Any changes there MUST be reflected here (and vice-versa). + +use napi_derive::napi; +use rusqlite::{params, Connection, OpenFlags}; +use send_wrapper::SendWrapper; + +// ── Migration DDL (mirrored from src/db/migrations.ts) ────────────────── + +struct Migration { + version: u32, + up: &'static str, +} + +const MIGRATIONS: &[Migration] = &[ + Migration { + version: 1, + up: r#" + CREATE TABLE IF NOT EXISTS nodes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + kind TEXT NOT NULL, + file TEXT NOT NULL, + line INTEGER, + end_line INTEGER, + UNIQUE(name, kind, file, line) + ); + CREATE TABLE IF NOT EXISTS edges ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_id INTEGER NOT NULL, + target_id INTEGER NOT NULL, + kind TEXT NOT NULL, + confidence REAL DEFAULT 1.0, + dynamic INTEGER DEFAULT 0, + FOREIGN KEY(source_id) REFERENCES nodes(id), + FOREIGN KEY(target_id) REFERENCES nodes(id) + ); + CREATE INDEX IF NOT EXISTS idx_nodes_name ON nodes(name); + CREATE INDEX IF NOT EXISTS idx_nodes_file ON nodes(file); + CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind); + CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_id); + CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_id); + CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind); + CREATE TABLE IF NOT EXISTS node_metrics ( + node_id INTEGER PRIMARY KEY, + line_count INTEGER, + symbol_count INTEGER, + import_count INTEGER, + export_count INTEGER, + fan_in INTEGER, + fan_out INTEGER, + cohesion REAL, + file_count INTEGER, + FOREIGN KEY(node_id) REFERENCES nodes(id) + ); + CREATE INDEX IF NOT EXISTS idx_node_metrics_node ON node_metrics(node_id); + "#, + }, + Migration { + version: 2, + up: r#" + CREATE INDEX IF NOT EXISTS idx_nodes_name_kind_file ON nodes(name, kind, file); + CREATE INDEX IF NOT EXISTS idx_nodes_file_kind ON nodes(file, kind); + CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source_id, kind); + CREATE INDEX IF NOT EXISTS idx_edges_target_kind ON 
edges(target_id, kind); + "#, + }, + Migration { + version: 3, + up: r#" + CREATE TABLE IF NOT EXISTS file_hashes ( + file TEXT PRIMARY KEY, + hash TEXT NOT NULL, + mtime INTEGER NOT NULL + ); + "#, + }, + Migration { + version: 4, + up: "ALTER TABLE file_hashes ADD COLUMN size INTEGER DEFAULT 0;", + }, + Migration { + version: 5, + up: r#" + CREATE TABLE IF NOT EXISTS co_changes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + file_a TEXT NOT NULL, + file_b TEXT NOT NULL, + commit_count INTEGER NOT NULL, + jaccard REAL NOT NULL, + last_commit_epoch INTEGER, + UNIQUE(file_a, file_b) + ); + CREATE INDEX IF NOT EXISTS idx_co_changes_file_a ON co_changes(file_a); + CREATE INDEX IF NOT EXISTS idx_co_changes_file_b ON co_changes(file_b); + CREATE INDEX IF NOT EXISTS idx_co_changes_jaccard ON co_changes(jaccard DESC); + CREATE TABLE IF NOT EXISTS co_change_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + ); + "#, + }, + Migration { + version: 6, + up: r#" + CREATE TABLE IF NOT EXISTS file_commit_counts ( + file TEXT PRIMARY KEY, + commit_count INTEGER NOT NULL DEFAULT 0 + ); + "#, + }, + Migration { + version: 7, + up: r#" + CREATE TABLE IF NOT EXISTS build_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + ); + "#, + }, + Migration { + version: 8, + up: r#" + CREATE TABLE IF NOT EXISTS function_complexity ( + node_id INTEGER PRIMARY KEY, + cognitive INTEGER NOT NULL, + cyclomatic INTEGER NOT NULL, + max_nesting INTEGER NOT NULL, + FOREIGN KEY(node_id) REFERENCES nodes(id) + ); + CREATE INDEX IF NOT EXISTS idx_fc_cognitive ON function_complexity(cognitive DESC); + CREATE INDEX IF NOT EXISTS idx_fc_cyclomatic ON function_complexity(cyclomatic DESC); + "#, + }, + Migration { + version: 9, + up: r#" + ALTER TABLE function_complexity ADD COLUMN loc INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN sloc INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN comment_lines INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_n1 
INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_n2 INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_big_n1 INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_big_n2 INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_vocabulary INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_length INTEGER DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_volume REAL DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_difficulty REAL DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_effort REAL DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN halstead_bugs REAL DEFAULT 0; + ALTER TABLE function_complexity ADD COLUMN maintainability_index REAL DEFAULT 0; + CREATE INDEX IF NOT EXISTS idx_fc_mi ON function_complexity(maintainability_index ASC); + "#, + }, + Migration { + version: 10, + up: r#" + CREATE TABLE IF NOT EXISTS dataflow ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_id INTEGER NOT NULL, + target_id INTEGER NOT NULL, + kind TEXT NOT NULL, + param_index INTEGER, + expression TEXT, + line INTEGER, + confidence REAL DEFAULT 1.0, + FOREIGN KEY(source_id) REFERENCES nodes(id), + FOREIGN KEY(target_id) REFERENCES nodes(id) + ); + CREATE INDEX IF NOT EXISTS idx_dataflow_source ON dataflow(source_id); + CREATE INDEX IF NOT EXISTS idx_dataflow_target ON dataflow(target_id); + CREATE INDEX IF NOT EXISTS idx_dataflow_kind ON dataflow(kind); + CREATE INDEX IF NOT EXISTS idx_dataflow_source_kind ON dataflow(source_id, kind); + "#, + }, + Migration { + version: 11, + up: r#" + ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id); + CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id); + CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id); + "#, + }, + Migration { + version: 12, + up: r#" + CREATE TABLE IF NOT EXISTS cfg_blocks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + 
function_node_id INTEGER NOT NULL, + block_index INTEGER NOT NULL, + block_type TEXT NOT NULL, + start_line INTEGER, + end_line INTEGER, + label TEXT, + FOREIGN KEY(function_node_id) REFERENCES nodes(id), + UNIQUE(function_node_id, block_index) + ); + CREATE INDEX IF NOT EXISTS idx_cfg_blocks_fn ON cfg_blocks(function_node_id); + + CREATE TABLE IF NOT EXISTS cfg_edges ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + function_node_id INTEGER NOT NULL, + source_block_id INTEGER NOT NULL, + target_block_id INTEGER NOT NULL, + kind TEXT NOT NULL, + FOREIGN KEY(function_node_id) REFERENCES nodes(id), + FOREIGN KEY(source_block_id) REFERENCES cfg_blocks(id), + FOREIGN KEY(target_block_id) REFERENCES cfg_blocks(id) + ); + CREATE INDEX IF NOT EXISTS idx_cfg_edges_fn ON cfg_edges(function_node_id); + CREATE INDEX IF NOT EXISTS idx_cfg_edges_src ON cfg_edges(source_block_id); + CREATE INDEX IF NOT EXISTS idx_cfg_edges_tgt ON cfg_edges(target_block_id); + "#, + }, + Migration { + version: 13, + up: r#" + CREATE TABLE IF NOT EXISTS ast_nodes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + file TEXT NOT NULL, + line INTEGER NOT NULL, + kind TEXT NOT NULL, + name TEXT NOT NULL, + text TEXT, + receiver TEXT, + parent_node_id INTEGER, + FOREIGN KEY(parent_node_id) REFERENCES nodes(id) + ); + CREATE INDEX IF NOT EXISTS idx_ast_kind ON ast_nodes(kind); + CREATE INDEX IF NOT EXISTS idx_ast_name ON ast_nodes(name); + CREATE INDEX IF NOT EXISTS idx_ast_file ON ast_nodes(file); + CREATE INDEX IF NOT EXISTS idx_ast_parent ON ast_nodes(parent_node_id); + CREATE INDEX IF NOT EXISTS idx_ast_kind_name ON ast_nodes(kind, name); + "#, + }, + Migration { + version: 14, + up: r#" + ALTER TABLE nodes ADD COLUMN exported INTEGER DEFAULT 0; + CREATE INDEX IF NOT EXISTS idx_nodes_exported ON nodes(exported); + "#, + }, + Migration { + version: 15, + up: r#" + ALTER TABLE nodes ADD COLUMN qualified_name TEXT; + ALTER TABLE nodes ADD COLUMN scope TEXT; + ALTER TABLE nodes ADD COLUMN visibility TEXT; + UPDATE 
nodes SET qualified_name = name WHERE qualified_name IS NULL; + CREATE INDEX IF NOT EXISTS idx_nodes_qualified_name ON nodes(qualified_name); + CREATE INDEX IF NOT EXISTS idx_nodes_scope ON nodes(scope); + "#, + }, + Migration { + version: 16, + up: r#" + CREATE INDEX IF NOT EXISTS idx_edges_kind_target ON edges(kind, target_id); + CREATE INDEX IF NOT EXISTS idx_edges_kind_source ON edges(kind, source_id); + "#, + }, +]; + +// ── napi types ────────────────────────────────────────────────────────── + +/// A key-value entry for build metadata. +#[napi(object)] +#[derive(Debug, Clone)] +pub struct BuildMetaEntry { + pub key: String, + pub value: String, +} + +// ── NativeDatabase class ──────────────────────────────────────────────── + +/// Persistent rusqlite Connection wrapper exposed to JS via napi-rs. +/// +/// Holds a single `rusqlite::Connection` for the lifetime of a build pipeline. +/// Replaces `better-sqlite3` for schema initialization and build metadata on +/// the native engine path. +#[napi] +pub struct NativeDatabase { + conn: SendWrapper<Option<Connection>>, + db_path: String, +} + +#[napi] +impl NativeDatabase { + /// Open a read-write connection to the database at `db_path`. + /// Creates the database file if it doesn't exist; parent directories must already exist. + #[napi(factory)] + pub fn open_read_write(db_path: String) -> napi::Result<Self> { + let flags = OpenFlags::SQLITE_OPEN_READ_WRITE + | OpenFlags::SQLITE_OPEN_CREATE + | OpenFlags::SQLITE_OPEN_NO_MUTEX; + let conn = Connection::open_with_flags(&db_path, flags) + .map_err(|e| napi::Error::from_reason(format!("Failed to open DB: {e}")))?; + conn.execute_batch( + "PRAGMA journal_mode = WAL; PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000;", + ) + .map_err(|e| napi::Error::from_reason(format!("Failed to set pragmas: {e}")))?; + Ok(Self { + conn: SendWrapper::new(Some(conn)), + db_path, + }) + } + + /// Open a read-only connection to the database at `db_path`. 
+ #[napi(factory)] + pub fn open_readonly(db_path: String) -> napi::Result<Self> { + let flags = OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_NO_MUTEX; + let conn = Connection::open_with_flags(&db_path, flags) + .map_err(|e| napi::Error::from_reason(format!("Failed to open DB readonly: {e}")))?; + conn.execute_batch("PRAGMA busy_timeout = 5000;") + .map_err(|e| napi::Error::from_reason(format!("Failed to set pragmas: {e}")))?; + Ok(Self { + conn: SendWrapper::new(Some(conn)), + db_path, + }) + } + + /// Close the database connection. Idempotent — safe to call multiple times. + #[napi] + pub fn close(&mut self) { + self.conn.take(); + } + + /// The path this database was opened with. + #[napi(getter)] + pub fn db_path(&self) -> String { + self.db_path.clone() + } + + /// Whether the connection is still open. + #[napi(getter)] + pub fn is_open(&self) -> bool { + self.conn.is_some() + } + + /// Execute one or more SQL statements (no result returned). + #[napi] + pub fn exec(&self, sql: String) -> napi::Result<()> { + let conn = self.conn()?; + conn.execute_batch(&sql) + .map_err(|e| napi::Error::from_reason(format!("exec failed: {e}"))) + } + + /// Execute a PRAGMA statement and return the first result as a string. + /// Returns `null` if the pragma produces no output. + #[napi] + pub fn pragma(&self, sql: String) -> napi::Result<Option<String>> { + let conn = self.conn()?; + let query = format!("PRAGMA {sql}"); + let mut stmt = conn + .prepare(&query) + .map_err(|e| napi::Error::from_reason(format!("pragma prepare failed: {e}")))?; + let mut rows = stmt + .query([]) + .map_err(|e| napi::Error::from_reason(format!("pragma query failed: {e}")))?; + match rows.next() { + Ok(Some(row)) => { + let val: String = row + .get(0) + .map_err(|e| napi::Error::from_reason(format!("pragma get failed: {e}")))?; + Ok(Some(val)) + } + Ok(None) => Ok(None), + Err(e) => Err(napi::Error::from_reason(format!("pragma next failed: {e}"))), + } + } + + /// Run all schema migrations. 
Mirrors `initSchema()` from `src/db/migrations.ts`. + #[napi] + pub fn init_schema(&self) -> napi::Result<()> { + let conn = self.conn()?; + + conn.execute_batch( + "CREATE TABLE IF NOT EXISTS schema_version (version INTEGER NOT NULL DEFAULT 0)", + ) + .map_err(|e| napi::Error::from_reason(format!("create schema_version failed: {e}")))?; + + let mut current_version: u32 = conn + .query_row("SELECT version FROM schema_version", [], |row| row.get(0)) + .unwrap_or(0); + + // Insert version 0 if table was just created (empty) + let count: u32 = conn + .query_row("SELECT COUNT(*) FROM schema_version", [], |row| row.get(0)) + .unwrap_or(0); + if count == 0 { + conn.execute("INSERT INTO schema_version (version) VALUES (0)", []) + .map_err(|e| { + napi::Error::from_reason(format!("insert schema_version failed: {e}")) + })?; + } + + for migration in MIGRATIONS { + if migration.version > current_version { + conn.execute_batch(migration.up).map_err(|e| { + napi::Error::from_reason(format!( + "migration v{} failed: {e}", + migration.version + )) + })?; + conn.execute( + "UPDATE schema_version SET version = ?1", + params![migration.version], + ) + .map_err(|e| { + napi::Error::from_reason(format!("update schema_version failed: {e}")) + })?; + current_version = migration.version; + } + } + + // Legacy column compat — add columns that may be missing from pre-migration DBs. + // Mirrors the post-migration block in src/db/migrations.ts initSchema(). 
+ if has_table(conn, "nodes") { + if !has_column(conn, "nodes", "end_line") { + let _ = conn.execute_batch("ALTER TABLE nodes ADD COLUMN end_line INTEGER"); + } + if !has_column(conn, "nodes", "role") { + let _ = conn.execute_batch("ALTER TABLE nodes ADD COLUMN role TEXT"); + } + let _ = conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_nodes_role ON nodes(role)"); + if !has_column(conn, "nodes", "parent_id") { + let _ = conn.execute_batch( + "ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id)", + ); + } + let _ = conn + .execute_batch("CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id)"); + let _ = conn.execute_batch( + "CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id)", + ); + if !has_column(conn, "nodes", "qualified_name") { + let _ = conn.execute_batch("ALTER TABLE nodes ADD COLUMN qualified_name TEXT"); + } + if !has_column(conn, "nodes", "scope") { + let _ = conn.execute_batch("ALTER TABLE nodes ADD COLUMN scope TEXT"); + } + if !has_column(conn, "nodes", "visibility") { + let _ = conn.execute_batch("ALTER TABLE nodes ADD COLUMN visibility TEXT"); + } + let _ = conn.execute_batch( + "UPDATE nodes SET qualified_name = name WHERE qualified_name IS NULL", + ); + let _ = conn.execute_batch( + "CREATE INDEX IF NOT EXISTS idx_nodes_qualified_name ON nodes(qualified_name)", + ); + let _ = + conn.execute_batch("CREATE INDEX IF NOT EXISTS idx_nodes_scope ON nodes(scope)"); + } + if has_table(conn, "edges") { + if !has_column(conn, "edges", "confidence") { + let _ = + conn.execute_batch("ALTER TABLE edges ADD COLUMN confidence REAL DEFAULT 1.0"); + } + if !has_column(conn, "edges", "dynamic") { + let _ = + conn.execute_batch("ALTER TABLE edges ADD COLUMN dynamic INTEGER DEFAULT 0"); + } + } + + Ok(()) + } + + /// Retrieve a single build metadata value by key. Returns `null` if missing. 
+ #[napi] + pub fn get_build_meta(&self, key: String) -> napi::Result<Option<String>> { + let conn = self.conn()?; + + if !has_table(conn, "build_meta") { + return Ok(None); + } + + let result = conn.query_row( + "SELECT value FROM build_meta WHERE key = ?1", + params![key], + |row| row.get::<_, String>(0), + ); + match result { + Ok(val) => Ok(Some(val)), + Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None), + Err(e) => Err(napi::Error::from_reason(format!( + "getBuildMeta failed for key \"{key}\": {e}" + ))), + } + } + + /// Upsert multiple build metadata entries in a single transaction. + #[napi] + pub fn set_build_meta(&self, entries: Vec<BuildMetaEntry>) -> napi::Result<()> { + let conn = self.conn()?; + + // Ensure build_meta table exists (may be called before full migration on edge cases) + conn.execute_batch( + "CREATE TABLE IF NOT EXISTS build_meta (key TEXT PRIMARY KEY, value TEXT NOT NULL)", + ) + .map_err(|e| napi::Error::from_reason(format!("ensure build_meta table failed: {e}")))?; + + let tx = conn + .unchecked_transaction() + .map_err(|e| napi::Error::from_reason(format!("begin transaction failed: {e}")))?; + { + let mut stmt = tx + .prepare_cached("INSERT OR REPLACE INTO build_meta (key, value) VALUES (?1, ?2)") + .map_err(|e| { + napi::Error::from_reason(format!("prepare setBuildMeta failed: {e}")) + })?; + for entry in &entries { + stmt.execute(params![entry.key, entry.value]).map_err(|e| { + napi::Error::from_reason(format!( + "setBuildMeta insert failed for \"{}\": {e}", + entry.key + )) + })?; + } + } + tx.commit() + .map_err(|e| napi::Error::from_reason(format!("commit setBuildMeta failed: {e}")))?; + Ok(()) + } +} + +// ── Private helpers ───────────────────────────────────────────────────── + +impl NativeDatabase { + /// Get a reference to the open connection, or error if closed. 
+ fn conn(&self) -> napi::Result<&Connection> { + self.conn + .as_ref() + .ok_or_else(|| napi::Error::from_reason("NativeDatabase is closed")) + } +} + +/// Check if a table exists in the database. +fn has_table(conn: &Connection, table: &str) -> bool { + conn.query_row( + "SELECT 1 FROM sqlite_master WHERE type='table' AND name=?1", + params![table], + |_| Ok(()), + ) + .is_ok() +} + +/// Check if a column exists in a table. +fn has_column(conn: &Connection, table: &str, column: &str) -> bool { + // PRAGMA table_info returns rows with: cid, name, type, notnull, dflt_value, pk + let query = format!("PRAGMA table_info({table})"); + let result: Result<Vec<String>, _> = conn.prepare(&query).and_then(|mut stmt| { + stmt.query_map([], |row| row.get::<_, String>(1)) + .map(|rows| rows.filter_map(|r| r.ok()).collect()) + }); + match result { + Ok(cols) => cols.iter().any(|c| c == column), + Err(_) => false, + } +} diff --git a/src/db/migrations.ts b/src/db/migrations.ts index b2fbecc9..03828b49 100644 --- a/src/db/migrations.ts +++ b/src/db/migrations.ts @@ -8,6 +8,8 @@ interface Migration { up: string; } +// IMPORTANT: Migration DDL is mirrored in crates/codegraph-core/src/native_db.rs. +// Any changes here MUST be reflected there (and vice-versa). 
export const MIGRATIONS: Migration[] = [ { version: 1, diff --git a/src/domain/graph/builder/context.ts b/src/domain/graph/builder/context.ts index db339175..06fcecea 100644 --- a/src/domain/graph/builder/context.ts +++ b/src/domain/graph/builder/context.ts @@ -12,6 +12,7 @@ import type { ExtractorOutput, FileToParse, MetadataUpdate, + NativeDatabase, NodeRow, ParseChange, PathAliases, @@ -31,6 +32,7 @@ export class PipelineContext { incremental!: boolean; forceFullRebuild: boolean = false; schemaVersion!: number; + nativeDb?: NativeDatabase; // ── File collection (set by collectFiles stage) ──────────────────── allFiles!: string[]; diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts index 47f54ffc..4067b272 100644 --- a/src/domain/graph/builder/pipeline.ts +++ b/src/domain/graph/builder/pipeline.ts @@ -9,6 +9,7 @@ import { performance } from 'node:perf_hooks'; import { closeDb, getBuildMeta, initSchema, MIGRATIONS, openDb } from '../../../db/index.js'; import { detectWorkspaces, loadConfig } from '../../../infrastructure/config.js'; import { info, warn } from '../../../infrastructure/logger.js'; +import { loadNative } from '../../../infrastructure/native.js'; import { CODEGRAPH_VERSION } from '../../../shared/version.js'; import type { BuildGraphOpts, BuildResult } from '../../../types.js'; import { getActiveEngine } from '../../parser.js'; @@ -46,19 +47,23 @@ function checkEngineSchemaMismatch(ctx: PipelineContext): void { ctx.forceFullRebuild = false; if (!ctx.incremental) return; - const prevEngine = getBuildMeta(ctx.db, 'engine'); + // Route metadata reads through NativeDatabase when available (Phase 6.13) + const meta = (key: string): string | null => + ctx.nativeDb ? 
ctx.nativeDb.getBuildMeta(key) : getBuildMeta(ctx.db, key); + + const prevEngine = meta('engine'); if (prevEngine && prevEngine !== ctx.engineName) { info(`Engine changed (${prevEngine} → ${ctx.engineName}), promoting to full rebuild.`); ctx.forceFullRebuild = true; } - const prevSchema = getBuildMeta(ctx.db, 'schema_version'); + const prevSchema = meta('schema_version'); if (prevSchema && Number(prevSchema) !== ctx.schemaVersion) { info( `Schema version changed (${prevSchema} → ${ctx.schemaVersion}), promoting to full rebuild.`, ); ctx.forceFullRebuild = true; } - const prevVersion = getBuildMeta(ctx.db, 'codegraph_version'); + const prevVersion = meta('codegraph_version'); if (prevVersion && prevVersion !== CODEGRAPH_VERSION) { info( `Codegraph version changed (${prevVersion} → ${CODEGRAPH_VERSION}), promoting to full rebuild.`, @@ -91,7 +96,23 @@ function setupPipeline(ctx: PipelineContext): void { ctx.rootDir = path.resolve(ctx.rootDir); ctx.dbPath = path.join(ctx.rootDir, '.codegraph', 'graph.db'); ctx.db = openDb(ctx.dbPath); - initSchema(ctx.db); + + // Use NativeDatabase for schema init when native engine is available (Phase 6.13). + // better-sqlite3 (ctx.db) is still always opened — needed for queries and stages + // that haven't been migrated to rusqlite yet. 
+ const native = loadNative(); + if (native?.NativeDatabase) { + try { + ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath); + ctx.nativeDb.initSchema(); + } catch (err) { + warn(`NativeDatabase init failed, falling back to JS: ${(err as Error).message}`); + ctx.nativeDb = undefined; + initSchema(ctx.db); + } + } else { + initSchema(ctx.db); + } ctx.config = loadConfig(ctx.rootDir); ctx.incremental = @@ -168,7 +189,15 @@ export async function buildGraph( setupPipeline(ctx); await runPipelineStages(ctx); } catch (err) { - if (!ctx.earlyExit && ctx.db) closeDb(ctx.db); + if (!ctx.earlyExit) { + if (ctx.nativeDb) + try { + ctx.nativeDb.close(); + } catch { + /* ignore */ + } + if (ctx.db) closeDb(ctx.db); + } throw err; } diff --git a/src/domain/graph/builder/stages/finalize.ts b/src/domain/graph/builder/stages/finalize.ts index 099f7642..763f9c96 100644 --- a/src/domain/graph/builder/stages/finalize.ts +++ b/src/domain/graph/builder/stages/finalize.ts @@ -43,8 +43,12 @@ export async function finalize(ctx: PipelineContext): Promise { // Incremental drift detection — skip for small incremental changes where // count fluctuation is expected (reverse-dep edge churn). if (!isFullBuild && allSymbols.size > 3) { - const prevNodes = getBuildMeta(db, 'node_count'); - const prevEdges = getBuildMeta(db, 'edge_count'); + const prevNodes = ctx.nativeDb + ? ctx.nativeDb.getBuildMeta('node_count') + : getBuildMeta(db, 'node_count'); + const prevEdges = ctx.nativeDb + ? ctx.nativeDb.getBuildMeta('edge_count') + : getBuildMeta(db, 'edge_count'); if (prevNodes && prevEdges) { const prevN = Number(prevNodes); const prevE = Number(prevEdges); @@ -71,15 +75,29 @@ export async function finalize(ctx: PipelineContext): Promise { // counts stay fresh whenever drift detection reads them. 
if (isFullBuild || allSymbols.size > 3) { try { - setBuildMeta(db, { - engine: ctx.engineName, - engine_version: ctx.engineVersion || '', - codegraph_version: CODEGRAPH_VERSION, - schema_version: String(schemaVersion), - built_at: buildNow.toISOString(), - node_count: nodeCount, - edge_count: actualEdgeCount, - }); + if (ctx.nativeDb) { + ctx.nativeDb.setBuildMeta( + Object.entries({ + engine: ctx.engineName, + engine_version: ctx.engineVersion || '', + codegraph_version: CODEGRAPH_VERSION, + schema_version: String(schemaVersion), + built_at: buildNow.toISOString(), + node_count: String(nodeCount), + edge_count: String(actualEdgeCount), + }).map(([key, value]) => ({ key, value: String(value) })), + ); + } else { + setBuildMeta(db, { + engine: ctx.engineName, + engine_version: ctx.engineVersion || '', + codegraph_version: CODEGRAPH_VERSION, + schema_version: String(schemaVersion), + built_at: buildNow.toISOString(), + node_count: nodeCount, + edge_count: actualEdgeCount, + }); + } } catch (err) { warn(`Failed to write build metadata: ${(err as Error).message}`); } @@ -165,6 +183,15 @@ export async function finalize(ctx: PipelineContext): Promise { // separately via timing.closeDbMs when available. ctx.timing.finalizeMs = performance.now() - t0; + // Close NativeDatabase before better-sqlite3 (Phase 6.13) + if (ctx.nativeDb) { + try { + ctx.nativeDb.close(); + } catch { + /* ignore */ + } + } + // For small incremental builds, defer db.close() to the next event loop tick. // The WAL checkpoint in db.close() costs ~250ms on Windows NTFS due to fsync. // Deferring lets buildGraph() return immediately; the checkpoint runs after. 
diff --git a/src/types.ts b/src/types.ts index 4939ff8a..51116293 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1879,6 +1879,10 @@ export interface NativeAddon { } | null; engineVersion(): string; ParseTreeCache: new () => NativeParseTreeCache; + NativeDatabase: { + openReadWrite(dbPath: string): NativeDatabase; + openReadonly(dbPath: string): NativeDatabase; + }; } /** Native parse-tree cache instance. */ @@ -1889,6 +1893,18 @@ export interface NativeParseTreeCache { clear(): void; } +/** Native rusqlite database wrapper instance (Phase 6.13). */ +export interface NativeDatabase { + initSchema(): void; + getBuildMeta(key: string): string | null; + setBuildMeta(entries: Array<{ key: string; value: string }>): void; + exec(sql: string): void; + pragma(sql: string): string | null; + close(): void; + readonly dbPath: string; + readonly isOpen: boolean; +} + // ════════════════════════════════════════════════════════════════════════ // §14 CLI Command Framework // ════════════════════════════════════════════════════════════════════════ From f44a564e9946a6e3ac600e628474430ab842a831 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sat, 28 Mar 2026 16:09:44 -0600 Subject: [PATCH 2/3] perf(db): migrate build pipeline writes to NativeDatabase persistent connection (6.15) Consolidate all build-pipeline write operations into NativeDatabase methods that reuse a single persistent rusqlite connection, eliminating the per-call connection open/close overhead from the standalone functions. Rust: Refactor standalone modules (insert_nodes, edges_db, ast_db, roles_db) to expose pub(crate) functions accepting &Connection, then add wrapper methods on NativeDatabase: bulkInsertNodes, bulkInsertEdges, bulkInsertAstNodes, classifyRolesFull, classifyRolesIncremental, and purgeFilesData. Standalone #[napi] functions preserved for backward compatibility. 
TypeScript: Wire all build stages (insert-nodes, build-edges, build-structure, detect-changes, ast) to prefer ctx.nativeDb methods when available, falling back to standalone native functions then JS. Thread nativeDb through EngineOpts for analysis-phase AST insertion. --- crates/codegraph-core/src/ast_db.rs | 58 +++++---- crates/codegraph-core/src/edges_db.rs | 8 +- crates/codegraph-core/src/insert_nodes.rs | 10 +- crates/codegraph-core/src/native_db.rs | 115 ++++++++++++++++++ crates/codegraph-core/src/roles_db.rs | 18 +-- src/domain/graph/builder/pipeline.ts | 1 + .../graph/builder/stages/build-edges.ts | 15 ++- .../graph/builder/stages/build-structure.ts | 24 +++- .../graph/builder/stages/detect-changes.ts | 8 +- .../graph/builder/stages/insert-nodes.ts | 14 ++- src/features/ast.ts | 27 +++- src/types.ts | 77 +++++++++++- 12 files changed, 311 insertions(+), 64 deletions(-) diff --git a/crates/codegraph-core/src/ast_db.rs b/crates/codegraph-core/src/ast_db.rs index 4f317db1..b67b94fc 100644 --- a/crates/codegraph-core/src/ast_db.rs +++ b/crates/codegraph-core/src/ast_db.rs @@ -74,15 +74,26 @@ pub fn bulk_insert_ast_nodes(db_path: String, batches: Vec) -> u32 } let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX; - let mut conn = match Connection::open_with_flags(&db_path, flags) { + let conn = match Connection::open_with_flags(&db_path, flags) { Ok(c) => c, Err(_) => return 0, }; // Match the JS-side performance pragmas (including busy_timeout for WAL contention) - let _ = conn.execute_batch( - "PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000", - ); + let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000"); + + do_insert_ast_nodes(&conn, &batches).unwrap_or(0) +} + +/// Internal implementation: insert AST nodes using an existing connection. +/// Used by both the standalone `bulk_insert_ast_nodes` function and `NativeDatabase`. 
+pub(crate) fn do_insert_ast_nodes( + conn: &Connection, + batches: &[FileAstBatch], +) -> rusqlite::Result { + if batches.is_empty() { + return Ok(0); + } // Bail out if the ast_nodes table doesn't exist (schema too old) let has_table: bool = conn @@ -90,19 +101,15 @@ pub fn bulk_insert_ast_nodes(db_path: String, batches: Vec) -> u32 .and_then(|mut s| s.query_row([], |_| Ok(true))) .unwrap_or(false); if !has_table { - return 0; + return Ok(0); } // ── Phase 1: Pre-fetch node definitions for parent resolution ──────── let mut file_defs: HashMap> = HashMap::new(); { - let Ok(mut stmt) = - conn.prepare("SELECT id, line, end_line FROM nodes WHERE file = ?1") - else { - return 0; - }; + let mut stmt = conn.prepare("SELECT id, line, end_line FROM nodes WHERE file = ?1")?; - for batch in &batches { + for batch in batches { if batch.nodes.is_empty() || file_defs.contains_key(&batch.file) { continue; } @@ -118,30 +125,26 @@ pub fn bulk_insert_ast_nodes(db_path: String, batches: Vec) -> u32 .unwrap_or_default(); file_defs.insert(batch.file.clone(), defs); } - } // `stmt` dropped — releases the immutable borrow on `conn` + } // ── Phase 2: Bulk insert in a single transaction ───────────────────── - let Ok(tx) = conn.transaction() else { - return 0; - }; + let tx = conn.unchecked_transaction()?; let mut total = 0u32; { - let Ok(mut insert_stmt) = tx.prepare( + let mut insert_stmt = tx.prepare( "INSERT INTO ast_nodes (file, line, kind, name, text, receiver, parent_node_id) \ VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", - ) else { - return 0; - }; + )?; - for batch in &batches { + for batch in batches { let empty = Vec::new(); let defs = file_defs.get(&batch.file).unwrap_or(&empty); for node in &batch.nodes { let parent_id = find_parent_id(defs, node.line); - match insert_stmt.execute(params![ + insert_stmt.execute(params![ &batch.file, node.line, &node.kind, @@ -149,17 +152,12 @@ pub fn bulk_insert_ast_nodes(db_path: String, batches: Vec) -> u32 &node.text, &node.receiver, parent_id, 
- ]) { - Ok(_) => total += 1, - Err(_) => return 0, // abort; tx rolls back on drop - } + ])?; + total += 1; } } - } // `insert_stmt` dropped - - if tx.commit().is_err() { - return 0; } - total + tx.commit()?; + Ok(total) } diff --git a/crates/codegraph-core/src/edges_db.rs b/crates/codegraph-core/src/edges_db.rs index 25f1ae51..1d4ba297 100644 --- a/crates/codegraph-core/src/edges_db.rs +++ b/crates/codegraph-core/src/edges_db.rs @@ -32,20 +32,20 @@ pub fn bulk_insert_edges(db_path: String, edges: Vec) -> bool { return true; } let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX; - let mut conn = match Connection::open_with_flags(&db_path, flags) { + let conn = match Connection::open_with_flags(&db_path, flags) { Ok(c) => c, Err(_) => return false, }; let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000"); - do_insert(&mut conn, &edges).is_ok() + do_insert_edges(&conn, &edges).is_ok() } /// 199 rows × 5 params = 995 bind parameters per statement, safely under /// the legacy `SQLITE_MAX_VARIABLE_NUMBER` default of 999. 
const CHUNK: usize = 199; -fn do_insert(conn: &mut Connection, edges: &[EdgeRow]) -> rusqlite::Result<()> { - let tx = conn.transaction()?; +pub(crate) fn do_insert_edges(conn: &Connection, edges: &[EdgeRow]) -> rusqlite::Result<()> { + let tx = conn.unchecked_transaction()?; for chunk in edges.chunks(CHUNK) { let placeholders: Vec = (0..chunk.len()) diff --git a/crates/codegraph-core/src/insert_nodes.rs b/crates/codegraph-core/src/insert_nodes.rs index e49006b0..6891aba2 100644 --- a/crates/codegraph-core/src/insert_nodes.rs +++ b/crates/codegraph-core/src/insert_nodes.rs @@ -76,14 +76,14 @@ pub fn bulk_insert_nodes( removed_files: Vec, ) -> bool { let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX; - let mut conn = match Connection::open_with_flags(&db_path, flags) { + let conn = match Connection::open_with_flags(&db_path, flags) { Ok(c) => c, Err(_) => return false, }; let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000"); - do_insert(&mut conn, &batches, &file_hashes, &removed_files).is_ok() + do_insert(&conn, &batches, &file_hashes, &removed_files).is_ok() } // ── Internal implementation ───────────────────────────────────────── @@ -108,13 +108,13 @@ fn query_node_ids( Ok(map) } -fn do_insert( - conn: &mut Connection, +pub(crate) fn do_insert( + conn: &Connection, batches: &[InsertNodesBatch], file_hashes: &[FileHashEntry], removed_files: &[String], ) -> rusqlite::Result<()> { - let tx = conn.transaction()?; + let tx = conn.unchecked_transaction()?; // ── Phase 1: Insert file nodes + definitions + export nodes ────── { diff --git a/crates/codegraph-core/src/native_db.rs b/crates/codegraph-core/src/native_db.rs index 049fff0a..fa8f3370 100644 --- a/crates/codegraph-core/src/native_db.rs +++ b/crates/codegraph-core/src/native_db.rs @@ -11,6 +11,11 @@ use napi_derive::napi; use rusqlite::{params, Connection, OpenFlags}; use send_wrapper::SendWrapper; +use crate::ast_db::{self, FileAstBatch}; +use 
crate::edges_db::{self, EdgeRow}; +use crate::insert_nodes::{self, FileHashEntry, InsertNodesBatch}; +use crate::roles_db::{self, RoleSummary}; + // ── Migration DDL (mirrored from src/db/migrations.ts) ────────────────── struct Migration { @@ -526,6 +531,116 @@ impl NativeDatabase { .map_err(|e| napi::Error::from_reason(format!("commit setBuildMeta failed: {e}")))?; Ok(()) } + + // ── Phase 6.15: Build pipeline write operations ───────────────────── + + /// Bulk-insert nodes, children, containment edges, exports, and file hashes. + /// Reuses the persistent connection instead of opening a new one. + /// Returns `true` on success, `false` on failure. + #[napi] + pub fn bulk_insert_nodes( + &self, + batches: Vec, + file_hashes: Vec, + removed_files: Vec, + ) -> napi::Result { + let conn = self.conn()?; + Ok(insert_nodes::do_insert(conn, &batches, &file_hashes, &removed_files).is_ok()) + } + + /// Bulk-insert edge rows using chunked multi-value INSERT statements. + /// Returns `true` on success, `false` on failure. + #[napi] + pub fn bulk_insert_edges(&self, edges: Vec) -> napi::Result { + if edges.is_empty() { + return Ok(true); + } + let conn = self.conn()?; + Ok(edges_db::do_insert_edges(conn, &edges).is_ok()) + } + + /// Bulk-insert AST nodes, resolving parent_node_id from the nodes table. + /// Returns the number of rows inserted (0 on failure). + #[napi] + pub fn bulk_insert_ast_nodes(&self, batches: Vec) -> napi::Result { + let conn = self.conn()?; + Ok(ast_db::do_insert_ast_nodes(conn, &batches).unwrap_or(0)) + } + + /// Full role classification: queries all nodes, computes fan-in/fan-out, + /// classifies roles, and batch-updates the `role` column. + #[napi] + pub fn classify_roles_full(&self) -> napi::Result> { + let conn = self.conn()?; + Ok(roles_db::do_classify_full(conn).ok()) + } + + /// Incremental role classification: only reclassifies nodes from changed + /// files plus their immediate edge neighbours. 
+ #[napi] + pub fn classify_roles_incremental( + &self, + changed_files: Vec, + ) -> napi::Result> { + let conn = self.conn()?; + Ok(roles_db::do_classify_incremental(conn, &changed_files).ok()) + } + + /// Cascade-delete all graph data for the specified files across all tables. + /// Order: dependent tables first (embeddings, cfg, dataflow, complexity, + /// metrics, ast_nodes), then edges, then nodes, then optionally file_hashes. + #[napi] + pub fn purge_files_data( + &self, + files: Vec, + purge_hashes: Option, + ) -> napi::Result<()> { + if files.is_empty() { + return Ok(()); + } + let conn = self.conn()?; + let purge_hashes = purge_hashes.unwrap_or(true); + + let tx = conn + .unchecked_transaction() + .map_err(|e| napi::Error::from_reason(format!("purge transaction failed: {e}")))?; + + // Purge each file across all tables. Optional tables are silently + // skipped if they don't exist. Order: dependents → edges → nodes → hashes. + let purge_sql: &[(&str, bool)] = &[ + ("DELETE FROM embeddings WHERE node_id IN (SELECT id FROM nodes WHERE file = ?1)", false), + ("DELETE FROM cfg_edges WHERE function_node_id IN (SELECT id FROM nodes WHERE file = ?1)", false), + ("DELETE FROM cfg_blocks WHERE function_node_id IN (SELECT id FROM nodes WHERE file = ?1)", false), + ("DELETE FROM dataflow WHERE source_id IN (SELECT id FROM nodes WHERE file = ?1) OR target_id IN (SELECT id FROM nodes WHERE file = ?1)", false), + ("DELETE FROM function_complexity WHERE node_id IN (SELECT id FROM nodes WHERE file = ?1)", false), + ("DELETE FROM node_metrics WHERE node_id IN (SELECT id FROM nodes WHERE file = ?1)", false), + ("DELETE FROM ast_nodes WHERE file = ?1", false), + // Core tables — errors propagated + ("DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?1) OR target_id IN (SELECT id FROM nodes WHERE file = ?1)", true), + ("DELETE FROM nodes WHERE file = ?1", true), + ]; + + for file in &files { + for &(sql, required) in purge_sql { + match tx.execute(sql, 
params![file]) { + Ok(_) => {} + Err(e) if required => { + return Err(napi::Error::from_reason(format!( + "purge failed for \"{file}\": {e}" + ))); + } + Err(_) => {} // optional table missing — skip + } + } + if purge_hashes { + let _ = tx.execute("DELETE FROM file_hashes WHERE file = ?1", params![file]); + } + } + + tx.commit() + .map_err(|e| napi::Error::from_reason(format!("purge commit failed: {e}")))?; + Ok(()) + } } // ── Private helpers ───────────────────────────────────────────────────── diff --git a/crates/codegraph-core/src/roles_db.rs b/crates/codegraph-core/src/roles_db.rs index 784787d7..24f36e84 100644 --- a/crates/codegraph-core/src/roles_db.rs +++ b/crates/codegraph-core/src/roles_db.rs @@ -74,9 +74,9 @@ pub struct RoleSummary { #[napi] pub fn classify_roles_full(db_path: String) -> Option { let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX; - let mut conn = Connection::open_with_flags(&db_path, flags).ok()?; + let conn = Connection::open_with_flags(&db_path, flags).ok()?; let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000"); - do_classify_full(&mut conn).ok() + do_classify_full(&conn).ok() } /// Incremental role classification: only reclassifies nodes from changed files @@ -88,9 +88,9 @@ pub fn classify_roles_incremental( changed_files: Vec, ) -> Option { let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX; - let mut conn = Connection::open_with_flags(&db_path, flags).ok()?; + let conn = Connection::open_with_flags(&db_path, flags).ok()?; let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000"); - do_classify_incremental(&mut conn, &changed_files).ok() + do_classify_incremental(&conn, &changed_files).ok() } // ── Shared helpers ─────────────────────────────────────────────────── @@ -228,8 +228,8 @@ fn batch_update_roles( // ── Full classification ────────────────────────────────────────────── -fn do_classify_full(conn: &mut 
Connection) -> rusqlite::Result { - let tx = conn.transaction()?; +pub(crate) fn do_classify_full(conn: &Connection) -> rusqlite::Result { + let tx = conn.unchecked_transaction()?; let mut summary = RoleSummary::default(); // 1. Leaf kinds → dead-leaf (skip expensive fan-in/fan-out JOINs) @@ -351,11 +351,11 @@ fn do_classify_full(conn: &mut Connection) -> rusqlite::Result { // ── Incremental classification ─────────────────────────────────────── -fn do_classify_incremental( - conn: &mut Connection, +pub(crate) fn do_classify_incremental( + conn: &Connection, changed_files: &[String], ) -> rusqlite::Result { - let tx = conn.transaction()?; + let tx = conn.unchecked_transaction()?; let mut summary = RoleSummary::default(); // Build placeholders for changed files diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts index 4067b272..f450c76e 100644 --- a/src/domain/graph/builder/pipeline.ts +++ b/src/domain/graph/builder/pipeline.ts @@ -34,6 +34,7 @@ function initializeEngine(ctx: PipelineContext): void { engine: ctx.opts.engine || 'auto', dataflow: ctx.opts.dataflow !== false, ast: ctx.opts.ast !== false, + nativeDb: ctx.nativeDb, }; const { name: engineName, version: engineVersion } = getActiveEngine(ctx.engineOpts); ctx.engineName = engineName as 'native' | 'wasm'; diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index 2869ada5..8aafcb91 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -673,15 +673,17 @@ export async function buildEdges(ctx: PipelineContext): Promise { // When using native edge insert, skip JS insert here — do it after tx commits. // Otherwise insert edges within this transaction for atomicity. 
- if (!native?.bulkInsertEdges) { + const useNativeEdgeInsert = !!(ctx.nativeDb?.bulkInsertEdges || native?.bulkInsertEdges); + if (!useNativeEdgeInsert) { batchInsertEdges(db, allEdgeRows); } }); computeEdgesTx(); // Phase 2: Native rusqlite bulk insert (outside better-sqlite3 transaction - // since rusqlite opens its own connection — avoids SQLITE_BUSY contention) - if (native?.bulkInsertEdges && allEdgeRows.length > 0) { + // to avoid SQLITE_BUSY contention). Prefer NativeDatabase persistent + // connection (6.15), fall back to standalone function (6.12). + if ((ctx.nativeDb?.bulkInsertEdges || native?.bulkInsertEdges) && allEdgeRows.length > 0) { const nativeEdges = allEdgeRows.map((r) => ({ sourceId: r[0], targetId: r[1], @@ -689,7 +691,12 @@ export async function buildEdges(ctx: PipelineContext): Promise { confidence: r[3], dynamic: r[4], })); - const ok = native.bulkInsertEdges(db.name, nativeEdges); + let ok: boolean; + if (ctx.nativeDb?.bulkInsertEdges) { + ok = ctx.nativeDb.bulkInsertEdges(nativeEdges); + } else { + ok = native!.bulkInsertEdges(db.name, nativeEdges); + } if (!ok) { debug('Native bulkInsertEdges failed — falling back to JS batchInsertEdges'); batchInsertEdges(db, allEdgeRows); diff --git a/src/domain/graph/builder/stages/build-structure.ts b/src/domain/graph/builder/stages/build-structure.ts index 212ce367..bb638806 100644 --- a/src/domain/graph/builder/stages/build-structure.ts +++ b/src/domain/graph/builder/stages/build-structure.ts @@ -89,8 +89,28 @@ export async function buildStructure(ctx: PipelineContext): Promise { try { let roleSummary: Record | null = null; - // Try native rusqlite path first (eliminates JS<->SQLite round-trips) - if (ctx.engineName === 'native') { + // Try NativeDatabase persistent connection first (6.15), then standalone (6.12) + if (ctx.nativeDb?.classifyRolesFull) { + const nativeResult = + changedFileList && changedFileList.length > 0 + ? 
ctx.nativeDb.classifyRolesIncremental(changedFileList) + : ctx.nativeDb.classifyRolesFull(); + if (nativeResult) { + roleSummary = { + entry: nativeResult.entry, + core: nativeResult.core, + utility: nativeResult.utility, + adapter: nativeResult.adapter, + dead: nativeResult.dead, + 'dead-leaf': nativeResult.deadLeaf, + 'dead-entry': nativeResult.deadEntry, + 'dead-ffi': nativeResult.deadFfi, + 'dead-unresolved': nativeResult.deadUnresolved, + 'test-only': nativeResult.testOnly, + leaf: nativeResult.leaf, + }; + } + } else if (ctx.engineName === 'native') { const native = loadNative(); if (native?.classifyRolesFull) { const dbPath = db.name; diff --git a/src/domain/graph/builder/stages/detect-changes.ts b/src/domain/graph/builder/stages/detect-changes.ts index 045f2e0b..045340ba 100644 --- a/src/domain/graph/builder/stages/detect-changes.ts +++ b/src/domain/graph/builder/stages/detect-changes.ts @@ -326,7 +326,13 @@ function purgeAndAddReverseDeps( ): void { const { db, rootDir } = ctx; if (changePaths.length > 0 || ctx.removed.length > 0) { - purgeFilesFromGraph(db, [...ctx.removed, ...changePaths], { purgeHashes: false }); + const filesToPurge = [...ctx.removed, ...changePaths]; + // Prefer NativeDatabase persistent connection for purge (6.15) + if (ctx.nativeDb?.purgeFilesData) { + ctx.nativeDb.purgeFilesData(filesToPurge, false); + } else { + purgeFilesFromGraph(db, filesToPurge, { purgeHashes: false }); + } } if (reverseDeps.size > 0) { const deleteOutgoingEdgesForFile = db.prepare( diff --git a/src/domain/graph/builder/stages/insert-nodes.ts b/src/domain/graph/builder/stages/insert-nodes.ts index da8c62c7..c6197a55 100644 --- a/src/domain/graph/builder/stages/insert-nodes.ts +++ b/src/domain/graph/builder/stages/insert-nodes.ts @@ -40,11 +40,13 @@ interface PrecomputedFileData { // ── Native fast-path ───────────────────────────────────────────────── function tryNativeInsert(ctx: PipelineContext): boolean { - const native = loadNative(); - if 
(!native?.bulkInsertNodes) return false; + // Prefer NativeDatabase persistent connection (6.15), fall back to standalone (6.12) + const hasNativeDb = !!ctx.nativeDb?.bulkInsertNodes; + const native = hasNativeDb ? null : loadNative(); + if (!hasNativeDb && !native?.bulkInsertNodes) return false; const { dbPath, allSymbols, filesToParse, metadataUpdates, rootDir, removed } = ctx; - if (!dbPath) return false; + if (!hasNativeDb && !dbPath) return false; // Marshal allSymbols → InsertNodesBatch[] const batches: Array<{ @@ -139,7 +141,11 @@ function tryNativeInsert(ctx: PipelineContext): boolean { fileHashes.push({ file: item.relPath, hash: item.hash, mtime, size }); } - return native.bulkInsertNodes(dbPath, batches, fileHashes, removed); + // Route through persistent NativeDatabase when available (6.15) + if (ctx.nativeDb?.bulkInsertNodes) { + return ctx.nativeDb.bulkInsertNodes(batches, fileHashes, removed); + } + return native!.bulkInsertNodes(dbPath!, batches, fileHashes, removed); } // ── JS fallback: Phase 1 ──────────────────────────────────────────── diff --git a/src/features/ast.ts b/src/features/ast.ts index 6edd428f..b92a9e4e 100644 --- a/src/features/ast.ts +++ b/src/features/ast.ts @@ -66,11 +66,28 @@ export async function buildAstNodes( db: BetterSqlite3Database, fileSymbols: Map, _rootDir: string, - _engineOpts?: unknown, + engineOpts?: { + nativeDb?: { + bulkInsertAstNodes( + batches: Array<{ + file: string; + nodes: Array<{ + line: number; + kind: string; + name: string; + text?: string | null; + receiver?: string | null; + }>; + }>, + ): number; + }; + }, ): Promise { // ── Native bulk-insert fast path ────────────────────────────────────── - const native = loadNative(); - if (native?.bulkInsertAstNodes) { + // Prefer NativeDatabase persistent connection (6.15), then standalone (6.12) + const nativeDb = engineOpts?.nativeDb; + const native = nativeDb ? 
null : loadNative(); + if (nativeDb?.bulkInsertAstNodes || native?.bulkInsertAstNodes) { let needsJsFallback = false; const batches: Array<{ file: string; @@ -103,7 +120,9 @@ export async function buildAstNodes( if (!needsJsFallback) { const expectedNodes = batches.reduce((s, b) => s + b.nodes.length, 0); - const inserted = native.bulkInsertAstNodes(db.name, batches); + const inserted = nativeDb + ? nativeDb.bulkInsertAstNodes(batches) + : native!.bulkInsertAstNodes(db.name, batches); if (inserted === expectedNodes) { debug(`AST extraction (native bulk): ${inserted} nodes stored`); return; diff --git a/src/types.ts b/src/types.ts index 51116293..b6d8f83e 100644 --- a/src/types.ts +++ b/src/types.ts @@ -881,6 +881,8 @@ export interface EngineOpts { engine: EngineMode; dataflow: boolean; ast: boolean; + /** Persistent NativeDatabase connection for build writes (Phase 6.15). */ + nativeDb?: NativeDatabase; } /** A file change detected during incremental builds. */ @@ -1893,8 +1895,9 @@ export interface NativeParseTreeCache { clear(): void; } -/** Native rusqlite database wrapper instance (Phase 6.13). */ +/** Native rusqlite database wrapper instance (Phase 6.13 + 6.15). 
*/ export interface NativeDatabase { + // ── Lifecycle (6.13) ──────────────────────────────────────────────── initSchema(): void; getBuildMeta(key: string): string | null; setBuildMeta(entries: Array<{ key: string; value: string }>): void; @@ -1903,6 +1906,78 @@ export interface NativeDatabase { close(): void; readonly dbPath: string; readonly isOpen: boolean; + + // ── Build pipeline writes (6.15) ─────────────────────────────────── + bulkInsertNodes( + batches: Array<{ + file: string; + definitions: Array<{ + name: string; + kind: string; + line: number; + endLine?: number | null; + visibility?: string | null; + children: Array<{ + name: string; + kind: string; + line: number; + endLine?: number | null; + visibility?: string | null; + }>; + }>; + exports: Array<{ name: string; kind: string; line: number }>; + }>, + fileHashes: Array<{ file: string; hash: string; mtime: number; size: number }>, + removedFiles: string[], + ): boolean; + bulkInsertEdges( + edges: Array<{ + sourceId: number; + targetId: number; + kind: string; + confidence: number; + dynamic: number; + }>, + ): boolean; + bulkInsertAstNodes( + batches: Array<{ + file: string; + nodes: Array<{ + line: number; + kind: string; + name: string; + text?: string | null; + receiver?: string | null; + }>; + }>, + ): number; + classifyRolesFull(): { + entry: number; + core: number; + utility: number; + adapter: number; + dead: number; + deadLeaf: number; + deadEntry: number; + deadFfi: number; + deadUnresolved: number; + testOnly: number; + leaf: number; + } | null; + classifyRolesIncremental(changedFiles: string[]): { + entry: number; + core: number; + utility: number; + adapter: number; + dead: number; + deadLeaf: number; + deadEntry: number; + deadFfi: number; + deadUnresolved: number; + testOnly: number; + leaf: number; + } | null; + purgeFilesData(files: string[], purgeHashes?: boolean): void; } // ════════════════════════════════════════════════════════════════════════ From 
850d08c85268e5c159fe0fb649af5c957cdbe01a Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sat, 28 Mar 2026 17:02:13 -0600 Subject: [PATCH 3/3] fix: rename do_insert to do_insert_nodes and add error logging before .is_ok() (#669) --- crates/codegraph-core/src/insert_nodes.rs | 4 ++-- crates/codegraph-core/src/native_db.rs | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/crates/codegraph-core/src/insert_nodes.rs b/crates/codegraph-core/src/insert_nodes.rs index 6891aba2..1f594d19 100644 --- a/crates/codegraph-core/src/insert_nodes.rs +++ b/crates/codegraph-core/src/insert_nodes.rs @@ -83,7 +83,7 @@ pub fn bulk_insert_nodes( let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000"); - do_insert(&conn, &batches, &file_hashes, &removed_files).is_ok() + do_insert_nodes(&conn, &batches, &file_hashes, &removed_files).is_ok() } // ── Internal implementation ───────────────────────────────────────── @@ -108,7 +108,7 @@ fn query_node_ids( Ok(map) } -pub(crate) fn do_insert( +pub(crate) fn do_insert_nodes( conn: &Connection, batches: &[InsertNodesBatch], file_hashes: &[FileHashEntry], diff --git a/crates/codegraph-core/src/native_db.rs b/crates/codegraph-core/src/native_db.rs index 2dd54bc4..df7fe69b 100644 --- a/crates/codegraph-core/src/native_db.rs +++ b/crates/codegraph-core/src/native_db.rs @@ -562,7 +562,9 @@ impl NativeDatabase { removed_files: Vec, ) -> napi::Result { let conn = self.conn()?; - Ok(insert_nodes::do_insert(conn, &batches, &file_hashes, &removed_files).is_ok()) + Ok(insert_nodes::do_insert_nodes(conn, &batches, &file_hashes, &removed_files) + .inspect_err(|e| eprintln!("[NativeDatabase] bulk_insert_nodes failed: {e}")) + .is_ok()) } /// Bulk-insert edge rows using chunked multi-value INSERT statements. 
@@ -573,7 +575,9 @@ impl NativeDatabase { return Ok(true); } let conn = self.conn()?; - Ok(edges_db::do_insert_edges(conn, &edges).is_ok()) + Ok(edges_db::do_insert_edges(conn, &edges) + .inspect_err(|e| eprintln!("[NativeDatabase] bulk_insert_edges failed: {e}")) + .is_ok()) } /// Bulk-insert AST nodes, resolving parent_node_id from the nodes table.