From c898a4a3a72b7c0da8502c8aa2120e7a987d212c Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 27 Mar 2026 02:15:45 -0600 Subject: [PATCH 1/6] =?UTF-8?q?fix(parser):=20close=20WASM=E2=80=93native?= =?UTF-8?q?=20engine=20parity=20gap=20(#649)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five root causes fixed across both engines: WASM JS/TS: extractConstantsWalk only walked root-level children, missing constants inside top-level blocks (if, try, for). Now recurses into all non-function scopes, matching native's recursive walk. WASM JS/TS: query pattern only matched property_identifier for methods, missing private #methods. Added private_property_identifier pattern. WASM Rust: extractRustParameters included self_parameter nodes that native correctly skips. Now skips them. Native JS: find_parent_of_types missed generator_function_declaration and generator_function, leaking constants from generator function bodies. Added both types to the scope filter. Native Rust: function_item handler emitted a duplicate entry for trait default-impl functions already captured by trait_item handler. Added parent guard matching WASM's existing skip logic. 
--- .../src/extractors/javascript.rs | 1 + .../src/extractors/rust_lang.rs | 8 +- src/domain/parser.ts | 1 + src/extractors/javascript.ts | 88 +++++++++++-------- src/extractors/rust.ts | 3 +- 5 files changed, 63 insertions(+), 38 deletions(-) diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index a4b9f881..4147dc03 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -295,6 +295,7 @@ fn walk_node_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: && find_parent_of_types(node, &[ "function_declaration", "arrow_function", "function_expression", "method_definition", + "generator_function_declaration", "generator_function", ]).is_none() { symbols.definitions.push(Definition { diff --git a/crates/codegraph-core/src/extractors/rust_lang.rs b/crates/codegraph-core/src/extractors/rust_lang.rs index 550fc5db..c4c07c11 100644 --- a/crates/codegraph-core/src/extractors/rust_lang.rs +++ b/crates/codegraph-core/src/extractors/rust_lang.rs @@ -40,7 +40,13 @@ fn walk_node_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: } match node.kind() { "function_item" => { - if let Some(name_node) = node.child_by_field_name("name") { + // Skip default-impl functions inside traits — already emitted by trait_item handler + if node.parent() + .and_then(|p| p.parent()) + .map_or(false, |gp| gp.kind() == "trait_item") + { + // still recurse into children below + } else if let Some(name_node) = node.child_by_field_name("name") { let name = node_text(&name_node, source); let impl_type = find_current_impl(node, source); let (full_name, kind) = match &impl_type { diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 68abde2d..70b7bde6 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -85,6 +85,7 @@ const COMMON_QUERY_PATTERNS: string[] = [ '(variable_declarator name: (identifier) @varfn_name value: 
(arrow_function) @varfn_value)', '(variable_declarator name: (identifier) @varfn_name value: (function_expression) @varfn_value)', '(method_definition name: (property_identifier) @meth_name) @meth_node', + '(method_definition name: (private_property_identifier) @meth_name) @meth_node', '(import_statement source: (string) @imp_source) @imp_node', '(export_statement) @exp_node', '(call_expression function: (identifier) @callfn_name) @callfn_node', diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts index ef88a6c2..4b870606 100644 --- a/src/extractors/javascript.ts +++ b/src/extractors/javascript.ts @@ -260,51 +260,67 @@ function extractSymbolsQuery(tree: TreeSitterTree, query: TreeSitterQuery): Extr return { definitions, calls, imports, classes, exports: exps, typeMap }; } +/** Node types that define a function scope — constants inside these are skipped. */ +const FUNCTION_SCOPE_TYPES = new Set([ + 'function_declaration', + 'arrow_function', + 'function_expression', + 'method_definition', + 'generator_function_declaration', + 'generator_function', +]); + /** - * Walk program-level children to extract `const x = ` as constants. - * The query-based fast path has no pattern for lexical_declaration/variable_declaration, - * so constants are missed. This targeted walk fills that gap without a full tree traversal. + * Recursively walk the AST to extract `const x = ` as constants. + * Skips nodes inside function scopes so only file-level / block-level constants + * are captured — matching the native engine's behaviour. 
*/ -function extractConstantsWalk(rootNode: TreeSitterNode, definitions: Definition[]): void { - for (let i = 0; i < rootNode.childCount; i++) { - const node = rootNode.child(i); - if (!node) continue; +function extractConstantsWalk(node: TreeSitterNode, definitions: Definition[]): void { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + + // Don't descend into function scopes + if (FUNCTION_SCOPE_TYPES.has(child.type)) continue; - let declNode = node; + let declNode = child; // Handle `export const …` — unwrap the export_statement to its declaration child - if (node.type === 'export_statement') { - const inner = node.childForFieldName('declaration'); - if (!inner) continue; - declNode = inner; + if (child.type === 'export_statement') { + const inner = child.childForFieldName('declaration'); + if (inner) declNode = inner; } const t = declNode.type; - if (t !== 'lexical_declaration' && t !== 'variable_declaration') continue; - if (!declNode.text.startsWith('const ')) continue; - - for (let j = 0; j < declNode.childCount; j++) { - const declarator = declNode.child(j); - if (!declarator || declarator.type !== 'variable_declarator') continue; - const nameN = declarator.childForFieldName('name'); - const valueN = declarator.childForFieldName('value'); - if (!nameN || nameN.type !== 'identifier' || !valueN) continue; - // Skip functions — already captured by query patterns - const valType = valueN.type; - if ( - valType === 'arrow_function' || - valType === 'function_expression' || - valType === 'function' - ) - continue; - if (isConstantValue(valueN)) { - definitions.push({ - name: nameN.text, - kind: 'constant', - line: declNode.startPosition.row + 1, - endLine: nodeEndLine(declNode), - }); + if (t === 'lexical_declaration' || t === 'variable_declaration') { + if (declNode.text.startsWith('const ')) { + for (let j = 0; j < declNode.childCount; j++) { + const declarator = declNode.child(j); + if (!declarator || 
declarator.type !== 'variable_declarator') continue; + const nameN = declarator.childForFieldName('name'); + const valueN = declarator.childForFieldName('value'); + if (!nameN || nameN.type !== 'identifier' || !valueN) continue; + // Skip functions — already captured by query patterns + const valType = valueN.type; + if ( + valType === 'arrow_function' || + valType === 'function_expression' || + valType === 'function' + ) + continue; + if (isConstantValue(valueN)) { + definitions.push({ + name: nameN.text, + kind: 'constant', + line: declNode.startPosition.row + 1, + endLine: nodeEndLine(declNode), + }); + } + } } } + + // Recurse into non-function children (blocks, if-statements, etc.) + extractConstantsWalk(child, definitions); } } diff --git a/src/extractors/rust.ts b/src/extractors/rust.ts index e74f2e78..3f40737e 100644 --- a/src/extractors/rust.ts +++ b/src/extractors/rust.ts @@ -227,7 +227,8 @@ function extractRustParameters(paramListNode: TreeSitterNode | null): SubDeclara const param = paramListNode.child(i); if (!param) continue; if (param.type === 'self_parameter') { - params.push({ name: 'self', kind: 'parameter', line: param.startPosition.row + 1 }); + // Skip self parameters — matches native engine behaviour + continue; } else if (param.type === 'parameter') { const pattern = param.childForFieldName('pattern'); if (pattern) { From e54f5a9aa581fe52dc7b98aaec34e236dd579184 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 27 Mar 2026 02:16:13 -0600 Subject: [PATCH 2/6] style: remove useless continue flagged by biome --- src/extractors/rust.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/extractors/rust.ts b/src/extractors/rust.ts index 3f40737e..031834c7 100644 --- a/src/extractors/rust.ts +++ b/src/extractors/rust.ts @@ -228,7 +228,6 @@ function extractRustParameters(paramListNode: TreeSitterNode | null): SubDeclara if (!param) continue; if (param.type === 'self_parameter') { // Skip self 
parameters — matches native engine behaviour - continue; } else if (param.type === 'parameter') { const pattern = param.childForFieldName('pattern'); if (pattern) { From 42335644cc09441e363aaac0005efdad82e5774e Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 27 Mar 2026 02:44:17 -0600 Subject: [PATCH 3/6] perf(insert-nodes): native Rust/rusqlite pipeline for node insertion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the entire insert-nodes stage to Rust when the native engine is available. The new `bulk_insert_nodes` napi function receives marshalled definitions, exports, children, and file-hash data from JS and writes everything to SQLite via rusqlite in a single transaction — eliminating the JS↔better-sqlite3 FFI overhead that dominated the ~206ms timing. Rust path: prepare_cached + individual executes (zero per-row FFI cost). JS fallback preserved unchanged for WASM engine or native failure. Target: insertMs < 50 ms on native full builds. --- crates/codegraph-core/src/insert_nodes.rs | 313 ++++++++++++++++++ crates/codegraph-core/src/lib.rs | 1 + .../graph/builder/stages/insert-nodes.ts | 140 +++++++- src/types.ts | 23 ++ 4 files changed, 467 insertions(+), 10 deletions(-) create mode 100644 crates/codegraph-core/src/insert_nodes.rs diff --git a/crates/codegraph-core/src/insert_nodes.rs b/crates/codegraph-core/src/insert_nodes.rs new file mode 100644 index 00000000..9afd643e --- /dev/null +++ b/crates/codegraph-core/src/insert_nodes.rs @@ -0,0 +1,313 @@ +//! Bulk node insertion via rusqlite — native replacement for the JS insert-nodes stage. +//! +//! Moves the entire insert-nodes loop to Rust: receives `InsertNodesBatch[]` from JS +//! and writes nodes, children, containment/parameter_of edges, exports, and file hashes +//! directly to SQLite without crossing back to JS. 
+ +use std::collections::HashMap; + +use napi_derive::napi; +use rusqlite::{params, Connection, OpenFlags}; +use serde::{Deserialize, Serialize}; + +// ── Input types (received from JS via napi) ───────────────────────── + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct InsertNodesChild { + pub name: String, + pub kind: String, + pub line: u32, + #[napi(js_name = "endLine")] + pub end_line: Option, + pub visibility: Option, +} + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct InsertNodesDefinition { + pub name: String, + pub kind: String, + pub line: u32, + #[napi(js_name = "endLine")] + pub end_line: Option, + pub visibility: Option, + pub children: Vec, +} + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct InsertNodesExport { + pub name: String, + pub kind: String, + pub line: u32, +} + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct InsertNodesBatch { + pub file: String, + pub definitions: Vec, + pub exports: Vec, +} + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FileHashEntry { + pub file: String, + pub hash: String, + /// `Math.floor(stat.mtimeMs)` from JS — f64 because JS `number`. + pub mtime: f64, + pub size: f64, +} + +// ── Public napi entry point ───────────────────────────────────────── + +/// Bulk-insert nodes, children, containment edges, exports, and file hashes +/// into the database. Runs all writes in a single SQLite transaction. +/// +/// Returns `true` on success, `false` on any error (DB open failure, +/// missing table, transaction failure) so the JS caller can fall back. 
+#[napi] +pub fn bulk_insert_nodes( + db_path: String, + batches: Vec, + file_hashes: Vec, + removed_files: Vec, +) -> bool { + let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX; + let mut conn = match Connection::open_with_flags(&db_path, flags) { + Ok(c) => c, + Err(_) => return false, + }; + + let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000"); + + do_insert(&mut conn, &batches, &file_hashes, &removed_files).is_ok() +} + +// ── Internal implementation ───────────────────────────────────────── + +fn query_node_ids( + stmt: &mut rusqlite::CachedStatement, + file: &str, +) -> rusqlite::Result> { + let mut map = HashMap::new(); + let rows = stmt.query_map(params![file], |row| { + Ok(( + row.get::<_, i64>(0)?, + row.get::<_, String>(1)?, + row.get::<_, String>(2)?, + row.get::<_, u32>(3)?, + )) + })?; + for row in rows { + let (id, name, kind, line) = row?; + map.insert(format!("{name}|{kind}|{line}"), id); + } + Ok(map) +} + +fn do_insert( + conn: &mut Connection, + batches: &[InsertNodesBatch], + file_hashes: &[FileHashEntry], + removed_files: &[String], +) -> rusqlite::Result<()> { + let tx = conn.transaction()?; + + // ── Phase 1: Insert file nodes + definitions + export nodes ────── + { + let mut stmt = tx.prepare_cached( + "INSERT OR IGNORE INTO nodes \ + (name, kind, file, line, end_line, parent_id, qualified_name, scope, visibility) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)", + )?; + + for batch in batches { + // File node + stmt.execute(params![ + &batch.file, + "file", + &batch.file, + 0, + None::, + None::, + None::<&str>, + None::<&str>, + None::<&str> + ])?; + + // Definitions + for def in &batch.definitions { + let scope: Option<&str> = def.name.rfind('.').map(|i| &def.name[..i]); + stmt.execute(params![ + &def.name, + &def.kind, + &batch.file, + def.line, + def.end_line, + None::, + &def.name, + scope, + &def.visibility + ])?; + } + + // Exports (may duplicate definitions — OR 
IGNORE handles it) + for exp in &batch.exports { + stmt.execute(params![ + &exp.name, + &exp.kind, + &batch.file, + exp.line, + None::, + None::, + &exp.name, + None::<&str>, + None::<&str> + ])?; + } + } + } + + // ── Phase 1b: Mark exported nodes ──────────────────────────────── + { + let mut stmt = tx.prepare_cached( + "UPDATE nodes SET exported = 1 \ + WHERE name = ?1 AND kind = ?2 AND file = ?3 AND line = ?4", + )?; + for batch in batches { + for exp in &batch.exports { + stmt.execute(params![&exp.name, &exp.kind, &batch.file, exp.line])?; + } + } + } + + // ── Phase 2: Query node IDs, insert children, collect file→def edges + let mut contains_edges: Vec<(i64, i64)> = Vec::new(); + let mut param_of_edges: Vec<(i64, i64)> = Vec::new(); + + { + let mut id_stmt = + tx.prepare_cached("SELECT id, name, kind, line FROM nodes WHERE file = ?1")?; + let mut child_stmt = tx.prepare_cached( + "INSERT OR IGNORE INTO nodes \ + (name, kind, file, line, end_line, parent_id, qualified_name, scope, visibility) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)", + )?; + + for batch in batches { + let node_ids = query_node_ids(&mut id_stmt, &batch.file)?; + let file_id = node_ids.get(&format!("{}|file|0", &batch.file)).copied(); + + for def in &batch.definitions { + let def_key = format!("{}|{}|{}", def.name, def.kind, def.line); + let def_id = node_ids.get(&def_key).copied(); + + // file → definition containment edge + if let (Some(fid), Some(did)) = (file_id, def_id) { + contains_edges.push((fid, did)); + } + + let def_id = match def_id { + Some(id) if !def.children.is_empty() => id, + _ => continue, + }; + + for child in &def.children { + let qname = format!("{}.{}", def.name, child.name); + child_stmt.execute(params![ + &child.name, + &child.kind, + &batch.file, + child.line, + child.end_line, + def_id, + &qname, + &def.name, + &child.visibility + ])?; + } + } + } + } + + // ── Phase 3: Re-fetch IDs (including children), add def→child edges + { + let mut id_stmt = + 
tx.prepare_cached("SELECT id, name, kind, line FROM nodes WHERE file = ?1")?; + + for batch in batches { + let node_ids = query_node_ids(&mut id_stmt, &batch.file)?; + + for def in &batch.definitions { + if def.children.is_empty() { + continue; + } + let def_key = format!("{}|{}|{}", def.name, def.kind, def.line); + let def_id = match node_ids.get(&def_key) { + Some(&id) => id, + None => continue, + }; + + for child in &def.children { + let child_key = format!("{}|{}|{}", child.name, child.kind, child.line); + if let Some(&child_id) = node_ids.get(&child_key) { + contains_edges.push((def_id, child_id)); + if child.kind == "parameter" { + param_of_edges.push((child_id, def_id)); + } + } + } + } + } + } + + // ── Insert all edges ───────────────────────────────────────────── + { + let mut stmt = tx.prepare_cached( + "INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) \ + VALUES (?1, ?2, ?3, ?4, ?5)", + )?; + for &(src, tgt) in &contains_edges { + stmt.execute(params![src, tgt, "contains", 1.0, 0])?; + } + for &(src, tgt) in ¶m_of_edges { + stmt.execute(params![src, tgt, "parameter_of", 1.0, 0])?; + } + } + + // ── Phase 4: File hashes ───────────────────────────────────────── + let has_file_hashes = tx + .prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name='file_hashes'") + .and_then(|mut s| s.query_row([], |_| Ok(true))) + .unwrap_or(false); + + if has_file_hashes { + { + let mut upsert = tx.prepare_cached( + "INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) \ + VALUES (?1, ?2, ?3, ?4)", + )?; + for entry in file_hashes { + upsert.execute(params![ + &entry.file, + &entry.hash, + entry.mtime as i64, + entry.size as i64 + ])?; + } + } + + if !removed_files.is_empty() { + let mut delete = + tx.prepare_cached("DELETE FROM file_hashes WHERE file = ?1")?; + for file in removed_files { + delete.execute(params![file])?; + } + } + } + + tx.commit() +} diff --git a/crates/codegraph-core/src/lib.rs b/crates/codegraph-core/src/lib.rs 
index 391f0854..533fb2df 100644 --- a/crates/codegraph-core/src/lib.rs +++ b/crates/codegraph-core/src/lib.rs @@ -8,6 +8,7 @@ pub mod edge_builder; pub mod extractors; pub mod import_resolution; pub mod incremental; +pub mod insert_nodes; pub mod parallel; pub mod parser_registry; pub mod types; diff --git a/src/domain/graph/builder/stages/insert-nodes.ts b/src/domain/graph/builder/stages/insert-nodes.ts index 769bec6d..da8c62c7 100644 --- a/src/domain/graph/builder/stages/insert-nodes.ts +++ b/src/domain/graph/builder/stages/insert-nodes.ts @@ -3,10 +3,15 @@ * * Batch-inserts file nodes, definitions, exports, children, and contains/parameter_of edges. * Updates file hashes for incremental builds. + * + * When the native engine is available, delegates all SQLite writes to Rust via + * `bulkInsertNodes` — eliminating JS↔C boundary overhead. Falls back to the + * JS implementation on failure or when native is unavailable. */ import path from 'node:path'; import { performance } from 'node:perf_hooks'; import { bulkNodeIdsByFile } from '../../../../db/index.js'; +import { loadNative } from '../../../../infrastructure/native.js'; import type { BetterSqlite3Database, ExtractorOutput, @@ -32,7 +37,112 @@ interface PrecomputedFileData { _reverseDepOnly?: boolean; } -// ── Phase 1: Insert file nodes, definitions, exports ──────────────────── +// ── Native fast-path ───────────────────────────────────────────────── + +function tryNativeInsert(ctx: PipelineContext): boolean { + const native = loadNative(); + if (!native?.bulkInsertNodes) return false; + + const { dbPath, allSymbols, filesToParse, metadataUpdates, rootDir, removed } = ctx; + if (!dbPath) return false; + + // Marshal allSymbols → InsertNodesBatch[] + const batches: Array<{ + file: string; + definitions: Array<{ + name: string; + kind: string; + line: number; + endLine?: number | null; + visibility?: string | null; + children: Array<{ + name: string; + kind: string; + line: number; + endLine?: number | null; + 
visibility?: string | null; + }>; + }>; + exports: Array<{ name: string; kind: string; line: number }>; + }> = []; + + for (const [relPath, symbols] of allSymbols) { + batches.push({ + file: relPath, + definitions: symbols.definitions.map((def) => ({ + name: def.name, + kind: def.kind, + line: def.line, + endLine: def.endLine ?? null, + visibility: def.visibility ?? null, + children: (def.children ?? []).map((c) => ({ + name: c.name, + kind: c.kind, + line: c.line, + endLine: c.endLine ?? null, + visibility: c.visibility ?? null, + })), + })), + exports: symbols.exports.map((exp) => ({ + name: exp.name, + kind: exp.kind, + line: exp.line, + })), + }); + } + + // Build file hash entries + const precomputedData = new Map(); + for (const item of filesToParse) { + if (item.relPath) precomputedData.set(item.relPath, item as PrecomputedFileData); + } + + const fileHashes: Array<{ file: string; hash: string; mtime: number; size: number }> = []; + for (const [relPath] of allSymbols) { + const precomputed = precomputedData.get(relPath); + if (precomputed?._reverseDepOnly) { + continue; // file unchanged, hash already correct + } + if (precomputed?.hash) { + let mtime: number; + let size: number; + if (precomputed.stat) { + mtime = precomputed.stat.mtime; + size = precomputed.stat.size; + } else { + const rawStat = fileStat(path.join(rootDir, relPath)); + mtime = rawStat ? Math.floor(rawStat.mtimeMs) : 0; + size = rawStat ? rawStat.size : 0; + } + fileHashes.push({ file: relPath, hash: precomputed.hash, mtime, size }); + } else { + const absPath = path.join(rootDir, relPath); + let code: string | null; + try { + code = readFileSafe(absPath); + } catch { + code = null; + } + if (code !== null) { + const stat = fileStat(absPath); + const mtime = stat ? Math.floor(stat.mtimeMs) : 0; + const size = stat ? 
stat.size : 0; + fileHashes.push({ file: relPath, hash: fileHash(code), mtime, size }); + } + } + } + + // Also include metadata-only updates (self-heal mtime/size without re-parse) + for (const item of metadataUpdates) { + const mtime = item.stat ? Math.floor(item.stat.mtime) : 0; + const size = item.stat ? item.stat.size : 0; + fileHashes.push({ file: item.relPath, hash: item.hash, mtime, size }); + } + + return native.bulkInsertNodes(dbPath, batches, fileHashes, removed); +} + +// ── JS fallback: Phase 1 ──────────────────────────────────────────── function insertDefinitionsAndExports( db: BetterSqlite3Database, @@ -90,7 +200,7 @@ function insertDefinitionsAndExports( } } -// ── Phase 2+3: Insert children and containment edges (two nodeIdMap passes) ── +// ── JS fallback: Phase 2+3 ────────────────────────────────────────── function insertChildrenAndEdges( db: BetterSqlite3Database, @@ -165,7 +275,7 @@ function insertChildrenAndEdges( batchInsertEdges(db, edgeRows); } -// ── Phase 4: Update file hashes ───────────────────────────────────────── +// ── JS fallback: Phase 4 ──────────────────────────────────────────── function updateFileHashes( _db: BetterSqlite3Database, @@ -218,11 +328,27 @@ function updateFileHashes( } } -// ── Main entry point ──────────────────────────────────────────────────── +// ── Main entry point ──────────────────────────────────────────────── export async function insertNodes(ctx: PipelineContext): Promise { const { db, allSymbols, filesToParse, metadataUpdates, rootDir, removed } = ctx; + // Populate fileSymbols before any DB writes (used by later stages) + for (const [relPath, symbols] of allSymbols) { + ctx.fileSymbols.set(relPath, symbols); + } + + const t0 = performance.now(); + + // Try native Rust path first — single transaction, no JS↔C overhead + if (ctx.engineName === 'native' && tryNativeInsert(ctx)) { + ctx.timing.insertMs = performance.now() - t0; + + // Removed-file hash cleanup is handled inside the native call + return; 
+ } + + // JS fallback const precomputedData = new Map(); for (const item of filesToParse) { if (item.relPath) precomputedData.set(item.relPath, item as PrecomputedFileData); @@ -237,18 +363,12 @@ export async function insertNodes(ctx: PipelineContext): Promise { upsertHash = null; } - // Populate fileSymbols before the transaction so it is a pure input - for (const [relPath, symbols] of allSymbols) { - ctx.fileSymbols.set(relPath, symbols); - } - const insertAll = db.transaction(() => { insertDefinitionsAndExports(db, allSymbols); insertChildrenAndEdges(db, allSymbols); updateFileHashes(db, allSymbols, precomputedData, metadataUpdates, rootDir, upsertHash); }); - const t0 = performance.now(); insertAll(); ctx.timing.insertMs = performance.now() - t0; diff --git a/src/types.ts b/src/types.ts index 41058cce..f593ae37 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1815,6 +1815,29 @@ export interface NativeAddon { }>; }>, ): number; + bulkInsertNodes( + dbPath: string, + batches: Array<{ + file: string; + definitions: Array<{ + name: string; + kind: string; + line: number; + endLine?: number | null; + visibility?: string | null; + children: Array<{ + name: string; + kind: string; + line: number; + endLine?: number | null; + visibility?: string | null; + }>; + }>; + exports: Array<{ name: string; kind: string; line: number }>; + }>, + fileHashes: Array<{ file: string; hash: string; mtime: number; size: number }>, + removedFiles: string[], + ): boolean; engineVersion(): string; ParseTreeCache: new () => NativeParseTreeCache; } From 9215de94b453527d042eb975b8d6b386a80dd39f Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 27 Mar 2026 03:37:33 -0600 Subject: [PATCH 4/6] fix(parser): prevent double-counting export const declarations (#654) --- src/extractors/javascript.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts index 
4b870606..fc32576c 100644 --- a/src/extractors/javascript.ts +++ b/src/extractors/javascript.ts @@ -319,8 +319,10 @@ function extractConstantsWalk(node: TreeSitterNode, definitions: Definition[]): } } - // Recurse into non-function children (blocks, if-statements, etc.) - extractConstantsWalk(child, definitions); + // Recurse into non-function, non-export-statement children (blocks, if-statements, etc.) + if (child.type !== 'export_statement') { + extractConstantsWalk(child, definitions); + } } } From 1d4f19380d969fe58cee1634ec9886a84abb7139 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 27 Mar 2026 13:39:00 -0600 Subject: [PATCH 5/6] fix(rust): remove stale cfg_db/dataflow_db module declarations (#654) The merge commit a8849c3 brought in `pub mod cfg_db` and `pub mod dataflow_db` declarations in lib.rs without the corresponding source files, causing Rust compilation to fail (E0583: file not found). This branch supersedes those modules with the unified insert_nodes.rs pipeline, so remove the stale mod declarations and their NativeAddon type signatures in types.ts. 
--- crates/codegraph-core/src/lib.rs | 2 -- src/types.ts | 35 -------------------------------- 2 files changed, 37 deletions(-) diff --git a/crates/codegraph-core/src/lib.rs b/crates/codegraph-core/src/lib.rs index 6207b725..533fb2df 100644 --- a/crates/codegraph-core/src/lib.rs +++ b/crates/codegraph-core/src/lib.rs @@ -1,11 +1,9 @@ pub mod ast_db; pub mod cfg; -pub mod cfg_db; pub mod complexity; pub mod constants; pub mod cycles; pub mod dataflow; -pub mod dataflow_db; pub mod edge_builder; pub mod extractors; pub mod import_resolution; diff --git a/src/types.ts b/src/types.ts index bb4922f2..f593ae37 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1838,41 +1838,6 @@ export interface NativeAddon { fileHashes: Array<{ file: string; hash: string; mtime: number; size: number }>, removedFiles: string[], ): boolean; - bulkInsertCfg( - dbPath: string, - batches: Array<{ - name: string; - file: string; - line: number; - blocks: Array<{ - index: number; - type: string; - startLine?: number | null; - endLine?: number | null; - label?: string | null; - }>; - edges: Array<{ - sourceIndex: number; - targetIndex: number; - kind: string; - }>; - }>, - ): number; - bulkInsertDataflow( - dbPath: string, - batches: Array<{ - file: string; - edges: Array<{ - sourceName: string; - targetName: string; - kind: string; - paramIndex?: number | null; - expression?: string | null; - line?: number | null; - confidence: number; - }>; - }>, - ): number; engineVersion(): string; ParseTreeCache: new () => NativeParseTreeCache; } From 2b6e30dcce6d8c061a917b4deb650378c5a46f85 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 27 Mar 2026 15:14:42 -0600 Subject: [PATCH 6/6] fix(rust): use INSERT OR IGNORE for edges to prevent duplicates on incremental builds (#654) The edges INSERT in do_insert lacked a conflict handler. 
Since the edges table has no UNIQUE constraint, incremental builds where target-side edges weren't pre-cleaned could silently accumulate duplicate rows. Switch to INSERT OR IGNORE to match the node insert behaviour. --- crates/codegraph-core/src/insert_nodes.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/codegraph-core/src/insert_nodes.rs b/crates/codegraph-core/src/insert_nodes.rs index 9afd643e..e49006b0 100644 --- a/crates/codegraph-core/src/insert_nodes.rs +++ b/crates/codegraph-core/src/insert_nodes.rs @@ -267,7 +267,7 @@ fn do_insert( // ── Insert all edges ───────────────────────────────────────────── { let mut stmt = tx.prepare_cached( - "INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) \ + "INSERT OR IGNORE INTO edges (source_id, target_id, kind, confidence, dynamic) \ VALUES (?1, ?2, ?3, ?4, ?5)", )?; for &(src, tgt) in &contains_edges {