diff --git a/crates/codegraph-core/src/edges_db.rs b/crates/codegraph-core/src/edges_db.rs
new file mode 100644
index 00000000..25f1ae51
--- /dev/null
+++ b/crates/codegraph-core/src/edges_db.rs
@@ -0,0 +1,81 @@
+//! Bulk edge insertion via rusqlite — native replacement for the JS
+//! `batchInsertEdges` helper.
+//!
+//! Used by the build-edges stage to write computed call/receiver/extends/
+//! implements edges directly to SQLite without marshaling back to JS.
+
+use napi_derive::napi;
+use rusqlite::{Connection, OpenFlags};
+
+/// A single edge row to insert: [source_id, target_id, kind, confidence, dynamic].
+#[napi(object)]
+#[derive(Debug, Clone)]
+pub struct EdgeRow {
+    #[napi(js_name = "sourceId")]
+    pub source_id: u32,
+    #[napi(js_name = "targetId")]
+    pub target_id: u32,
+    pub kind: String,
+    pub confidence: f64,
+    pub dynamic: u32,
+}
+
+/// Bulk-insert edge rows into the database via rusqlite.
+/// Runs all writes in a single SQLite transaction with chunked multi-value
+/// INSERT statements for maximum throughput.
+///
+/// Returns `true` on success, `false` on any error so the JS caller can
+/// fall back to the JS batch insert path.
+#[napi]
+pub fn bulk_insert_edges(db_path: String, edges: Vec<EdgeRow>) -> bool {
+    if edges.is_empty() {
+        return true;
+    }
+    let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX;
+    let mut conn = match Connection::open_with_flags(&db_path, flags) {
+        Ok(c) => c,
+        Err(_) => return false,
+    };
+    let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000");
+    do_insert(&mut conn, &edges).is_ok()
+}
+
+/// 199 rows × 5 params = 995 bind parameters per statement, safely under
+/// the legacy `SQLITE_MAX_VARIABLE_NUMBER` default of 999.
+const CHUNK: usize = 199;
+
+fn do_insert(conn: &mut Connection, edges: &[EdgeRow]) -> rusqlite::Result<()> {
+    let tx = conn.transaction()?;
+
+    for chunk in edges.chunks(CHUNK) {
+        let placeholders: Vec<String> = (0..chunk.len())
+            .map(|i| {
+                let base = i * 5;
+                format!(
+                    "(?{},?{},?{},?{},?{})",
+                    base + 1,
+                    base + 2,
+                    base + 3,
+                    base + 4,
+                    base + 5
+                )
+            })
+            .collect();
+        let sql = format!(
+            "INSERT OR IGNORE INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES {}",
+            placeholders.join(",")
+        );
+        let mut stmt = tx.prepare_cached(&sql)?;
+        for (i, edge) in chunk.iter().enumerate() {
+            let base = i * 5;
+            stmt.raw_bind_parameter(base + 1, edge.source_id)?;
+            stmt.raw_bind_parameter(base + 2, edge.target_id)?;
+            stmt.raw_bind_parameter(base + 3, edge.kind.as_str())?;
+            stmt.raw_bind_parameter(base + 4, edge.confidence)?;
+            stmt.raw_bind_parameter(base + 5, edge.dynamic)?;
+        }
+        stmt.raw_execute()?;
+    }
+
+    tx.commit()
+}
diff --git a/crates/codegraph-core/src/lib.rs b/crates/codegraph-core/src/lib.rs
index 533fb2df..00da9ec7 100644
--- a/crates/codegraph-core/src/lib.rs
+++ b/crates/codegraph-core/src/lib.rs
@@ -5,12 +5,14 @@ pub mod constants;
 pub mod cycles;
 pub mod dataflow;
 pub mod edge_builder;
+pub mod edges_db;
 pub mod extractors;
 pub mod import_resolution;
 pub mod incremental;
 pub mod insert_nodes;
 pub mod parallel;
 pub mod parser_registry;
+pub mod roles_db;
 pub mod types;
 
 use napi_derive::napi;
diff --git a/crates/codegraph-core/src/roles_db.rs b/crates/codegraph-core/src/roles_db.rs
new file mode 100644
index 00000000..784787d7
--- /dev/null
+++ b/crates/codegraph-core/src/roles_db.rs
@@ -0,0 +1,575 @@
+//! Native role classification via rusqlite.
+//!
+//! Replaces the JS `classifyNodeRolesFull` / `classifyNodeRolesIncremental`
+//! functions: runs fan-in/fan-out queries, computes medians, classifies roles,
+//! and batch-updates nodes — all in a single Rust function with one DB
+//! connection, eliminating JS<->SQLite round-trips.
+ +use std::collections::HashMap; + +use napi_derive::napi; +use rusqlite::{Connection, OpenFlags}; + +// ── Constants ──────────────────────────────────────────────────────── + +const FRAMEWORK_ENTRY_PREFIXES: &[&str] = &["route:", "event:", "command:"]; + +const LEAF_KINDS: &[&str] = &["parameter", "property", "constant"]; + +/// Path patterns indicating framework-dispatched entry points (matches JS +/// `ENTRY_PATH_PATTERNS` in `graph/classifiers/roles.ts`). +const ENTRY_PATH_PATTERNS: &[&str] = &[ + "cli/commands/", + "cli\\commands\\", + "mcp/", + "mcp\\", + "routes/", + "routes\\", + "route/", + "route\\", + "handlers/", + "handlers\\", + "handler/", + "handler\\", + "middleware/", + "middleware\\", +]; + +const TEST_FILE_PATTERNS: &[&str] = &[ + "%.test.%", + "%.spec.%", + "%__test__%", + "%__tests__%", + "%.stories.%", +]; + +// ── Output types ───────────────────────────────────────────────────── + +#[napi(object)] +#[derive(Debug, Clone, Default)] +pub struct RoleSummary { + pub entry: u32, + pub core: u32, + pub utility: u32, + pub adapter: u32, + pub dead: u32, + #[napi(js_name = "deadLeaf")] + pub dead_leaf: u32, + #[napi(js_name = "deadEntry")] + pub dead_entry: u32, + #[napi(js_name = "deadFfi")] + pub dead_ffi: u32, + #[napi(js_name = "deadUnresolved")] + pub dead_unresolved: u32, + #[napi(js_name = "testOnly")] + pub test_only: u32, + pub leaf: u32, +} + +// ── Public napi entry points ───────────────────────────────────────── + +/// Full role classification: queries all nodes, computes fan-in/fan-out, +/// classifies roles, and batch-updates the `role` column. +/// Returns a summary of role counts, or null on failure. 
+#[napi]
+pub fn classify_roles_full(db_path: String) -> Option<RoleSummary> {
+    let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX;
+    let mut conn = Connection::open_with_flags(&db_path, flags).ok()?;
+    let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000");
+    do_classify_full(&mut conn).ok()
+}
+
+/// Incremental role classification: only reclassifies nodes from changed files
+/// plus their immediate edge neighbours.
+/// Returns a summary of role counts for the affected nodes, or null on failure.
+#[napi]
+pub fn classify_roles_incremental(
+    db_path: String,
+    changed_files: Vec<String>,
+) -> Option<RoleSummary> {
+    let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX;
+    let mut conn = Connection::open_with_flags(&db_path, flags).ok()?;
+    let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000");
+    do_classify_incremental(&mut conn, &changed_files).ok()
+}
+
+// ── Shared helpers ─────────────────────────────────────────────────
+
+fn median(sorted: &[u32]) -> u32 {
+    if sorted.is_empty() {
+        return 0;
+    }
+    let mid = sorted.len() / 2;
+    if sorted.len() % 2 == 0 {
+        (sorted[mid - 1] + sorted[mid]) / 2
+    } else {
+        sorted[mid]
+    }
+}
+
+/// Dead sub-role classification matching JS `classifyDeadSubRole`.
+fn classify_dead_sub_role(_name: &str, kind: &str, file: &str) -> &'static str { + // Leaf kinds + if LEAF_KINDS.iter().any(|k| *k == kind) { + return "dead-leaf"; + } + // FFI boundary (checked before dead-entry — an FFI boundary is a more + // fundamental classification than a path-based hint, matching JS priority) + let ffi_exts = [".rs", ".c", ".cpp", ".h", ".go", ".java", ".cs"]; + if ffi_exts.iter().any(|ext| file.ends_with(ext)) { + return "dead-ffi"; + } + // Framework-dispatched entry points (CLI commands, MCP tools, routes) + if ENTRY_PATH_PATTERNS.iter().any(|p| file.contains(p)) { + return "dead-entry"; + } + "dead-unresolved" +} + +/// Classify a single node into a role. +fn classify_node( + name: &str, + kind: &str, + file: &str, + fan_in: u32, + fan_out: u32, + is_exported: bool, + production_fan_in: u32, + median_fan_in: u32, + median_fan_out: u32, +) -> &'static str { + // Framework entry + if FRAMEWORK_ENTRY_PREFIXES + .iter() + .any(|p| name.starts_with(p)) + { + return "entry"; + } + + if fan_in == 0 && !is_exported { + // Test-only check: if node has test fan-in but zero total fan-in it's + // classified in the dead sub-role path (JS mirrors this) + return classify_dead_sub_role(name, kind, file); + } + + if fan_in == 0 && is_exported { + return "entry"; + } + + // Test-only: has callers but all are in test files + if fan_in > 0 && production_fan_in == 0 { + return "test-only"; + } + + let high_in = fan_in >= median_fan_in && fan_in > 0; + let high_out = fan_out >= median_fan_out && fan_out > 0; + + if high_in && !high_out { + "core" + } else if high_in && high_out { + "utility" + } else if !high_in && high_out { + "adapter" + } else { + "leaf" + } +} + +fn increment_summary(summary: &mut RoleSummary, role: &str) { + match role { + "entry" => summary.entry += 1, + "core" => summary.core += 1, + "utility" => summary.utility += 1, + "adapter" => summary.adapter += 1, + "leaf" => summary.leaf += 1, + "test-only" => summary.test_only += 1, + 
        "dead-leaf" => {
+            summary.dead += 1;
+            summary.dead_leaf += 1;
+        }
+        "dead-ffi" => {
+            summary.dead += 1;
+            summary.dead_ffi += 1;
+        }
+        "dead-entry" => {
+            summary.dead += 1;
+            summary.dead_entry += 1;
+        }
+        "dead-unresolved" => {
+            summary.dead += 1;
+            summary.dead_unresolved += 1;
+        }
+        _ => summary.leaf += 1,
+    }
+}
+
+/// Batch UPDATE nodes SET role = ? WHERE id IN (...) using chunked statements.
+fn batch_update_roles(
+    tx: &rusqlite::Transaction,
+    ids_by_role: &HashMap<&str, Vec<i64>>,
+) -> rusqlite::Result<()> {
+    const CHUNK: usize = 500;
+
+    for (role, ids) in ids_by_role {
+        for chunk in ids.chunks(CHUNK) {
+            let placeholders: String = chunk.iter().map(|_| "?").collect::<Vec<_>>().join(",");
+            let sql = format!("UPDATE nodes SET role = ?1 WHERE id IN ({})", placeholders);
+            let mut stmt = tx.prepare_cached(&sql)?;
+            // Bind role as param 1, then each id
+            stmt.raw_bind_parameter(1, *role)?;
+            for (i, id) in chunk.iter().enumerate() {
+                stmt.raw_bind_parameter(i + 2, *id)?;
+            }
+            stmt.raw_execute()?;
+        }
+    }
+    Ok(())
+}
+
+// ── Full classification ────────────────────────────────────────────
+
+fn do_classify_full(conn: &mut Connection) -> rusqlite::Result<RoleSummary> {
+    let tx = conn.transaction()?;
+    let mut summary = RoleSummary::default();
+
+    // 1. Leaf kinds → dead-leaf (skip expensive fan-in/fan-out JOINs)
+    let leaf_ids: Vec<i64> = {
+        let mut stmt =
+            tx.prepare("SELECT id FROM nodes WHERE kind IN ('parameter', 'property')")?;
+        let rows = stmt.query_map([], |row| row.get::<_, i64>(0))?;
+        rows.filter_map(|r| r.ok()).collect()
+    };
+
+    // 2. Fan-in/fan-out for callable nodes
+    let rows: Vec<(i64, String, String, String, u32, u32)> = {
+        let mut stmt = tx.prepare(
+            "SELECT n.id, n.name, n.kind, n.file,
+                    COALESCE(fi.cnt, 0) AS fan_in,
+                    COALESCE(fo.cnt, 0) AS fan_out
+             FROM nodes n
+             LEFT JOIN (
+                 SELECT target_id, COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY target_id
+             ) fi ON n.id = fi.target_id
+             LEFT JOIN (
+                 SELECT source_id, COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY source_id
+             ) fo ON n.id = fo.source_id
+             WHERE n.kind NOT IN ('file', 'directory', 'parameter', 'property')",
+        )?;
+        let mapped = stmt.query_map([], |row| {
+            Ok((
+                row.get::<_, i64>(0)?,
+                row.get::<_, String>(1)?,
+                row.get::<_, String>(2)?,
+                row.get::<_, String>(3)?,
+                row.get::<_, u32>(4)?,
+                row.get::<_, u32>(5)?,
+            ))
+        })?;
+        mapped.filter_map(|r| r.ok()).collect()
+    };
+
+    if rows.is_empty() && leaf_ids.is_empty() {
+        tx.commit()?;
+        return Ok(summary);
+    }
+
+    // 3. Exported IDs (cross-file callers)
+    let exported_ids: std::collections::HashSet<i64> = {
+        let mut stmt = tx.prepare(
+            "SELECT DISTINCT e.target_id
+             FROM edges e
+             JOIN nodes caller ON e.source_id = caller.id
+             JOIN nodes target ON e.target_id = target.id
+             WHERE e.kind = 'calls' AND caller.file != target.file",
+        )?;
+        let rows = stmt.query_map([], |row| row.get::<_, i64>(0))?;
+        rows.filter_map(|r| r.ok()).collect()
+    };
+
+    // 4. Production fan-in (excluding test files)
+    let prod_fan_in: HashMap<i64, u32> = {
+        let test_filter = TEST_FILE_PATTERNS
+            .iter()
+            .map(|p| format!("AND caller.file NOT LIKE '{}'", p))
+            .collect::<Vec<_>>()
+            .join(" ");
+        let sql = format!(
+            "SELECT e.target_id, COUNT(*) AS cnt
+             FROM edges e
+             JOIN nodes caller ON e.source_id = caller.id
+             WHERE e.kind = 'calls' {}
+             GROUP BY e.target_id",
+            test_filter
+        );
+        let mut stmt = tx.prepare(&sql)?;
+        let mapped = stmt.query_map([], |row| Ok((row.get::<_, i64>(0)?, row.get::<_, u32>(1)?)))?;
+        mapped.filter_map(|r| r.ok()).collect()
+    };
+
+    // 5. Compute medians from non-zero values
+    let mut fan_in_vals: Vec<u32> = rows.iter().map(|r| r.4).filter(|&v| v > 0).collect();
+    let mut fan_out_vals: Vec<u32> = rows.iter().map(|r| r.5).filter(|&v| v > 0).collect();
+    fan_in_vals.sort_unstable();
+    fan_out_vals.sort_unstable();
+    let median_fan_in = median(&fan_in_vals);
+    let median_fan_out = median(&fan_out_vals);
+
+    // 6. Classify and collect IDs by role
+    let mut ids_by_role: HashMap<&str, Vec<i64>> = HashMap::new();
+
+    if !leaf_ids.is_empty() {
+        summary.dead += leaf_ids.len() as u32;
+        summary.dead_leaf += leaf_ids.len() as u32;
+        ids_by_role.insert("dead-leaf", leaf_ids);
+    }
+
+    for (id, name, kind, file, fan_in, fan_out) in &rows {
+        let is_exported = exported_ids.contains(id);
+        let prod_fi = prod_fan_in.get(id).copied().unwrap_or(0);
+        let role = classify_node(
+            name,
+            kind,
+            file,
+            *fan_in,
+            *fan_out,
+            is_exported,
+            prod_fi,
+            median_fan_in,
+            median_fan_out,
+        );
+        increment_summary(&mut summary, role);
+        ids_by_role.entry(role).or_default().push(*id);
+    }
+
+    // 7. Batch UPDATE: reset all roles then set per-role
+    tx.execute("UPDATE nodes SET role = NULL", [])?;
+    batch_update_roles(&tx, &ids_by_role)?;
+
+    tx.commit()?;
+    Ok(summary)
+}
+
+// ── Incremental classification ─────────────────────────────────────
+
+fn do_classify_incremental(
+    conn: &mut Connection,
+    changed_files: &[String],
+) -> rusqlite::Result<RoleSummary> {
+    let tx = conn.transaction()?;
+    let mut summary = RoleSummary::default();
+
+    // Build placeholders for changed files
+    let seed_ph: String = changed_files.iter().map(|_| "?").collect::<Vec<_>>().join(",");
+
+    // Expand affected set: include edge neighbours
+    let neighbour_sql = format!(
+        "SELECT DISTINCT n2.file FROM edges e
+         JOIN nodes n1 ON (e.source_id = n1.id OR e.target_id = n1.id)
+         JOIN nodes n2 ON (e.source_id = n2.id OR e.target_id = n2.id)
+         WHERE e.kind = 'calls'
+           AND n1.file IN ({})
+           AND n2.file NOT IN ({})
+           AND n2.kind NOT IN ('file', 'directory')",
+        seed_ph, seed_ph
+    );
+    let neighbour_files: Vec<String> = {
+        let mut stmt = tx.prepare(&neighbour_sql)?;
+        // Bind changed_files twice (for both IN clauses)
+        let mut idx = 1;
+        for f in changed_files {
+            stmt.raw_bind_parameter(idx, f.as_str())?;
+            idx += 1;
+        }
+        for f in changed_files {
+            stmt.raw_bind_parameter(idx, f.as_str())?;
+            idx += 1;
+        }
+        let rows = stmt.raw_query();
+        let mut result = Vec::new();
+        let mut rows = rows;
+        while let Some(row) = rows.next()? {
+            result.push(row.get::<_, String>(0)?);
+        }
+        result
+    };
+
+    let mut all_affected: Vec<&str> = changed_files.iter().map(|s| s.as_str()).collect();
+    for f in &neighbour_files {
+        all_affected.push(f.as_str());
+    }
+    let affected_ph: String = all_affected.iter().map(|_| "?").collect::<Vec<_>>().join(",");
+
+    // 1. Global medians from edge distribution
+    let median_fan_in = {
+        let mut stmt = tx.prepare(
+            "SELECT COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY target_id",
+        )?;
+        let mut vals: Vec<u32> = stmt
+            .query_map([], |row| row.get::<_, u32>(0))?
+            .filter_map(|r| r.ok())
+            .collect();
+        vals.sort_unstable();
+        median(&vals)
+    };
+    let median_fan_out = {
+        let mut stmt = tx.prepare(
+            "SELECT COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY source_id",
+        )?;
+        let mut vals: Vec<u32> = stmt
+            .query_map([], |row| row.get::<_, u32>(0))?
+            .filter_map(|r| r.ok())
+            .collect();
+        vals.sort_unstable();
+        median(&vals)
+    };
+
+    // 2a. Leaf kinds in affected files
+    let leaf_sql = format!(
+        "SELECT id FROM nodes WHERE kind IN ('parameter', 'property') AND file IN ({})",
+        affected_ph
+    );
+    let leaf_ids: Vec<i64> = {
+        let mut stmt = tx.prepare(&leaf_sql)?;
+        for (i, f) in all_affected.iter().enumerate() {
+            stmt.raw_bind_parameter(i + 1, *f)?;
+        }
+        let mut rows = stmt.raw_query();
+        let mut result = Vec::new();
+        while let Some(row) = rows.next()? {
+            result.push(row.get::<_, i64>(0)?);
+        }
+        result
+    };
+
+    // 2b. Callable nodes with correlated subquery fan-in/fan-out
+    let rows_sql = format!(
+        "SELECT n.id, n.name, n.kind, n.file,
+                (SELECT COUNT(*) FROM edges WHERE kind = 'calls' AND target_id = n.id) AS fan_in,
+                (SELECT COUNT(*) FROM edges WHERE kind = 'calls' AND source_id = n.id) AS fan_out
+         FROM nodes n
+         WHERE n.kind NOT IN ('file', 'directory', 'parameter', 'property')
+           AND n.file IN ({})",
+        affected_ph
+    );
+    let rows: Vec<(i64, String, String, String, u32, u32)> = {
+        let mut stmt = tx.prepare(&rows_sql)?;
+        for (i, f) in all_affected.iter().enumerate() {
+            stmt.raw_bind_parameter(i + 1, *f)?;
+        }
+        let mut qrows = stmt.raw_query();
+        let mut result = Vec::new();
+        while let Some(row) = qrows.next()? {
+            result.push((
+                row.get::<_, i64>(0)?,
+                row.get::<_, String>(1)?,
+                row.get::<_, String>(2)?,
+                row.get::<_, String>(3)?,
+                row.get::<_, u32>(4)?,
+                row.get::<_, u32>(5)?,
+            ));
+        }
+        result
+    };
+
+    if rows.is_empty() && leaf_ids.is_empty() {
+        tx.commit()?;
+        return Ok(summary);
+    }
+
+    // 3. Exported IDs for affected nodes
+    let exported_sql = format!(
+        "SELECT DISTINCT e.target_id
+         FROM edges e
+         JOIN nodes caller ON e.source_id = caller.id
+         JOIN nodes target ON e.target_id = target.id
+         WHERE e.kind = 'calls' AND caller.file != target.file
+           AND target.file IN ({})",
+        affected_ph
+    );
+    let exported_ids: std::collections::HashSet<i64> = {
+        let mut stmt = tx.prepare(&exported_sql)?;
+        for (i, f) in all_affected.iter().enumerate() {
+            stmt.raw_bind_parameter(i + 1, *f)?;
+        }
+        let mut qrows = stmt.raw_query();
+        let mut result = std::collections::HashSet::new();
+        while let Some(row) = qrows.next()? {
+            result.insert(row.get::<_, i64>(0)?);
+        }
+        result
+    };
+
+    // 4. Production fan-in for affected nodes
+    let test_filter = TEST_FILE_PATTERNS
+        .iter()
+        .map(|p| format!("AND caller.file NOT LIKE '{}'", p))
+        .collect::<Vec<_>>()
+        .join(" ");
+    let prod_sql = format!(
+        "SELECT e.target_id, COUNT(*) AS cnt
+         FROM edges e
+         JOIN nodes caller ON e.source_id = caller.id
+         JOIN nodes target ON e.target_id = target.id
+         WHERE e.kind = 'calls'
+           AND target.file IN ({})
+           {}
+         GROUP BY e.target_id",
+        affected_ph, test_filter
+    );
+    let prod_fan_in: HashMap<i64, u32> = {
+        let mut stmt = tx.prepare(&prod_sql)?;
+        for (i, f) in all_affected.iter().enumerate() {
+            stmt.raw_bind_parameter(i + 1, *f)?;
+        }
+        let mut qrows = stmt.raw_query();
+        let mut result = HashMap::new();
+        while let Some(row) = qrows.next()? {
+            result.insert(row.get::<_, i64>(0)?, row.get::<_, u32>(1)?);
+        }
+        result
+    };
+
+    // 5. Classify
+    let mut ids_by_role: HashMap<&str, Vec<i64>> = HashMap::new();
+
+    if !leaf_ids.is_empty() {
+        summary.dead += leaf_ids.len() as u32;
+        summary.dead_leaf += leaf_ids.len() as u32;
+        ids_by_role.insert("dead-leaf", leaf_ids);
+    }
+
+    for (id, name, kind, file, fan_in, fan_out) in &rows {
+        let is_exported = exported_ids.contains(id);
+        let prod_fi = prod_fan_in.get(id).copied().unwrap_or(0);
+        let role = classify_node(
+            name,
+            kind,
+            file,
+            *fan_in,
+            *fan_out,
+            is_exported,
+            prod_fi,
+            median_fan_in,
+            median_fan_out,
+        );
+        increment_summary(&mut summary, role);
+        ids_by_role.entry(role).or_default().push(*id);
+    }
+
+    // 6. Reset roles for affected files only, then update
+    let reset_sql = format!(
+        "UPDATE nodes SET role = NULL WHERE file IN ({}) AND kind NOT IN ('file', 'directory')",
+        affected_ph
+    );
+    {
+        let mut stmt = tx.prepare(&reset_sql)?;
+        for (i, f) in all_affected.iter().enumerate() {
+            stmt.raw_bind_parameter(i + 1, *f)?;
+        }
+        stmt.raw_execute()?;
+    }
+    batch_update_roles(&tx, &ids_by_role)?;
+
+    tx.commit()?;
+    Ok(summary)
+}
diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts
index 4d452128..2869ada5 100644
--- a/src/domain/graph/builder/stages/build-edges.ts
+++ b/src/domain/graph/builder/stages/build-edges.ts
@@ -7,6 +7,7 @@ import path from 'node:path';
 import { performance } from 'node:perf_hooks';
 
 import { getNodeId } from '../../../../db/index.js';
+import { debug } from '../../../../infrastructure/logger.js';
 import { loadNative } from '../../../../infrastructure/native.js';
 import type {
   BetterSqlite3Database,
@@ -639,9 +640,16 @@ export async function buildEdges(ctx: PipelineContext): Promise<void> {
   addLazyFallback(ctx, scopedLoad);
 
   const t0 = performance.now();
-  const buildEdgesTx = db.transaction(() => {
-    // Delete stale outgoing edges for barrel-only files inside the transaction
-    // so that deletion and re-creation are atomic (no edge loss on mid-build crash).
+ const native = engineName === 'native' ? loadNative() : null; + + // Phase 1: Compute edges inside a better-sqlite3 transaction. + // Barrel-edge deletion lives here so that the JS path (which also inserts + // edges in this transaction) keeps deletion + insertion atomic. + // When using the native rusqlite path, insertion happens in Phase 2 on a + // separate connection — a crash between Phase 1 and Phase 2 would leave + // barrel edges missing until the next incremental rebuild re-creates them. + const allEdgeRows: EdgeRowTuple[] = []; + const computeEdgesTx = db.transaction(() => { if (ctx.barrelOnlyFiles.size > 0) { const deleteOutgoingEdges = db.prepare( 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', @@ -651,19 +659,42 @@ export async function buildEdges(ctx: PipelineContext): Promise { } } - const allEdgeRows: EdgeRowTuple[] = []; - buildImportEdges(ctx, getNodeIdStmt, allEdgeRows); - const native = engineName === 'native' ? loadNative() : null; - if (native?.buildCallEdges) { - buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodesBefore, native); + // Skip native call-edge path for small incremental builds (≤3 files): + // napi-rs marshaling overhead for allNodes exceeds computation savings. + const useNativeCallEdges = + native?.buildCallEdges && (ctx.isFullBuild || ctx.fileSymbols.size > 3); + if (useNativeCallEdges) { + buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodesBefore, native!); } else { buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows); } - batchInsertEdges(db, allEdgeRows); + // When using native edge insert, skip JS insert here — do it after tx commits. + // Otherwise insert edges within this transaction for atomicity. 
+      if (!native?.bulkInsertEdges) {
+        batchInsertEdges(db, allEdgeRows);
+      }
   });
-  buildEdgesTx();
+  computeEdgesTx();
+
+  // Phase 2: Native rusqlite bulk insert (outside better-sqlite3 transaction
+  // since rusqlite opens its own connection — avoids SQLITE_BUSY contention)
+  if (native?.bulkInsertEdges && allEdgeRows.length > 0) {
+    const nativeEdges = allEdgeRows.map((r) => ({
+      sourceId: r[0],
+      targetId: r[1],
+      kind: r[2],
+      confidence: r[3],
+      dynamic: r[4],
+    }));
+    const ok = native.bulkInsertEdges(db.name, nativeEdges);
+    if (!ok) {
+      debug('Native bulkInsertEdges failed — falling back to JS batchInsertEdges');
+      batchInsertEdges(db, allEdgeRows);
+    }
+  }
+
   ctx.timing.edgesMs = performance.now() - t0;
 }
diff --git a/src/domain/graph/builder/stages/build-structure.ts b/src/domain/graph/builder/stages/build-structure.ts
index a3ea2d9b..212ce367 100644
--- a/src/domain/graph/builder/stages/build-structure.ts
+++ b/src/domain/graph/builder/stages/build-structure.ts
@@ -6,6 +6,7 @@ import path from 'node:path';
 import { performance } from 'node:perf_hooks';
 
 import { debug } from '../../../../infrastructure/logger.js';
+import { loadNative } from '../../../../infrastructure/native.js';
 import { normalizePath } from '../../../../shared/constants.js';
 import type { ExtractorOutput } from '../../../../types.js';
 import type { PipelineContext } from '../context.js';
@@ -86,13 +87,46 @@ export async function buildStructure(ctx: PipelineContext): Promise<void> {
   // Classify node roles (incremental: only reclassify changed files' nodes)
   const t1 = performance.now();
   try {
-    const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as {
-      classifyNodeRoles: (
-        db: PipelineContext['db'],
-        changedFiles?: string[] | null,
-      ) => Record<string, number>;
-    };
-    const roleSummary = classifyNodeRoles(db, changedFileList);
+    let roleSummary: Record<string, number> | null = null;
+
+    // Try native rusqlite path first (eliminates JS<->SQLite round-trips)
+    if (ctx.engineName === 'native') {
+      const native = loadNative();
+      if (native?.classifyRolesFull) {
+        const dbPath = db.name;
+        const nativeResult =
+          changedFileList && changedFileList.length > 0
+            ? native.classifyRolesIncremental?.(dbPath, changedFileList)
+            : native.classifyRolesFull(dbPath);
+        if (nativeResult) {
+          roleSummary = {
+            entry: nativeResult.entry,
+            core: nativeResult.core,
+            utility: nativeResult.utility,
+            adapter: nativeResult.adapter,
+            dead: nativeResult.dead,
+            'dead-leaf': nativeResult.deadLeaf,
+            'dead-entry': nativeResult.deadEntry,
+            'dead-ffi': nativeResult.deadFfi,
+            'dead-unresolved': nativeResult.deadUnresolved,
+            'test-only': nativeResult.testOnly,
+            leaf: nativeResult.leaf,
+          };
+        }
+      }
+    }
+
+    // Fall back to JS path
+    if (!roleSummary) {
+      const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as {
+        classifyNodeRoles: (
+          db: PipelineContext['db'],
+          changedFiles?: string[] | null,
+        ) => Record<string, number>;
+      };
+      roleSummary = classifyNodeRoles(db, changedFileList);
+    }
+
+    debug(
+      `Roles${changedFileList ?
` (incremental, ${changedFileList.length} files)` : ''}: ${Object.entries( roleSummary, diff --git a/src/types.ts b/src/types.ts index f593ae37..4939ff8a 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1838,6 +1838,45 @@ export interface NativeAddon { fileHashes: Array<{ file: string; hash: string; mtime: number; size: number }>, removedFiles: string[], ): boolean; + bulkInsertEdges( + dbPath: string, + edges: Array<{ + sourceId: number; + targetId: number; + kind: string; + confidence: number; + dynamic: number; + }>, + ): boolean; + classifyRolesFull(dbPath: string): { + entry: number; + core: number; + utility: number; + adapter: number; + dead: number; + deadLeaf: number; + deadEntry: number; + deadFfi: number; + deadUnresolved: number; + testOnly: number; + leaf: number; + } | null; + classifyRolesIncremental( + dbPath: string, + changedFiles: string[], + ): { + entry: number; + core: number; + utility: number; + adapter: number; + dead: number; + deadLeaf: number; + deadEntry: number; + deadFfi: number; + deadUnresolved: number; + testOnly: number; + leaf: number; + } | null; engineVersion(): string; ParseTreeCache: new () => NativeParseTreeCache; }