diff --git a/crates/codegraph-core/src/cfg_db.rs b/crates/codegraph-core/src/cfg_db.rs deleted file mode 100644 index 5381218a..00000000 --- a/crates/codegraph-core/src/cfg_db.rs +++ /dev/null @@ -1,199 +0,0 @@ -//! Bulk CFG block and edge insertion via rusqlite. -//! -//! Bypasses the JS iteration loop by opening the SQLite database directly -//! from Rust and inserting all CFG blocks and edges in a single transaction. -//! Function node IDs are resolved by querying the `nodes` table. - -use std::collections::HashMap; - -use napi_derive::napi; -use rusqlite::{params, Connection, OpenFlags}; -use serde::{Deserialize, Serialize}; - -/// A single CFG block to insert (received from JS). -#[napi(object)] -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct CfgInsertBlock { - pub index: u32, - #[napi(js_name = "type")] - pub block_type: String, - #[napi(js_name = "startLine")] - pub start_line: Option, - #[napi(js_name = "endLine")] - pub end_line: Option, - pub label: Option, -} - -/// A single CFG edge to insert (received from JS). -#[napi(object)] -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct CfgInsertEdge { - #[napi(js_name = "sourceIndex")] - pub source_index: u32, - #[napi(js_name = "targetIndex")] - pub target_index: u32, - pub kind: String, -} - -/// CFG data for a single function definition. -#[napi(object)] -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct CfgFunctionBatch { - /// Definition name (used to look up node ID) - pub name: String, - /// Relative file path - pub file: String, - /// Definition source line - pub line: u32, - pub blocks: Vec, - pub edges: Vec, -} - -/// Bulk-insert CFG blocks and edges into the database. -/// -/// For each function batch: -/// 1. Resolve the function's node ID from the `nodes` table -/// 2. Delete any existing CFG data for that node (handles incremental rebuilds) -/// 3. Insert all blocks, collecting their auto-generated row IDs -/// 4. 
Insert all edges, mapping block indices to row IDs -/// -/// Returns the total number of functions processed. Returns 0 on any error. -#[napi] -pub fn bulk_insert_cfg(db_path: String, batches: Vec) -> u32 { - if batches.is_empty() { - return 0; - } - - let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX; - let mut conn = match Connection::open_with_flags(&db_path, flags) { - Ok(c) => c, - Err(_) => return 0, - }; - - let _ = conn.execute_batch( - "PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000", - ); - - // Bail out if CFG tables don't exist - let has_tables: bool = conn - .prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name='cfg_blocks'") - .and_then(|mut s| s.query_row([], |_| Ok(true))) - .unwrap_or(false); - if !has_tables { - return 0; - } - - // ── Phase 1: Pre-fetch function node IDs ───────────────────────────── - let mut node_ids: HashMap<(String, String, u32), i64> = HashMap::new(); - { - let Ok(mut stmt) = conn.prepare( - "SELECT id FROM nodes WHERE name = ?1 AND kind IN ('function','method') AND file = ?2 AND line = ?3", - ) else { - return 0; - }; - - for batch in &batches { - let key = (batch.name.clone(), batch.file.clone(), batch.line); - if node_ids.contains_key(&key) { - continue; - } - if let Ok(id) = stmt.query_row(params![&batch.name, &batch.file, batch.line], |row| { - row.get::<_, i64>(0) - }) { - node_ids.insert(key, id); - } - } - } - - // ── Phase 2: Bulk insert in a single transaction ───────────────────── - let Ok(tx) = conn.transaction() else { - return 0; - }; - - let mut total = 0u32; - { - let Ok(mut delete_edges) = - tx.prepare("DELETE FROM cfg_edges WHERE function_node_id = ?1") - else { - return 0; - }; - let Ok(mut delete_blocks) = - tx.prepare("DELETE FROM cfg_blocks WHERE function_node_id = ?1") - else { - return 0; - }; - let Ok(mut insert_block) = tx.prepare( - "INSERT INTO cfg_blocks (function_node_id, block_index, block_type, start_line, end_line, label) \ - VALUES (?1, ?2, 
?3, ?4, ?5, ?6)", - ) else { - return 0; - }; - let Ok(mut insert_edge) = tx.prepare( - "INSERT INTO cfg_edges (function_node_id, source_block_id, target_block_id, kind) \ - VALUES (?1, ?2, ?3, ?4)", - ) else { - return 0; - }; - - for batch in &batches { - let key = (batch.name.clone(), batch.file.clone(), batch.line); - let Some(&node_id) = node_ids.get(&key) else { - continue; - }; - - // Always delete stale CFG rows (handles body-removed / incremental case) - if delete_edges.execute(params![node_id]).is_err() { - return 0; - } - if delete_blocks.execute(params![node_id]).is_err() { - return 0; - } - - if batch.blocks.is_empty() { - total += 1; - continue; - } - - // Insert blocks and collect their auto-generated row IDs - let mut block_db_ids: HashMap = HashMap::new(); - for block in &batch.blocks { - match insert_block.execute(params![ - node_id, - block.index, - &block.block_type, - block.start_line, - block.end_line, - &block.label, - ]) { - Ok(_) => { - block_db_ids.insert(block.index, tx.last_insert_rowid()); - } - Err(_) => return 0, - } - } - - // Insert edges, mapping block indices to row IDs - for edge in &batch.edges { - let Some(&source_db_id) = block_db_ids.get(&edge.source_index) else { - continue; - }; - let Some(&target_db_id) = block_db_ids.get(&edge.target_index) else { - continue; - }; - match insert_edge.execute(params![node_id, source_db_id, target_db_id, &edge.kind]) - { - Ok(_) => {} - Err(_) => return 0, - } - } - - total += 1; - } - } - - if tx.commit().is_err() { - return 0; - } - - total -} diff --git a/crates/codegraph-core/src/dataflow_db.rs b/crates/codegraph-core/src/dataflow_db.rs deleted file mode 100644 index fea11e01..00000000 --- a/crates/codegraph-core/src/dataflow_db.rs +++ /dev/null @@ -1,185 +0,0 @@ -//! Bulk dataflow edge insertion via rusqlite. -//! -//! Bypasses the JS iteration loop by opening the SQLite database directly -//! from Rust and inserting all dataflow edges in a single transaction. -//! 
Node IDs are resolved by querying the `nodes` table (local-first, then global). - -use std::collections::HashMap; - -use napi_derive::napi; -use rusqlite::{params, Connection, OpenFlags}; -use serde::{Deserialize, Serialize}; - -/// A single dataflow edge to insert (received from JS). -#[napi(object)] -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DataflowInsertEdge { - /// Source function name (resolved to node ID) - #[napi(js_name = "sourceName")] - pub source_name: String, - /// Target function name (resolved to node ID) - #[napi(js_name = "targetName")] - pub target_name: String, - /// Edge kind: "flows_to", "returns", or "mutates" - pub kind: String, - #[napi(js_name = "paramIndex")] - pub param_index: Option, - pub expression: Option, - pub line: Option, - pub confidence: f64, -} - -/// A batch of dataflow edges for a single file. -#[napi(object)] -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct FileDataflowBatch { - /// Relative file path (for local-first node resolution) - pub file: String, - pub edges: Vec, -} - -/// Resolve a function name to a node ID, preferring local (same-file) matches. -fn resolve_node( - local_stmt: &mut rusqlite::Statement, - global_stmt: &mut rusqlite::Statement, - name: &str, - file: &str, - cache: &mut HashMap<(String, String), Option>, -) -> Option { - let key = (name.to_string(), file.to_string()); - if let Some(cached) = cache.get(&key) { - return *cached; - } - - // Local-first: same file - let result = local_stmt - .query_row(params![name, file], |row| row.get::<_, i64>(0)) - .ok(); - - let id = if result.is_some() { - result - } else { - // Global fallback - global_stmt - .query_row(params![name], |row| row.get::<_, i64>(0)) - .ok() - }; - - cache.insert(key, id); - id -} - -/// Bulk-insert dataflow edges into the database. -/// -/// For each file batch, resolves function names to node IDs (local-first, -/// then global) and inserts edges in a single transaction. 
-/// -/// Returns the total number of edges inserted. Returns 0 on any error. -#[napi] -pub fn bulk_insert_dataflow(db_path: String, batches: Vec) -> u32 { - if batches.is_empty() { - return 0; - } - - let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX; - let mut conn = match Connection::open_with_flags(&db_path, flags) { - Ok(c) => c, - Err(_) => return 0, - }; - - let _ = conn.execute_batch( - "PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000", - ); - - // Bail out if the dataflow table doesn't exist - let has_table: bool = conn - .prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name='dataflow'") - .and_then(|mut s| s.query_row([], |_| Ok(true))) - .unwrap_or(false); - if !has_table { - return 0; - } - - // ── Phase 1: Pre-build node resolution cache ───────────────────────── - // Collect all unique (name, file) pairs we need to resolve - let mut resolve_cache: HashMap<(String, String), Option> = HashMap::new(); - { - let Ok(mut local_stmt) = conn.prepare( - "SELECT id FROM nodes WHERE name = ?1 AND file = ?2 AND kind IN ('function','method') LIMIT 1", - ) else { - return 0; - }; - let Ok(mut global_stmt) = conn.prepare( - "SELECT id FROM nodes WHERE name = ?1 AND kind IN ('function','method') ORDER BY file, line LIMIT 1", - ) else { - return 0; - }; - - for batch in &batches { - for edge in &batch.edges { - resolve_node( - &mut local_stmt, - &mut global_stmt, - &edge.source_name, - &batch.file, - &mut resolve_cache, - ); - resolve_node( - &mut local_stmt, - &mut global_stmt, - &edge.target_name, - &batch.file, - &mut resolve_cache, - ); - } - } - } - - // ── Phase 2: Bulk insert in a single transaction ───────────────────── - let Ok(tx) = conn.transaction() else { - return 0; - }; - - let mut total = 0u32; - { - let Ok(mut insert_stmt) = tx.prepare( - "INSERT INTO dataflow (source_id, target_id, kind, param_index, expression, line, confidence) \ - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", - ) else { - return 0; - }; - 
- for batch in &batches { - for edge in &batch.edges { - let source_key = (edge.source_name.clone(), batch.file.clone()); - let target_key = (edge.target_name.clone(), batch.file.clone()); - - let Some(&Some(source_id)) = resolve_cache.get(&source_key) else { - continue; - }; - let Some(&Some(target_id)) = resolve_cache.get(&target_key) else { - continue; - }; - - match insert_stmt.execute(params![ - source_id, - target_id, - &edge.kind, - edge.param_index, - &edge.expression, - edge.line, - edge.confidence, - ]) { - Ok(_) => total += 1, - Err(_) => return 0, - } - } - } - } - - if tx.commit().is_err() { - return 0; - } - - total -} diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index a4b9f881..4147dc03 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -295,6 +295,7 @@ fn walk_node_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: && find_parent_of_types(node, &[ "function_declaration", "arrow_function", "function_expression", "method_definition", + "generator_function_declaration", "generator_function", ]).is_none() { symbols.definitions.push(Definition { diff --git a/crates/codegraph-core/src/extractors/rust_lang.rs b/crates/codegraph-core/src/extractors/rust_lang.rs index 550fc5db..c4c07c11 100644 --- a/crates/codegraph-core/src/extractors/rust_lang.rs +++ b/crates/codegraph-core/src/extractors/rust_lang.rs @@ -40,7 +40,13 @@ fn walk_node_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: } match node.kind() { "function_item" => { - if let Some(name_node) = node.child_by_field_name("name") { + // Skip default-impl functions inside traits — already emitted by trait_item handler + if node.parent() + .and_then(|p| p.parent()) + .map_or(false, |gp| gp.kind() == "trait_item") + { + // still recurse into children below + } else if let Some(name_node) = node.child_by_field_name("name") { let 
name = node_text(&name_node, source); let impl_type = find_current_impl(node, source); let (full_name, kind) = match &impl_type { diff --git a/crates/codegraph-core/src/insert_nodes.rs b/crates/codegraph-core/src/insert_nodes.rs new file mode 100644 index 00000000..e49006b0 --- /dev/null +++ b/crates/codegraph-core/src/insert_nodes.rs @@ -0,0 +1,313 @@ +//! Bulk node insertion via rusqlite — native replacement for the JS insert-nodes stage. +//! +//! Moves the entire insert-nodes loop to Rust: receives `InsertNodesBatch[]` from JS +//! and writes nodes, children, containment/parameter_of edges, exports, and file hashes +//! directly to SQLite without crossing back to JS. + +use std::collections::HashMap; + +use napi_derive::napi; +use rusqlite::{params, Connection, OpenFlags}; +use serde::{Deserialize, Serialize}; + +// ── Input types (received from JS via napi) ───────────────────────── + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct InsertNodesChild { + pub name: String, + pub kind: String, + pub line: u32, + #[napi(js_name = "endLine")] + pub end_line: Option, + pub visibility: Option, +} + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct InsertNodesDefinition { + pub name: String, + pub kind: String, + pub line: u32, + #[napi(js_name = "endLine")] + pub end_line: Option, + pub visibility: Option, + pub children: Vec, +} + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct InsertNodesExport { + pub name: String, + pub kind: String, + pub line: u32, +} + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct InsertNodesBatch { + pub file: String, + pub definitions: Vec, + pub exports: Vec, +} + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FileHashEntry { + pub file: String, + pub hash: String, + /// `Math.floor(stat.mtimeMs)` from JS — f64 because JS `number`. 
+ pub mtime: f64, + pub size: f64, +} + +// ── Public napi entry point ───────────────────────────────────────── + +/// Bulk-insert nodes, children, containment edges, exports, and file hashes +/// into the database. Runs all writes in a single SQLite transaction. +/// +/// Returns `true` on success, `false` on any error (DB open failure, +/// missing table, transaction failure) so the JS caller can fall back. +#[napi] +pub fn bulk_insert_nodes( + db_path: String, + batches: Vec, + file_hashes: Vec, + removed_files: Vec, +) -> bool { + let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX; + let mut conn = match Connection::open_with_flags(&db_path, flags) { + Ok(c) => c, + Err(_) => return false, + }; + + let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000"); + + do_insert(&mut conn, &batches, &file_hashes, &removed_files).is_ok() +} + +// ── Internal implementation ───────────────────────────────────────── + +fn query_node_ids( + stmt: &mut rusqlite::CachedStatement, + file: &str, +) -> rusqlite::Result> { + let mut map = HashMap::new(); + let rows = stmt.query_map(params![file], |row| { + Ok(( + row.get::<_, i64>(0)?, + row.get::<_, String>(1)?, + row.get::<_, String>(2)?, + row.get::<_, u32>(3)?, + )) + })?; + for row in rows { + let (id, name, kind, line) = row?; + map.insert(format!("{name}|{kind}|{line}"), id); + } + Ok(map) +} + +fn do_insert( + conn: &mut Connection, + batches: &[InsertNodesBatch], + file_hashes: &[FileHashEntry], + removed_files: &[String], +) -> rusqlite::Result<()> { + let tx = conn.transaction()?; + + // ── Phase 1: Insert file nodes + definitions + export nodes ────── + { + let mut stmt = tx.prepare_cached( + "INSERT OR IGNORE INTO nodes \ + (name, kind, file, line, end_line, parent_id, qualified_name, scope, visibility) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)", + )?; + + for batch in batches { + // File node + stmt.execute(params![ + &batch.file, + "file", + 
&batch.file, + 0, + None::, + None::, + None::<&str>, + None::<&str>, + None::<&str> + ])?; + + // Definitions + for def in &batch.definitions { + let scope: Option<&str> = def.name.rfind('.').map(|i| &def.name[..i]); + stmt.execute(params![ + &def.name, + &def.kind, + &batch.file, + def.line, + def.end_line, + None::, + &def.name, + scope, + &def.visibility + ])?; + } + + // Exports (may duplicate definitions — OR IGNORE handles it) + for exp in &batch.exports { + stmt.execute(params![ + &exp.name, + &exp.kind, + &batch.file, + exp.line, + None::, + None::, + &exp.name, + None::<&str>, + None::<&str> + ])?; + } + } + } + + // ── Phase 1b: Mark exported nodes ──────────────────────────────── + { + let mut stmt = tx.prepare_cached( + "UPDATE nodes SET exported = 1 \ + WHERE name = ?1 AND kind = ?2 AND file = ?3 AND line = ?4", + )?; + for batch in batches { + for exp in &batch.exports { + stmt.execute(params![&exp.name, &exp.kind, &batch.file, exp.line])?; + } + } + } + + // ── Phase 2: Query node IDs, insert children, collect file→def edges + let mut contains_edges: Vec<(i64, i64)> = Vec::new(); + let mut param_of_edges: Vec<(i64, i64)> = Vec::new(); + + { + let mut id_stmt = + tx.prepare_cached("SELECT id, name, kind, line FROM nodes WHERE file = ?1")?; + let mut child_stmt = tx.prepare_cached( + "INSERT OR IGNORE INTO nodes \ + (name, kind, file, line, end_line, parent_id, qualified_name, scope, visibility) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)", + )?; + + for batch in batches { + let node_ids = query_node_ids(&mut id_stmt, &batch.file)?; + let file_id = node_ids.get(&format!("{}|file|0", &batch.file)).copied(); + + for def in &batch.definitions { + let def_key = format!("{}|{}|{}", def.name, def.kind, def.line); + let def_id = node_ids.get(&def_key).copied(); + + // file → definition containment edge + if let (Some(fid), Some(did)) = (file_id, def_id) { + contains_edges.push((fid, did)); + } + + let def_id = match def_id { + Some(id) if 
!def.children.is_empty() => id, + _ => continue, + }; + + for child in &def.children { + let qname = format!("{}.{}", def.name, child.name); + child_stmt.execute(params![ + &child.name, + &child.kind, + &batch.file, + child.line, + child.end_line, + def_id, + &qname, + &def.name, + &child.visibility + ])?; + } + } + } + } + + // ── Phase 3: Re-fetch IDs (including children), add def→child edges + { + let mut id_stmt = + tx.prepare_cached("SELECT id, name, kind, line FROM nodes WHERE file = ?1")?; + + for batch in batches { + let node_ids = query_node_ids(&mut id_stmt, &batch.file)?; + + for def in &batch.definitions { + if def.children.is_empty() { + continue; + } + let def_key = format!("{}|{}|{}", def.name, def.kind, def.line); + let def_id = match node_ids.get(&def_key) { + Some(&id) => id, + None => continue, + }; + + for child in &def.children { + let child_key = format!("{}|{}|{}", child.name, child.kind, child.line); + if let Some(&child_id) = node_ids.get(&child_key) { + contains_edges.push((def_id, child_id)); + if child.kind == "parameter" { + param_of_edges.push((child_id, def_id)); + } + } + } + } + } + } + + // ── Insert all edges ───────────────────────────────────────────── + { + let mut stmt = tx.prepare_cached( + "INSERT OR IGNORE INTO edges (source_id, target_id, kind, confidence, dynamic) \ + VALUES (?1, ?2, ?3, ?4, ?5)", + )?; + for &(src, tgt) in &contains_edges { + stmt.execute(params![src, tgt, "contains", 1.0, 0])?; + } + for &(src, tgt) in &param_of_edges { + stmt.execute(params![src, tgt, "parameter_of", 1.0, 0])?; + } + } + + // ── Phase 4: File hashes ───────────────────────────────────────── + let has_file_hashes = tx + .prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name='file_hashes'") + .and_then(|mut s| s.query_row([], |_| Ok(true))) + .unwrap_or(false); + + if has_file_hashes { + { + let mut upsert = tx.prepare_cached( + "INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) \ + VALUES (?1, ?2, ?3, ?4)", + )?; + for 
entry in file_hashes { + upsert.execute(params![ + &entry.file, + &entry.hash, + entry.mtime as i64, + entry.size as i64 + ])?; + } + } + + if !removed_files.is_empty() { + let mut delete = + tx.prepare_cached("DELETE FROM file_hashes WHERE file = ?1")?; + for file in removed_files { + delete.execute(params![file])?; + } + } + } + + tx.commit() +} diff --git a/crates/codegraph-core/src/lib.rs b/crates/codegraph-core/src/lib.rs index 984359b2..533fb2df 100644 --- a/crates/codegraph-core/src/lib.rs +++ b/crates/codegraph-core/src/lib.rs @@ -1,15 +1,14 @@ pub mod ast_db; pub mod cfg; -pub mod cfg_db; pub mod complexity; pub mod constants; pub mod cycles; pub mod dataflow; -pub mod dataflow_db; pub mod edge_builder; pub mod extractors; pub mod import_resolution; pub mod incremental; +pub mod insert_nodes; pub mod parallel; pub mod parser_registry; pub mod types; diff --git a/src/domain/graph/builder/stages/insert-nodes.ts b/src/domain/graph/builder/stages/insert-nodes.ts index 769bec6d..da8c62c7 100644 --- a/src/domain/graph/builder/stages/insert-nodes.ts +++ b/src/domain/graph/builder/stages/insert-nodes.ts @@ -3,10 +3,15 @@ * * Batch-inserts file nodes, definitions, exports, children, and contains/parameter_of edges. * Updates file hashes for incremental builds. + * + * When the native engine is available, delegates all SQLite writes to Rust via + * `bulkInsertNodes` — eliminating JS↔C boundary overhead. Falls back to the + * JS implementation on failure or when native is unavailable. 
*/ import path from 'node:path'; import { performance } from 'node:perf_hooks'; import { bulkNodeIdsByFile } from '../../../../db/index.js'; +import { loadNative } from '../../../../infrastructure/native.js'; import type { BetterSqlite3Database, ExtractorOutput, @@ -32,7 +37,112 @@ interface PrecomputedFileData { _reverseDepOnly?: boolean; } -// ── Phase 1: Insert file nodes, definitions, exports ──────────────────── +// ── Native fast-path ───────────────────────────────────────────────── + +function tryNativeInsert(ctx: PipelineContext): boolean { + const native = loadNative(); + if (!native?.bulkInsertNodes) return false; + + const { dbPath, allSymbols, filesToParse, metadataUpdates, rootDir, removed } = ctx; + if (!dbPath) return false; + + // Marshal allSymbols → InsertNodesBatch[] + const batches: Array<{ + file: string; + definitions: Array<{ + name: string; + kind: string; + line: number; + endLine?: number | null; + visibility?: string | null; + children: Array<{ + name: string; + kind: string; + line: number; + endLine?: number | null; + visibility?: string | null; + }>; + }>; + exports: Array<{ name: string; kind: string; line: number }>; + }> = []; + + for (const [relPath, symbols] of allSymbols) { + batches.push({ + file: relPath, + definitions: symbols.definitions.map((def) => ({ + name: def.name, + kind: def.kind, + line: def.line, + endLine: def.endLine ?? null, + visibility: def.visibility ?? null, + children: (def.children ?? []).map((c) => ({ + name: c.name, + kind: c.kind, + line: c.line, + endLine: c.endLine ?? null, + visibility: c.visibility ?? 
null, + })), + })), + exports: symbols.exports.map((exp) => ({ + name: exp.name, + kind: exp.kind, + line: exp.line, + })), + }); + } + + // Build file hash entries + const precomputedData = new Map(); + for (const item of filesToParse) { + if (item.relPath) precomputedData.set(item.relPath, item as PrecomputedFileData); + } + + const fileHashes: Array<{ file: string; hash: string; mtime: number; size: number }> = []; + for (const [relPath] of allSymbols) { + const precomputed = precomputedData.get(relPath); + if (precomputed?._reverseDepOnly) { + continue; // file unchanged, hash already correct + } + if (precomputed?.hash) { + let mtime: number; + let size: number; + if (precomputed.stat) { + mtime = precomputed.stat.mtime; + size = precomputed.stat.size; + } else { + const rawStat = fileStat(path.join(rootDir, relPath)); + mtime = rawStat ? Math.floor(rawStat.mtimeMs) : 0; + size = rawStat ? rawStat.size : 0; + } + fileHashes.push({ file: relPath, hash: precomputed.hash, mtime, size }); + } else { + const absPath = path.join(rootDir, relPath); + let code: string | null; + try { + code = readFileSafe(absPath); + } catch { + code = null; + } + if (code !== null) { + const stat = fileStat(absPath); + const mtime = stat ? Math.floor(stat.mtimeMs) : 0; + const size = stat ? stat.size : 0; + fileHashes.push({ file: relPath, hash: fileHash(code), mtime, size }); + } + } + } + + // Also include metadata-only updates (self-heal mtime/size without re-parse) + for (const item of metadataUpdates) { + const mtime = item.stat ? Math.floor(item.stat.mtime) : 0; + const size = item.stat ? 
item.stat.size : 0; + fileHashes.push({ file: item.relPath, hash: item.hash, mtime, size }); + } + + return native.bulkInsertNodes(dbPath, batches, fileHashes, removed); +} + +// ── JS fallback: Phase 1 ──────────────────────────────────────────── function insertDefinitionsAndExports( db: BetterSqlite3Database, @@ -90,7 +200,7 @@ function insertDefinitionsAndExports( } } -// ── Phase 2+3: Insert children and containment edges (two nodeIdMap passes) ── +// ── JS fallback: Phase 2+3 ────────────────────────────────────────── function insertChildrenAndEdges( db: BetterSqlite3Database, @@ -165,7 +275,7 @@ function insertChildrenAndEdges( batchInsertEdges(db, edgeRows); } -// ── Phase 4: Update file hashes ───────────────────────────────────────── +// ── JS fallback: Phase 4 ──────────────────────────────────────────── function updateFileHashes( _db: BetterSqlite3Database, @@ -218,11 +328,27 @@ function updateFileHashes( } } -// ── Main entry point ──────────────────────────────────────────────────── +// ── Main entry point ──────────────────────────────────────────────── export async function insertNodes(ctx: PipelineContext): Promise { const { db, allSymbols, filesToParse, metadataUpdates, rootDir, removed } = ctx; + // Populate fileSymbols before any DB writes (used by later stages) + for (const [relPath, symbols] of allSymbols) { + ctx.fileSymbols.set(relPath, symbols); + } + + const t0 = performance.now(); + + // Try native Rust path first — single transaction, no JS↔C overhead + if (ctx.engineName === 'native' && tryNativeInsert(ctx)) { + ctx.timing.insertMs = performance.now() - t0; + + // Removed-file hash cleanup is handled inside the native call + return; + } + + // JS fallback const precomputedData = new Map(); for (const item of filesToParse) { if (item.relPath) precomputedData.set(item.relPath, item as PrecomputedFileData); @@ -237,18 +363,12 @@ export async function insertNodes(ctx: PipelineContext): Promise { upsertHash = null; } - // Populate fileSymbols 
before the transaction so it is a pure input - for (const [relPath, symbols] of allSymbols) { - ctx.fileSymbols.set(relPath, symbols); - } - const insertAll = db.transaction(() => { insertDefinitionsAndExports(db, allSymbols); insertChildrenAndEdges(db, allSymbols); updateFileHashes(db, allSymbols, precomputedData, metadataUpdates, rootDir, upsertHash); }); - const t0 = performance.now(); insertAll(); ctx.timing.insertMs = performance.now() - t0; diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 68abde2d..70b7bde6 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -85,6 +85,7 @@ const COMMON_QUERY_PATTERNS: string[] = [ '(variable_declarator name: (identifier) @varfn_name value: (arrow_function) @varfn_value)', '(variable_declarator name: (identifier) @varfn_name value: (function_expression) @varfn_value)', '(method_definition name: (property_identifier) @meth_name) @meth_node', + '(method_definition name: (private_property_identifier) @meth_name) @meth_node', '(import_statement source: (string) @imp_source) @imp_node', '(export_statement) @exp_node', '(call_expression function: (identifier) @callfn_name) @callfn_node', diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts index ef88a6c2..fc32576c 100644 --- a/src/extractors/javascript.ts +++ b/src/extractors/javascript.ts @@ -260,51 +260,69 @@ function extractSymbolsQuery(tree: TreeSitterTree, query: TreeSitterQuery): Extr return { definitions, calls, imports, classes, exports: exps, typeMap }; } +/** Node types that define a function scope — constants inside these are skipped. */ +const FUNCTION_SCOPE_TYPES = new Set([ + 'function_declaration', + 'arrow_function', + 'function_expression', + 'method_definition', + 'generator_function_declaration', + 'generator_function', +]); + /** - * Walk program-level children to extract `const x = ` as constants. - * The query-based fast path has no pattern for lexical_declaration/variable_declaration, - * so constants are missed. 
This targeted walk fills that gap without a full tree traversal. + * Recursively walk the AST to extract `const x = ` as constants. + * Skips nodes inside function scopes so only file-level / block-level constants + * are captured — matching the native engine's behaviour. */ -function extractConstantsWalk(rootNode: TreeSitterNode, definitions: Definition[]): void { - for (let i = 0; i < rootNode.childCount; i++) { - const node = rootNode.child(i); - if (!node) continue; +function extractConstantsWalk(node: TreeSitterNode, definitions: Definition[]): void { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + + // Don't descend into function scopes + if (FUNCTION_SCOPE_TYPES.has(child.type)) continue; - let declNode = node; + let declNode = child; // Handle `export const …` — unwrap the export_statement to its declaration child - if (node.type === 'export_statement') { - const inner = node.childForFieldName('declaration'); - if (!inner) continue; - declNode = inner; + if (child.type === 'export_statement') { + const inner = child.childForFieldName('declaration'); + if (inner) declNode = inner; } const t = declNode.type; - if (t !== 'lexical_declaration' && t !== 'variable_declaration') continue; - if (!declNode.text.startsWith('const ')) continue; - - for (let j = 0; j < declNode.childCount; j++) { - const declarator = declNode.child(j); - if (!declarator || declarator.type !== 'variable_declarator') continue; - const nameN = declarator.childForFieldName('name'); - const valueN = declarator.childForFieldName('value'); - if (!nameN || nameN.type !== 'identifier' || !valueN) continue; - // Skip functions — already captured by query patterns - const valType = valueN.type; - if ( - valType === 'arrow_function' || - valType === 'function_expression' || - valType === 'function' - ) - continue; - if (isConstantValue(valueN)) { - definitions.push({ - name: nameN.text, - kind: 'constant', - line: declNode.startPosition.row + 
1, - endLine: nodeEndLine(declNode), - }); + if (t === 'lexical_declaration' || t === 'variable_declaration') { + if (declNode.text.startsWith('const ')) { + for (let j = 0; j < declNode.childCount; j++) { + const declarator = declNode.child(j); + if (!declarator || declarator.type !== 'variable_declarator') continue; + const nameN = declarator.childForFieldName('name'); + const valueN = declarator.childForFieldName('value'); + if (!nameN || nameN.type !== 'identifier' || !valueN) continue; + // Skip functions — already captured by query patterns + const valType = valueN.type; + if ( + valType === 'arrow_function' || + valType === 'function_expression' || + valType === 'function' + ) + continue; + if (isConstantValue(valueN)) { + definitions.push({ + name: nameN.text, + kind: 'constant', + line: declNode.startPosition.row + 1, + endLine: nodeEndLine(declNode), + }); + } + } } } + + // Recurse into non-function, non-export-statement children (blocks, if-statements, etc.) + if (child.type !== 'export_statement') { + extractConstantsWalk(child, definitions); + } } } diff --git a/src/extractors/rust.ts b/src/extractors/rust.ts index e74f2e78..031834c7 100644 --- a/src/extractors/rust.ts +++ b/src/extractors/rust.ts @@ -227,7 +227,7 @@ function extractRustParameters(paramListNode: TreeSitterNode | null): SubDeclara const param = paramListNode.child(i); if (!param) continue; if (param.type === 'self_parameter') { - params.push({ name: 'self', kind: 'parameter', line: param.startPosition.row + 1 }); + // Skip self parameters — matches native engine behaviour } else if (param.type === 'parameter') { const pattern = param.childForFieldName('pattern'); if (pattern) { diff --git a/src/features/cfg.ts b/src/features/cfg.ts index 7cdfbb5e..389ee3c2 100644 --- a/src/features/cfg.ts +++ b/src/features/cfg.ts @@ -17,7 +17,6 @@ import { openReadonlyOrFail, } from '../db/index.js'; import { debug, info } from '../infrastructure/logger.js'; -import { loadNative } from 
'../infrastructure/native.js'; import { paginateResult } from '../shared/paginate.js'; import type { BetterSqlite3Database, Definition, NodeRow, TreeSitterNode } from '../types.js'; import { findNodes } from './shared/find-nodes.js'; @@ -286,93 +285,6 @@ export async function buildCFGData( // skip WASM parser init, tree parsing, and JS visitor entirely — just persist. const allNative = allCfgNative(fileSymbols); - // ── Native bulk-insert fast path ────────────────────────────────────── - // When all CFG data is pre-computed by Rust and no files need WASM visitor, - // bypass JS iteration entirely — collect batches and hand them to rusqlite. - if (allNative) { - const native = loadNative(); - if (native?.bulkInsertCfg) { - let needsJsFallback = false; - const batches: Array<{ - name: string; - file: string; - line: number; - blocks: Array<{ - index: number; - type: string; - startLine?: number | null; - endLine?: number | null; - label?: string | null; - }>; - edges: Array<{ - sourceIndex: number; - targetIndex: number; - kind: string; - }>; - }> = []; - - for (const [relPath, symbols] of fileSymbols) { - const ext = path.extname(relPath).toLowerCase(); - if (!CFG_EXTENSIONS.has(ext)) continue; - - // Files with _tree were WASM-parsed and need the slow path - if (symbols._tree) { - needsJsFallback = true; - break; - } - - for (const def of symbols.definitions) { - if (def.kind !== 'function' && def.kind !== 'method') continue; - if (!def.line) continue; - - const cfgData = def.cfg as unknown as - | { blocks: CfgBuildBlock[]; edges: CfgBuildEdge[] } - | null - | undefined; - - batches.push({ - name: def.name, - file: relPath, - line: def.line, - blocks: cfgData?.blocks?.length - ? cfgData.blocks.map((b) => ({ - index: b.index, - type: b.type, - startLine: b.startLine, - endLine: b.endLine, - label: b.label, - })) - : [], - edges: cfgData?.blocks?.length - ? 
(cfgData.edges || []).map((e) => ({ - sourceIndex: e.sourceIndex, - targetIndex: e.targetIndex, - kind: e.kind, - })) - : [], - }); - } - } - - if (!needsJsFallback) { - const processed = native.bulkInsertCfg(db.name, batches); - const expectedFunctions = batches.filter((b) => b.blocks.length > 0).length; - if (processed === batches.length || expectedFunctions === 0) { - if (expectedFunctions > 0) { - info(`CFG: ${expectedFunctions} functions analyzed (native bulk)`); - } - return; - } - debug( - `CFG: bulk insert expected ${batches.length} functions, got ${processed} — falling back to JS`, - ); - // fall through to JS path - } - // fall through to JS path - } - } - - // ── JS fallback path ────────────────────────────────────────────────── const extToLang = buildExtToLangMap(); let parsers: unknown = null; let getParserFn: unknown = null; diff --git a/src/features/dataflow.ts b/src/features/dataflow.ts index 5c249c9a..8315b524 100644 --- a/src/features/dataflow.ts +++ b/src/features/dataflow.ts @@ -22,7 +22,6 @@ import { createDataflowVisitor } from '../ast-analysis/visitors/dataflow-visitor import { hasDataflowTable, openReadonlyOrFail } from '../db/index.js'; import { ALL_SYMBOL_KINDS, normalizeSymbol } from '../domain/queries.js'; import { debug, info } from '../infrastructure/logger.js'; -import { loadNative } from '../infrastructure/native.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { paginateResult } from '../shared/paginate.js'; import type { BetterSqlite3Database, NodeRow, TreeSitterNode } from '../types.js'; @@ -245,109 +244,6 @@ export async function buildDataflowEdges( _engineOpts?: unknown, ): Promise { const extToLang = buildExtToLangMap(); - - // ── Native bulk-insert fast path ────────────────────────────────────── - const native = loadNative(); - if (native?.bulkInsertDataflow) { - let needsJsFallback = false; - const batches: Array<{ - file: string; - edges: Array<{ - sourceName: string; - targetName: string; - kind: 
string; - paramIndex?: number | null; - expression?: string | null; - line?: number | null; - confidence: number; - }>; - }> = []; - - for (const [relPath, symbols] of fileSymbols) { - const ext = path.extname(relPath).toLowerCase(); - if (!DATAFLOW_EXTENSIONS.has(ext)) continue; - - // If we have pre-computed dataflow (from native extraction or unified walk), - // collect the edges directly - const data = symbols.dataflow; - if (!data) { - // Need WASM fallback for this file - if (!symbols._tree) { - needsJsFallback = true; - break; - } - // Has _tree but no dataflow — will be handled by visitor in engine, - // but if we got here the engine already ran. Skip this file. - continue; - } - - const fileEdges: (typeof batches)[0]['edges'] = []; - - for (const flow of data.argFlows as ArgFlow[]) { - if (flow.callerFunc && flow.calleeName) { - fileEdges.push({ - sourceName: flow.callerFunc, - targetName: flow.calleeName, - kind: 'flows_to', - paramIndex: flow.argIndex, - expression: flow.expression, - line: flow.line, - confidence: flow.confidence, - }); - } - } - - for (const assignment of data.assignments as Assignment[]) { - if (assignment.sourceCallName && assignment.callerFunc) { - fileEdges.push({ - sourceName: assignment.sourceCallName, - targetName: assignment.callerFunc, - kind: 'returns', - paramIndex: null, - expression: assignment.expression, - line: assignment.line, - confidence: 1.0, - }); - } - } - - for (const mut of data.mutations as Mutation[]) { - if (mut.funcName && mut.binding?.type === 'param') { - fileEdges.push({ - sourceName: mut.funcName, - targetName: mut.funcName, - kind: 'mutates', - paramIndex: null, - expression: mut.mutatingExpr, - line: mut.line, - confidence: 1.0, - }); - } - } - - if (fileEdges.length > 0) { - batches.push({ file: relPath, edges: fileEdges }); - } - } - - if (!needsJsFallback) { - const inserted = native.bulkInsertDataflow(db.name, batches); - const expectedEdges = batches.reduce((s, b) => s + b.edges.length, 0); - if 
(inserted === expectedEdges || expectedEdges === 0) { - if (inserted > 0) { - info(`Dataflow: ${inserted} edges inserted (native bulk)`); - } - return; - } - debug( - `Dataflow: bulk insert expected ${expectedEdges} edges, got ${inserted} — falling back to JS`, - ); - // fall through to JS path - } - // fall through to JS path - } - - // ── JS fallback path ────────────────────────────────────────────────── const { parsers, getParserFn } = await initDataflowParsers(fileSymbols); const insert = db.prepare( diff --git a/src/types.ts b/src/types.ts index bdbc29a9..f593ae37 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1815,41 +1815,29 @@ export interface NativeAddon { }>; }>, ): number; - bulkInsertCfg( + bulkInsertNodes( dbPath: string, batches: Array<{ - name: string; file: string; - line: number; - blocks: Array<{ - index: number; - type: string; - startLine?: number | null; - endLine?: number | null; - label?: string | null; - }>; - edges: Array<{ - sourceIndex: number; - targetIndex: number; - kind: string; - }>; - }>, - ): number; - bulkInsertDataflow( - dbPath: string, - batches: Array<{ - file: string; - edges: Array<{ - sourceName: string; - targetName: string; + definitions: Array<{ + name: string; kind: string; - paramIndex?: number | null; - expression?: string | null; - line?: number | null; - confidence: number; + line: number; + endLine?: number | null; + visibility?: string | null; + children: Array<{ + name: string; + kind: string; + line: number; + endLine?: number | null; + visibility?: string | null; + }>; }>; + exports: Array<{ name: string; kind: string; line: number }>; }>, - ): number; + fileHashes: Array<{ file: string; hash: string; mtime: number; size: number }>, + removedFiles: string[], + ): boolean; engineVersion(): string; ParseTreeCache: new () => NativeParseTreeCache; }