diff --git a/crates/codegraph-core/Cargo.toml b/crates/codegraph-core/Cargo.toml
index d968ad1c..e7cd155d 100644
--- a/crates/codegraph-core/Cargo.toml
+++ b/crates/codegraph-core/Cargo.toml
@@ -24,6 +24,10 @@ tree-sitter-ruby = "0.23"
 tree-sitter-php = "0.23"
 tree-sitter-hcl = "1"
 rayon = "1"
+# `bundled` embeds a second SQLite copy (better-sqlite3 already bundles one).
+# This is intentional: Windows CI lacks a system SQLite, and WAL coordination
+# between the two instances is handled safely at the OS level.
+rusqlite = { version = "0.32", features = ["bundled"] }
 send_wrapper = "0.6"
 
 [build-dependencies]
diff --git a/crates/codegraph-core/src/ast_db.rs b/crates/codegraph-core/src/ast_db.rs
new file mode 100644
index 00000000..4f317db1
--- /dev/null
+++ b/crates/codegraph-core/src/ast_db.rs
@@ -0,0 +1,175 @@
+//! Bulk AST node insertion via rusqlite.
+//!
+//! Bypasses the JS iteration loop by opening the SQLite database directly
+//! from Rust and inserting all AST nodes in a single transaction.
+//! Parent node IDs are resolved by querying the `nodes` table.
+
+use std::collections::HashMap;
+
+use napi_derive::napi;
+use rusqlite::{params, Connection, OpenFlags};
+use serde::{Deserialize, Serialize};
+
+/// A single AST node to insert (received from JS).
+#[napi(object)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct AstInsertNode {
+    pub line: u32,
+    pub kind: String,
+    pub name: String,
+    pub text: Option<String>,
+    pub receiver: Option<String>,
+}
+
+/// A batch of AST nodes for a single file.
+#[napi(object)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct FileAstBatch {
+    pub file: String,
+    pub nodes: Vec<AstInsertNode>,
+}
+
+/// A definition row from the `nodes` table used for parent resolution.
+struct NodeDef {
+    id: i64,
+    line: u32,
+    end_line: Option<u32>,
+}
+
+/// Find the narrowest enclosing definition for a given source line.
+/// Returns the node ID of the best match, or None if no definition encloses this line.
+///
+/// Mirrors the JS `findParentDef` semantics: a definition with `end_line = NULL`
+/// is treated as always enclosing, with a negative sentinel span so it is preferred
+/// over definitions that have an explicit (wider) `end_line`.
+///
+/// Ties are resolved in favour of the definition that appears first in `defs`.
+fn find_parent_id(defs: &[NodeDef], line: u32) -> Option<i64> {
+    let mut best_id: Option<i64> = None;
+    let mut best_span: i64 = i64::MAX;
+    for d in defs {
+        if d.line <= line {
+            let span: i64 = match d.end_line {
+                // `el >= line >= d.line`, so the subtraction cannot underflow.
+                Some(el) if el >= line => i64::from(el - d.line),
+                Some(_) => continue,
+                // JS: (def.endLine ?? 0) - def.line → negative, always preferred
+                None => -i64::from(d.line),
+            };
+            if span < best_span {
+                best_id = Some(d.id);
+                best_span = span;
+            }
+        }
+    }
+    best_id
+}
+
+/// Bulk-insert AST nodes into the database, resolving `parent_node_id`
+/// from the `nodes` table. Runs all inserts in a single SQLite transaction.
+///
+/// Returns the number of rows inserted. Returns 0 on any error (DB open
+/// failure, missing table, parent-lookup failure, transaction failure);
+/// nothing is committed in that case, so the caller can safely fall back
+/// to the JS insertion path.
+#[napi]
+pub fn bulk_insert_ast_nodes(db_path: String, batches: Vec<FileAstBatch>) -> u32 {
+    if batches.is_empty() {
+        return 0;
+    }
+
+    let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX;
+    let mut conn = match Connection::open_with_flags(&db_path, flags) {
+        Ok(c) => c,
+        Err(_) => return 0,
+    };
+
+    // Match the JS-side performance pragmas (including busy_timeout for WAL contention)
+    let _ = conn.execute_batch(
+        "PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000",
+    );
+
+    // Bail out if the ast_nodes table doesn't exist (schema too old)
+    let has_table: bool = conn
+        .prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name='ast_nodes'")
+        .and_then(|mut s| s.query_row([], |_| Ok(true)))
+        .unwrap_or(false);
+    if !has_table {
+        return 0;
+    }
+
+    // ── Phase 1: Pre-fetch node definitions for parent resolution ────────
+    let mut file_defs: HashMap<String, Vec<NodeDef>> = HashMap::new();
+    {
+        let Ok(mut stmt) =
+            conn.prepare("SELECT id, line, end_line FROM nodes WHERE file = ?1")
+        else {
+            return 0;
+        };
+
+        for batch in &batches {
+            if batch.nodes.is_empty() || file_defs.contains_key(&batch.file) {
+                continue;
+            }
+            // A failed query or an undecodable row aborts the native path (the
+            // caller falls back to JS) instead of silently dropping candidate
+            // parents and storing wrong `parent_node_id` values.
+            let Ok(defs) = stmt
+                .query_map(params![&batch.file], |row| {
+                    Ok(NodeDef {
+                        id: row.get(0)?,
+                        line: row.get(1)?,
+                        end_line: row.get(2)?,
+                    })
+                })
+                .and_then(|rows| rows.collect::<rusqlite::Result<Vec<NodeDef>>>())
+            else {
+                return 0;
+            };
+            file_defs.insert(batch.file.clone(), defs);
+        }
+    } // `stmt` dropped — releases the immutable borrow on `conn`
+
+    // ── Phase 2: Bulk insert in a single transaction ─────────────────────
+    let Ok(tx) = conn.transaction() else {
+        return 0;
+    };
+
+    let mut total = 0u32;
+    {
+        let Ok(mut insert_stmt) = tx.prepare(
+            "INSERT INTO ast_nodes (file, line, kind, name, text, receiver, parent_node_id) \
+             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
+        ) else {
+            return 0;
+        };
+
+        for batch in &batches {
+            // Files without pre-fetched definitions resolve against an empty slice.
+            let defs = file_defs.get(&batch.file).map(Vec::as_slice).unwrap_or_default();
+
+            for node in &batch.nodes {
+                let parent_id = find_parent_id(defs, node.line);
+
+                match insert_stmt.execute(params![
+                    &batch.file,
+                    node.line,
+                    &node.kind,
+                    &node.name,
+                    &node.text,
+                    &node.receiver,
+                    parent_id,
+                ]) {
+                    Ok(_) => total += 1,
+                    Err(_) => return 0, // abort; tx rolls back on drop
+                }
+            }
+        }
+    } // `insert_stmt` dropped
+
+    if tx.commit().is_err() {
+        return 0;
+    }
+
+    total
+}
diff --git a/crates/codegraph-core/src/lib.rs b/crates/codegraph-core/src/lib.rs
index 6d3aa6d0..391f0854 100644
--- a/crates/codegraph-core/src/lib.rs
+++ b/crates/codegraph-core/src/lib.rs
@@ -1,3 +1,4 @@
+pub mod ast_db;
 pub mod cfg;
 pub mod complexity;
 pub mod constants;
diff --git a/src/features/ast.ts b/src/features/ast.ts
index 55307fa0..6edd428f 100644
--- a/src/features/ast.ts
+++ b/src/features/ast.ts
@@ -6,6 +6,7 @@ import { createAstStoreVisitor } from '../ast-analysis/visitors/ast-store-visito
 import { bulkNodeIdsByFile, openReadonlyOrFail } from '../db/index.js';
 import { buildFileConditionSQL } from '../db/query-builder.js';
 import { debug } from '../infrastructure/logger.js';
+import { loadNative } from '../infrastructure/native.js';
 import { outputResult } from '../infrastructure/result-formatter.js';
 import { paginateResult } from '../shared/paginate.js';
 import type { ASTNodeKind, BetterSqlite3Database, Definition, TreeSitterNode } from '../types.js';
@@ -67,6 +68,54 @@ export async function buildAstNodes(
   _rootDir: string,
   _engineOpts?: unknown,
 ): Promise<void> {
+  // ── Native bulk-insert fast path ──────────────────────────────────────
+  const native = loadNative();
+  if (native?.bulkInsertAstNodes) {
+    let needsJsFallback = false;
+    const batches: Array<{
+      file: string;
+      nodes: Array<{
+        line: number;
+        kind: string;
+        name: string;
+        text?: string | null;
+        receiver?: string | null;
+      }>;
+    }> = [];
+
+    for (const [relPath, symbols] of fileSymbols) {
+      if (Array.isArray(symbols.astNodes)) {
+        batches.push({
+          file: relPath,
+          nodes: symbols.astNodes.map((n) => ({
+            line: n.line,
+            kind: n.kind,
+            name: n.name,
+            text: n.text,
+            receiver: n.receiver,
+          })),
+        });
+      } else if (symbols.calls || symbols._tree) {
+        needsJsFallback = true;
+        break;
+      }
+    }
+
+    if (!needsJsFallback) {
+      const expectedNodes = batches.reduce((s, b) => s + b.nodes.length, 0);
+      const inserted = native.bulkInsertAstNodes(db.name, batches);
+      if (inserted === expectedNodes) {
+        debug(`AST extraction (native bulk): ${inserted} nodes stored`);
+        return;
+      }
+      debug(
+        `AST extraction (native bulk): expected ${expectedNodes}, got ${inserted} — falling back to JS`,
+      );
+      // fall through to JS path
+    }
+  }
+
+  // ── JS fallback path ──────────────────────────────────────────────────
   let insertStmt: ReturnType<BetterSqlite3Database['prepare']>;
   try {
     insertStmt = db.prepare(
diff --git a/src/types.ts b/src/types.ts
index 7dc1236b..41058cce 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -1802,6 +1802,19 @@ export interface NativeAddon {
   computeConfidence(callerFile: string, targetFile: string, importedFrom: string | null): number;
   detectCycles(edges: Array<{ source: string; target: string }>): string[][];
   buildCallEdges(files: unknown[], nodes: unknown[], builtinReceivers: string[]): unknown[];
+  bulkInsertAstNodes(
+    dbPath: string,
+    batches: Array<{
+      file: string;
+      nodes: Array<{
+        line: number;
+        kind: string;
+        name: string;
+        text?: string | null;
+        receiver?: string | null;
+      }>;
+    }>,
+  ): number;
   engineVersion(): string;
   ParseTreeCache: new () => NativeParseTreeCache;
 }