Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,7 @@ Only **3 runtime dependencies** — everything else is optional or a devDependen

| Dependency | What it does | | |
|---|---|---|---|
| [better-sqlite3](https://github.com/WiseLibs/better-sqlite3) | Fast, synchronous SQLite driver | ![GitHub stars](https://img.shields.io/github/stars/WiseLibs/better-sqlite3?style=flat-square&label=%E2%AD%90) | ![npm downloads](https://img.shields.io/npm/dw/better-sqlite3?style=flat-square&label=%F0%9F%93%A5%2Fwk) |
| [better-sqlite3](https://github.com/WiseLibs/better-sqlite3) | SQLite driver (WASM engine; lazy-loaded, not used for native-engine reads) | ![GitHub stars](https://img.shields.io/github/stars/WiseLibs/better-sqlite3?style=flat-square&label=%E2%AD%90) | ![npm downloads](https://img.shields.io/npm/dw/better-sqlite3?style=flat-square&label=%F0%9F%93%A5%2Fwk) |
| [commander](https://github.com/tj/commander.js) | CLI argument parsing | ![GitHub stars](https://img.shields.io/github/stars/tj/commander.js?style=flat-square&label=%E2%AD%90) | ![npm downloads](https://img.shields.io/npm/dw/commander?style=flat-square&label=%F0%9F%93%A5%2Fwk) |
| [web-tree-sitter](https://github.com/tree-sitter/tree-sitter) | WASM tree-sitter bindings | ![GitHub stars](https://img.shields.io/github/stars/tree-sitter/tree-sitter?style=flat-square&label=%E2%AD%90) | ![npm downloads](https://img.shields.io/npm/dw/web-tree-sitter?style=flat-square&label=%F0%9F%93%A5%2Fwk) |

Expand Down
27 changes: 4 additions & 23 deletions crates/codegraph-core/src/ast_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
use std::collections::HashMap;

use napi_derive::napi;
use rusqlite::{params, Connection, OpenFlags};
use rusqlite::{params, Connection};
use serde::{Deserialize, Serialize};

/// A single AST node to insert (received from JS).
Expand Down Expand Up @@ -62,28 +62,9 @@ fn find_parent_id(defs: &[NodeDef], line: u32) -> Option<i64> {
best_id
}

/// Bulk-insert AST nodes into the database, resolving `parent_node_id`
/// from the `nodes` table. Runs all inserts in a single SQLite transaction.
///
/// Returns the number of rows inserted. Returns 0 on any error (DB open
/// failure, missing table, transaction failure).
#[napi]
pub fn bulk_insert_ast_nodes(db_path: String, batches: Vec<FileAstBatch>) -> u32 {
if batches.is_empty() {
return 0;
}

let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX;
let conn = match Connection::open_with_flags(&db_path, flags) {
Ok(c) => c,
Err(_) => return 0,
};

// Match the JS-side performance pragmas (including busy_timeout for WAL contention)
let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000");

do_insert_ast_nodes(&conn, &batches).unwrap_or(0)
}
// NOTE: The standalone `bulk_insert_ast_nodes` napi export was removed in Phase 6.17.
// All callers now use `NativeDatabase::bulk_insert_ast_nodes()` which reuses the
// persistent connection, eliminating the double-connection antipattern.

/// Internal implementation: insert AST nodes using an existing connection.
/// Used by `NativeDatabase::bulk_insert_ast_nodes()` (the standalone napi export was removed in Phase 6.17).
Expand Down
24 changes: 4 additions & 20 deletions crates/codegraph-core/src/edges_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
//! implements edges directly to SQLite without marshaling back to JS.

use napi_derive::napi;
use rusqlite::{Connection, OpenFlags};
use rusqlite::Connection;

/// A single edge row to insert: [source_id, target_id, kind, confidence, dynamic].
#[napi(object)]
Expand All @@ -20,25 +20,9 @@ pub struct EdgeRow {
pub dynamic: u32,
}

/// Bulk-insert edge rows into the database via rusqlite.
/// Runs all writes in a single SQLite transaction with chunked multi-value
/// INSERT statements for maximum throughput.
///
/// Returns `true` on success, `false` on any error so the JS caller can
/// fall back to the JS batch insert path.
#[napi]
pub fn bulk_insert_edges(db_path: String, edges: Vec<EdgeRow>) -> bool {
if edges.is_empty() {
return true;
}
let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX;
let conn = match Connection::open_with_flags(&db_path, flags) {
Ok(c) => c,
Err(_) => return false,
};
let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000");
do_insert_edges(&conn, &edges).is_ok()
}
// NOTE: The standalone `bulk_insert_edges` napi export was removed in Phase 6.17.
// All callers now use `NativeDatabase::bulk_insert_edges()` which reuses the
// persistent connection, eliminating the double-connection antipattern.

/// 199 rows × 5 params = 995 bind parameters per statement, safely under
/// the legacy `SQLITE_MAX_VARIABLE_NUMBER` default of 999.
Expand Down
27 changes: 4 additions & 23 deletions crates/codegraph-core/src/insert_nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
use std::collections::HashMap;

use napi_derive::napi;
use rusqlite::{params, Connection, OpenFlags};
use rusqlite::{params, Connection};
use serde::{Deserialize, Serialize};

// ── Input types (received from JS via napi) ─────────────────────────
Expand Down Expand Up @@ -63,28 +63,9 @@ pub struct FileHashEntry {

// ── Public napi entry point ─────────────────────────────────────────

/// Bulk-insert nodes, children, containment edges, exports, and file hashes
/// into the database. Runs all writes in a single SQLite transaction.
///
/// Returns `true` on success, `false` on any error (DB open failure,
/// missing table, transaction failure) so the JS caller can fall back.
#[napi]
pub fn bulk_insert_nodes(
db_path: String,
batches: Vec<InsertNodesBatch>,
file_hashes: Vec<FileHashEntry>,
removed_files: Vec<String>,
) -> bool {
let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX;
let conn = match Connection::open_with_flags(&db_path, flags) {
Ok(c) => c,
Err(_) => return false,
};

let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000");

do_insert_nodes(&conn, &batches, &file_hashes, &removed_files).is_ok()
}
// NOTE: The standalone `bulk_insert_nodes` napi export was removed in Phase 6.17.
// All callers now use `NativeDatabase::bulk_insert_nodes()` which reuses the
// persistent connection, eliminating the double-connection antipattern.

// ── Internal implementation ─────────────────────────────────────────

Expand Down
18 changes: 15 additions & 3 deletions crates/codegraph-core/src/native_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,8 +305,15 @@ impl NativeDatabase {
| OpenFlags::SQLITE_OPEN_NO_MUTEX;
let conn = Connection::open_with_flags(&db_path, flags)
.map_err(|e| napi::Error::from_reason(format!("Failed to open DB: {e}")))?;
// 64 entries comfortably holds the 40+ prepare_cached() queries in read_queries.rs
// plus build-path queries, avoiding LRU eviction (default is 16).
conn.set_prepared_statement_cache_capacity(64);
conn.execute_batch(
"PRAGMA journal_mode = WAL; PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000;",
"PRAGMA journal_mode = WAL; \
PRAGMA synchronous = NORMAL; \
PRAGMA busy_timeout = 5000; \
PRAGMA mmap_size = 268435456; \
PRAGMA temp_store = MEMORY;",
)
.map_err(|e| napi::Error::from_reason(format!("Failed to set pragmas: {e}")))?;
Ok(Self {
Expand All @@ -321,8 +328,13 @@ impl NativeDatabase {
let flags = OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_NO_MUTEX;
let conn = Connection::open_with_flags(&db_path, flags)
.map_err(|e| napi::Error::from_reason(format!("Failed to open DB readonly: {e}")))?;
conn.execute_batch("PRAGMA busy_timeout = 5000;")
.map_err(|e| napi::Error::from_reason(format!("Failed to set pragmas: {e}")))?;
conn.set_prepared_statement_cache_capacity(64);
conn.execute_batch(
"PRAGMA busy_timeout = 5000; \
PRAGMA mmap_size = 268435456; \
PRAGMA temp_store = MEMORY;",
)
.map_err(|e| napi::Error::from_reason(format!("Failed to set pragmas: {e}")))?;
Ok(Self {
conn: SendWrapper::new(Some(conn)),
db_path,
Expand Down
30 changes: 5 additions & 25 deletions crates/codegraph-core/src/roles_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
use std::collections::HashMap;

use napi_derive::napi;
use rusqlite::{Connection, OpenFlags};
use rusqlite::Connection;

// ── Constants ────────────────────────────────────────────────────────

Expand Down Expand Up @@ -68,30 +68,10 @@ pub struct RoleSummary {

// ── Public napi entry points ─────────────────────────────────────────

/// Full role classification: queries all nodes, computes fan-in/fan-out,
/// classifies roles, and batch-updates the `role` column.
/// Returns a summary of role counts, or null on failure.
#[napi]
pub fn classify_roles_full(db_path: String) -> Option<RoleSummary> {
let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX;
let conn = Connection::open_with_flags(&db_path, flags).ok()?;
let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000");
do_classify_full(&conn).ok()
}

/// Incremental role classification: only reclassifies nodes from changed files
/// plus their immediate edge neighbours.
/// Returns a summary of role counts for the affected nodes, or null on failure.
#[napi]
pub fn classify_roles_incremental(
db_path: String,
changed_files: Vec<String>,
) -> Option<RoleSummary> {
let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX;
let conn = Connection::open_with_flags(&db_path, flags).ok()?;
let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000");
do_classify_incremental(&conn, &changed_files).ok()
}
// NOTE: The standalone `classify_roles_full` and `classify_roles_incremental`
// napi exports were removed in Phase 6.17. All callers now use the corresponding
// NativeDatabase methods which reuse the persistent connection, eliminating the
// double-connection antipattern.

// ── Shared helpers ───────────────────────────────────────────────────

Expand Down
20 changes: 20 additions & 0 deletions src/db/better-sqlite3.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
* Lazy-loaded better-sqlite3 constructor.
*
* Centralises the `createRequire` + cache pattern so every call site that
* needs a JS-side SQLite handle can `import { getDatabase } from '…/db/better-sqlite3.js'`
* instead of duplicating the boilerplate. The native engine path (NativeDatabase /
* rusqlite) never touches this module.
*/
import { createRequire } from 'node:module';

const _require = createRequire(import.meta.url);
let _Database: any;

/**
 * Lazily resolve and memoise the `better-sqlite3` Database constructor.
 * The CommonJS require only runs on the first call; later calls return
 * the cached constructor.
 */
export function getDatabase(): new (...args: any[]) => any {
  _Database ??= _require('better-sqlite3');
  return _Database;
}
4 changes: 3 additions & 1 deletion src/db/connection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ import { execFileSync } from 'node:child_process';
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import Database from 'better-sqlite3';
import { debug, warn } from '../infrastructure/logger.js';
import { getNative, isNativeAvailable } from '../infrastructure/native.js';
import { DbError } from '../shared/errors.js';
import type { BetterSqlite3Database, NativeDatabase } from '../types.js';
import { getDatabase } from './better-sqlite3.js';
import { Repository } from './repository/base.js';
import { NativeRepository } from './repository/native-repository.js';
import { SqliteRepository } from './repository/sqlite-repository.js';
Expand Down Expand Up @@ -150,6 +150,7 @@ export function openDb(dbPath: string): LockedDatabase {
const dir = path.dirname(dbPath);
if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
acquireAdvisoryLock(dbPath);
const Database = getDatabase();
const db = new Database(dbPath) as unknown as LockedDatabase;
db.pragma('journal_mode = WAL');
db.pragma('busy_timeout = 5000');
Expand Down Expand Up @@ -295,6 +296,7 @@ export function openReadonlyOrFail(customPath?: string): BetterSqlite3Database {
{ file: dbPath },
);
}
const Database = getDatabase();
const db = new Database(dbPath, { readonly: true }) as unknown as BetterSqlite3Database;

// Warn once per process if the DB was built with a different codegraph version
Expand Down
15 changes: 5 additions & 10 deletions src/domain/graph/builder/stages/build-edges.ts
Original file line number Diff line number Diff line change
Expand Up @@ -673,30 +673,25 @@ export async function buildEdges(ctx: PipelineContext): Promise<void> {

// When using native edge insert, skip JS insert here — do it after tx commits.
// Otherwise insert edges within this transaction for atomicity.
const useNativeEdgeInsert = !!(ctx.nativeDb?.bulkInsertEdges || native?.bulkInsertEdges);
const useNativeEdgeInsert = !!ctx.nativeDb?.bulkInsertEdges;
if (!useNativeEdgeInsert) {
batchInsertEdges(db, allEdgeRows);
}
});
computeEdgesTx();

// Phase 2: Native rusqlite bulk insert (outside better-sqlite3 transaction
// to avoid SQLITE_BUSY contention). Prefer NativeDatabase persistent
// connection (6.15), fall back to standalone function (6.12).
if ((ctx.nativeDb?.bulkInsertEdges || native?.bulkInsertEdges) && allEdgeRows.length > 0) {
// to avoid SQLITE_BUSY contention). Uses NativeDatabase persistent connection.
// Standalone napi functions were removed in 6.17.
if (ctx.nativeDb?.bulkInsertEdges && allEdgeRows.length > 0) {
const nativeEdges = allEdgeRows.map((r) => ({
sourceId: r[0],
targetId: r[1],
kind: r[2],
confidence: r[3],
dynamic: r[4],
}));
let ok: boolean;
if (ctx.nativeDb?.bulkInsertEdges) {
ok = ctx.nativeDb.bulkInsertEdges(nativeEdges);
} else {
ok = native!.bulkInsertEdges(db.name, nativeEdges);
}
const ok = ctx.nativeDb.bulkInsertEdges(nativeEdges);
if (!ok) {
debug('Native bulkInsertEdges failed — falling back to JS batchInsertEdges');
batchInsertEdges(db, allEdgeRows);
Expand Down
28 changes: 2 additions & 26 deletions src/domain/graph/builder/stages/build-structure.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import path from 'node:path';
import { performance } from 'node:perf_hooks';
import { debug } from '#infrastructure/logger.js';
import { loadNative } from '#infrastructure/native.js';
import { normalizePath } from '#shared/constants.js';
import type { ExtractorOutput } from '#types';
import type { PipelineContext } from '../context.js';
Expand Down Expand Up @@ -95,7 +94,8 @@ export async function buildStructure(ctx: PipelineContext): Promise<void> {
try {
let roleSummary: Record<string, number> | null = null;

// Try NativeDatabase persistent connection first (6.15), then standalone (6.12)
// Use NativeDatabase persistent connection (Phase 6.15+).
// Standalone napi functions were removed in 6.17 — falls through to JS if nativeDb unavailable.
if (ctx.nativeDb?.classifyRolesFull) {
const nativeResult =
changedFileList && changedFileList.length > 0
Expand All @@ -116,30 +116,6 @@ export async function buildStructure(ctx: PipelineContext): Promise<void> {
leaf: nativeResult.leaf,
};
}
} else if (ctx.engineName === 'native') {
const native = loadNative();
if (native?.classifyRolesFull) {
const dbPath = db.name;
const nativeResult =
changedFileList && changedFileList.length > 0
? native.classifyRolesIncremental?.(dbPath, changedFileList)
: native.classifyRolesFull(dbPath);
if (nativeResult) {
roleSummary = {
entry: nativeResult.entry,
core: nativeResult.core,
utility: nativeResult.utility,
adapter: nativeResult.adapter,
dead: nativeResult.dead,
'dead-leaf': nativeResult.deadLeaf,
'dead-entry': nativeResult.deadEntry,
'dead-ffi': nativeResult.deadFfi,
'dead-unresolved': nativeResult.deadUnresolved,
'test-only': nativeResult.testOnly,
leaf: nativeResult.leaf,
};
}
}
}

// Fall back to JS path
Expand Down
17 changes: 5 additions & 12 deletions src/domain/graph/builder/stages/insert-nodes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import path from 'node:path';
import { performance } from 'node:perf_hooks';
import { bulkNodeIdsByFile } from '../../../../db/index.js';
import { loadNative } from '../../../../infrastructure/native.js';
import type {
BetterSqlite3Database,
ExtractorOutput,
Expand Down Expand Up @@ -40,13 +39,11 @@ interface PrecomputedFileData {
// ── Native fast-path ─────────────────────────────────────────────────

function tryNativeInsert(ctx: PipelineContext): boolean {
// Prefer NativeDatabase persistent connection (6.15), fall back to standalone (6.12)
const hasNativeDb = !!ctx.nativeDb?.bulkInsertNodes;
const native = hasNativeDb ? null : loadNative();
if (!hasNativeDb && !native?.bulkInsertNodes) return false;
// Use NativeDatabase persistent connection (Phase 6.15+).
// Standalone napi functions were removed in 6.17 — falls through to JS if nativeDb unavailable.
if (!ctx.nativeDb?.bulkInsertNodes) return false;

const { dbPath, allSymbols, filesToParse, metadataUpdates, rootDir, removed } = ctx;
if (!hasNativeDb && !dbPath) return false;
const { allSymbols, filesToParse, metadataUpdates, rootDir, removed } = ctx;

// Marshal allSymbols → InsertNodesBatch[]
const batches: Array<{
Expand Down Expand Up @@ -141,11 +138,7 @@ function tryNativeInsert(ctx: PipelineContext): boolean {
fileHashes.push({ file: item.relPath, hash: item.hash, mtime, size });
}

// Route through persistent NativeDatabase when available (6.15)
if (ctx.nativeDb?.bulkInsertNodes) {
return ctx.nativeDb.bulkInsertNodes(batches, fileHashes, removed);
}
return native!.bulkInsertNodes(dbPath!, batches, fileHashes, removed);
return ctx.nativeDb.bulkInsertNodes(batches, fileHashes, removed);
}

// ── JS fallback: Phase 1 ────────────────────────────────────────────
Expand Down
Loading
Loading