Skip to content
127 changes: 45 additions & 82 deletions docs/roadmap/ROADMAP.md

Large diffs are not rendered by default.

43 changes: 33 additions & 10 deletions src/ast-analysis/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -114,20 +114,31 @@ async function ensureWasmTreesIfNeeded(
const ext = path.extname(relPath).toLowerCase();
const defs = symbols.definitions || [];

// Only consider definitions with a real function body.
// Interface/type property signatures are extracted as methods but correctly
// lack complexity/CFG data from the native engine. Exclude them by:
// 1. Single-line span (endLine === line) — type property on one line
// 2. Dotted names (e.g. "Interface.prop") — child definitions of types
const hasFuncBody = (d: {
  name: string;
  kind: string;
  line: number;
  endLine?: number | null;
}): boolean => {
  // Must be a callable definition, not a class/type/variable.
  if (d.kind !== 'function' && d.kind !== 'method') return false;
  // Must have a valid start line and a multi-line span (a real body).
  if (d.line <= 0) return false;
  if (d.endLine == null || d.endLine <= d.line) return false;
  // Dotted names are child members of type declarations — not real bodies.
  return !d.name.includes('.');
};

const needsComplexity =
doComplexity &&
COMPLEXITY_EXTENSIONS.has(ext) &&
defs.some((d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity);
defs.some((d) => hasFuncBody(d) && !d.complexity);
const needsCfg =
doCfg &&
CFG_EXTENSIONS.has(ext) &&
defs.some(
(d) =>
(d.kind === 'function' || d.kind === 'method') &&
d.line &&
d.cfg !== null &&
!Array.isArray(d.cfg?.blocks),
);
defs.some((d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks));
const needsDataflow = doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext);

if (needsComplexity || needsCfg || needsDataflow) {
Expand Down Expand Up @@ -186,8 +197,17 @@ function setupVisitors(
const cRules = COMPLEXITY_RULES.get(langId);
const hRules = HALSTEAD_RULES.get(langId);
if (doComplexity && cRules) {
// Only trigger WASM complexity for definitions with real function bodies.
// Interface/type property signatures (dotted names, single-line span)
// correctly lack native complexity data and should not trigger a fallback.
const needsWasmComplexity = defs.some(
(d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity,
(d) =>
(d.kind === 'function' || d.kind === 'method') &&
d.line > 0 &&
d.endLine != null &&
d.endLine > d.line &&
!d.name.includes('.') &&
!d.complexity,
);
if (needsWasmComplexity) {
complexityVisitor = createComplexityVisitor(cRules, hRules, { fileLevelWalk: true, langId });
Expand All @@ -213,7 +233,10 @@ function setupVisitors(
const needsWasmCfg = defs.some(
(d) =>
(d.kind === 'function' || d.kind === 'method') &&
d.line &&
d.line > 0 &&
d.endLine != null &&
d.endLine > d.line &&
!d.name.includes('.') &&
d.cfg !== null &&
!Array.isArray(d.cfg?.blocks),
);
Expand Down
72 changes: 57 additions & 15 deletions src/domain/graph/builder/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -208,23 +208,64 @@ export function purgeFilesFromGraph(
}

/** Batch INSERT chunk size for multi-value INSERTs. */
const BATCH_CHUNK = 200;
const BATCH_CHUNK = 500;

// Statement caches keyed by chunk size — avoids recompiling for every batch.
const nodeStmtCache = new WeakMap<BetterSqlite3.Database, Map<number, BetterSqlite3.Statement>>();
const edgeStmtCache = new WeakMap<BetterSqlite3.Database, Map<number, BetterSqlite3.Statement>>();

/**
 * Return a cached multi-value INSERT statement for `nodes`, preparing and
 * caching it on first use. Statements are cached per database connection
 * (via the WeakMap) and per chunk size, so each distinct batch width is
 * compiled at most once.
 */
function getNodeStmt(db: BetterSqlite3.Database, chunkSize: number): BetterSqlite3.Statement {
  const cache = nodeStmtCache.get(db) ?? new Map<number, BetterSqlite3.Statement>();
  // First statement for this connection: register the per-db cache.
  if (!nodeStmtCache.has(db)) nodeStmtCache.set(db, cache);
  const cached = cache.get(chunkSize);
  if (cached) return cached;
  // One placeholder tuple per row: 9 columns per node row.
  const placeholders = new Array(chunkSize).fill('(?,?,?,?,?,?,?,?,?)').join(',');
  const prepared = db.prepare(
    'INSERT OR IGNORE INTO nodes (name,kind,file,line,end_line,parent_id,qualified_name,scope,visibility) VALUES ' +
      placeholders,
  );
  cache.set(chunkSize, prepared);
  return prepared;
}

/**
 * Return a cached multi-value INSERT statement for `edges`, preparing and
 * caching it on first use. Mirrors `getNodeStmt`: cached per database
 * connection and per chunk size so each batch width compiles once.
 */
function getEdgeStmt(db: BetterSqlite3.Database, chunkSize: number): BetterSqlite3.Statement {
  const cache = edgeStmtCache.get(db) ?? new Map<number, BetterSqlite3.Statement>();
  // First statement for this connection: register the per-db cache.
  if (!edgeStmtCache.has(db)) edgeStmtCache.set(db, cache);
  const cached = cache.get(chunkSize);
  if (cached) return cached;
  // One placeholder tuple per row: 5 columns per edge row.
  const placeholders = new Array(chunkSize).fill('(?,?,?,?,?)').join(',');
  const prepared = db.prepare(
    'INSERT INTO edges (source_id,target_id,kind,confidence,dynamic) VALUES ' +
      placeholders,
  );
  cache.set(chunkSize, prepared);
  return prepared;
}

/**
* Batch-insert node rows via multi-value INSERT statements.
* Each row: [name, kind, file, line, end_line, parent_id, qualified_name, scope, visibility]
*/
export function batchInsertNodes(db: BetterSqlite3.Database, rows: unknown[][]): void {
if (!rows.length) return;
const ph = '(?,?,?,?,?,?,?,?,?)';
for (let i = 0; i < rows.length; i += BATCH_CHUNK) {
const chunk = rows.slice(i, i + BATCH_CHUNK);
const end = Math.min(i + BATCH_CHUNK, rows.length);
const chunkSize = end - i;
const stmt = getNodeStmt(db, chunkSize);
const vals: unknown[] = [];
for (const r of chunk) vals.push(r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8]);
db.prepare(
'INSERT OR IGNORE INTO nodes (name,kind,file,line,end_line,parent_id,qualified_name,scope,visibility) VALUES ' +
chunk.map(() => ph).join(','),
).run(...vals);
for (let j = i; j < end; j++) {
const r = rows[j] as unknown[];
vals.push(r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8]);
}
stmt.run(...vals);
}
}

Expand All @@ -234,14 +275,15 @@ export function batchInsertNodes(db: BetterSqlite3.Database, rows: unknown[][]):
*/
export function batchInsertEdges(db: BetterSqlite3.Database, rows: unknown[][]): void {
if (!rows.length) return;
const ph = '(?,?,?,?,?)';
for (let i = 0; i < rows.length; i += BATCH_CHUNK) {
const chunk = rows.slice(i, i + BATCH_CHUNK);
const end = Math.min(i + BATCH_CHUNK, rows.length);
const chunkSize = end - i;
const stmt = getEdgeStmt(db, chunkSize);
const vals: unknown[] = [];
for (const r of chunk) vals.push(r[0], r[1], r[2], r[3], r[4]);
db.prepare(
'INSERT INTO edges (source_id,target_id,kind,confidence,dynamic) VALUES ' +
chunk.map(() => ph).join(','),
).run(...vals);
for (let j = i; j < end; j++) {
const r = rows[j] as unknown[];
vals.push(r[0], r[1], r[2], r[3], r[4]);
}
stmt.run(...vals);
}
}
88 changes: 55 additions & 33 deletions src/domain/graph/builder/stages/insert-nodes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import path from 'node:path';
import { performance } from 'node:perf_hooks';
import type BetterSqlite3 from 'better-sqlite3';
import { bulkNodeIdsByFile } from '../../../../db/index.js';
import type { ExtractorOutput, MetadataUpdate, NodeIdRow } from '../../../../types.js';
import type { ExtractorOutput, MetadataUpdate } from '../../../../types.js';
import type { PipelineContext } from '../context.js';
import {
batchInsertEdges,
Expand All @@ -35,6 +35,7 @@ function insertDefinitionsAndExports(
allSymbols: Map<string, ExtractorOutput>,
): void {
const phase1Rows: unknown[][] = [];
const exportKeys: unknown[][] = [];
for (const [relPath, symbols] of allSymbols) {
phase1Rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]);
for (const def of symbols.definitions) {
Expand All @@ -54,38 +55,68 @@ function insertDefinitionsAndExports(
}
for (const exp of symbols.exports) {
phase1Rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]);
exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
}
}
batchInsertNodes(db, phase1Rows);

// Mark exported symbols
const markExported = db.prepare(
'UPDATE nodes SET exported = 1 WHERE name = ? AND kind = ? AND file = ? AND line = ?',
);
for (const [relPath, symbols] of allSymbols) {
for (const exp of symbols.exports) {
markExported.run(exp.name, exp.kind, relPath, exp.line);
// Mark exported symbols in batches (cache prepared statements by chunk size)
if (exportKeys.length > 0) {
const EXPORT_CHUNK = 500;
const exportStmtCache = new Map<number, BetterSqlite3.Statement>();
for (let i = 0; i < exportKeys.length; i += EXPORT_CHUNK) {
const end = Math.min(i + EXPORT_CHUNK, exportKeys.length);
const chunkSize = end - i;
let updateStmt = exportStmtCache.get(chunkSize);
if (!updateStmt) {
const conditions = Array.from(
{ length: chunkSize },
() => '(name = ? AND kind = ? AND file = ? AND line = ?)',
).join(' OR ');
updateStmt = db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`);
exportStmtCache.set(chunkSize, updateStmt);
}
const vals: unknown[] = [];
for (let j = i; j < end; j++) {
const k = exportKeys[j] as unknown[];
vals.push(k[0], k[1], k[2], k[3]);
}
updateStmt.run(...vals);
}
}
}

// ── Phase 2: Insert children (needs parent IDs) ────────────────────────
// ── Phase 2+3: Insert children and containment edges (two nodeIdMap passes) ──

function insertChildren(
function insertChildrenAndEdges(
db: BetterSqlite3.Database,
allSymbols: Map<string, ExtractorOutput>,
): void {
const childRows: unknown[][] = [];
const edgeRows: unknown[][] = [];

for (const [relPath, symbols] of allSymbols) {
// First pass: collect file→def edges and child rows
const nodeIdMap = new Map<string, number>();
for (const row of bulkNodeIdsByFile(db, relPath)) {
nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
}

const fileId = nodeIdMap.get(`${relPath}|file|0`);

for (const def of symbols.definitions) {
if (!def.children?.length) continue;
const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`);

// Containment edge: file -> definition
if (fileId && defId) {
edgeRows.push([fileId, defId, 'contains', 1.0, 0]);
}

if (!def.children?.length) continue;
if (!defId) continue;

for (const child of def.children) {
// Child node
const qualifiedName = `${def.name}.${child.name}`;
childRows.push([
child.name,
Expand All @@ -101,40 +132,32 @@ function insertChildren(
}
}
}
batchInsertNodes(db, childRows);
}

// ── Phase 3: Insert containment + parameter_of edges ────────────────────
// Insert children first (so they exist for edge lookup)
batchInsertNodes(db, childRows);

function insertContainmentEdges(
db: BetterSqlite3.Database,
allSymbols: Map<string, ExtractorOutput>,
): void {
const edgeRows: unknown[][] = [];
// Now re-fetch IDs to include newly-inserted children, then add child edges
for (const [relPath, symbols] of allSymbols) {
const nodeIdMap = new Map<string, number>();
for (const row of bulkNodeIdsByFile(db, relPath)) {
nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
}
const fileId = nodeIdMap.get(`${relPath}|file|0`);
for (const def of symbols.definitions) {
if (!def.children?.length) continue;
const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`);
if (fileId && defId) {
edgeRows.push([fileId, defId, 'contains', 1.0, 0]);
}
if (def.children?.length && defId) {
for (const child of def.children) {
const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`);
if (childId) {
edgeRows.push([defId, childId, 'contains', 1.0, 0]);
if (child.kind === 'parameter') {
edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]);
}
if (!defId) continue;
for (const child of def.children) {
const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`);
if (childId) {
edgeRows.push([defId, childId, 'contains', 1.0, 0]);
if (child.kind === 'parameter') {
edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]);
}
}
}
}
}

batchInsertEdges(db, edgeRows);
}

Expand Down Expand Up @@ -217,8 +240,7 @@ export async function insertNodes(ctx: PipelineContext): Promise<void> {

const insertAll = db.transaction(() => {
insertDefinitionsAndExports(db, allSymbols);
insertChildren(db, allSymbols);
insertContainmentEdges(db, allSymbols);
insertChildrenAndEdges(db, allSymbols);
updateFileHashes(db, allSymbols, precomputedData, metadataUpdates, rootDir, upsertHash);
});

Expand Down
13 changes: 10 additions & 3 deletions src/features/cfg.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,14 @@ interface FileSymbols {
*/
function hasNativeCfgForFile(symbols: FileSymbols): boolean {
return symbols.definitions
.filter((d) => (d.kind === 'function' || d.kind === 'method') && d.line)
.filter(
(d) =>
(d.kind === 'function' || d.kind === 'method') &&
d.line > 0 &&
d.endLine != null &&
d.endLine > d.line &&
!d.name.includes('.'),
)
.every((d) => d.cfg === null || (d.cfg?.blocks?.length ?? 0) > 0);
}

Expand Down Expand Up @@ -206,15 +213,15 @@ function buildVisitorCfgMap(
return nameNode ? nameNode.text : null;
},
};
const walkResults = walkWithVisitors(tree!.rootNode, [visitor], langId, walkerOpts);
const walkResults = walkWithVisitors(tree?.rootNode, [visitor], langId, walkerOpts);
// biome-ignore lint/complexity/useLiteralKeys: noPropertyAccessFromIndexSignature requires bracket notation
const cfgResults = (walkResults['cfg'] || []) as VisitorCfgResult[];
const visitorCfgByLine = new Map<number, VisitorCfgResult[]>();
for (const r of cfgResults) {
if (r.funcNode) {
const line = r.funcNode.startPosition.row + 1;
if (!visitorCfgByLine.has(line)) visitorCfgByLine.set(line, []);
visitorCfgByLine.get(line)!.push(r);
visitorCfgByLine.get(line)?.push(r);
}
}
return visitorCfgByLine;
Expand Down
Loading
Loading