Skip to content

Commit 0435c41

Browse files
authored
perf(native): fix WASM fallback bypass and batch SQL inserts (#606)
* chore: add benchmark npm script and stale embeddings warning
  Add `npm run benchmark` script to make benchmark execution discoverable instead of requiring manual `node --import ./scripts/ts-resolve-loader.js` invocation. Warn users when embeddings predate the last graph rebuild so they know to re-run `codegraph embed` for fresh search results.
  Impact: 1 function changed, 8 affected
* perf(native): fix WASM fallback bypass and batch SQL inserts
  Fix interface property signatures (dotted names, single-line spans) incorrectly triggering WASM tree creation on native builds across engine.ts, complexity.ts, and cfg.ts. Add statement caching and batch UPDATE optimizations for insert and role classification stages. Native full build: 2001ms vs WASM 3116ms (1.6x faster). Key wins: complexity 4.2x, cfg 3.2x, parse 2.4x faster.
  Impact: 26 functions changed, 25 affected
* fix(structure): remove superfluous optional chaining on hotspot query stmt (#606)
  Impact: 1 function changed, 0 affected
* fix(builder): cache export-marking UPDATE statement per chunk size (#606)
  Impact: 1 function changed, 1 affected
* fix(types): resolve TS strict-mode errors in structure.ts and complexity.ts (#606)
  Impact: 3 functions changed, 2 affected
* fix: correct misleading comments and cache role UPDATE stmts (#606)
  - Fix misleading "single nodeIdMap pass" comment in insertChildrenAndEdges (actually two passes: one before and one after batchInsertNodes)
  - Cache role UPDATE prepared statements per chunk size in classifyNodeRoles, consistent with WeakMap-based caching pattern used in helpers.ts
  Impact: 2 functions changed, 4 affected
1 parent 820e9ff commit 0435c41

7 files changed

Lines changed: 306 additions & 231 deletions

File tree

docs/roadmap/ROADMAP.md

Lines changed: 45 additions & 82 deletions
Large diffs are not rendered by default.

src/ast-analysis/engine.ts

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -114,20 +114,31 @@ async function ensureWasmTreesIfNeeded(
114114
const ext = path.extname(relPath).toLowerCase();
115115
const defs = symbols.definitions || [];
116116

117+
// Only consider definitions with a real function body.
118+
// Interface/type property signatures are extracted as methods but correctly
119+
// lack complexity/CFG data from the native engine. Exclude them by:
120+
// 1. Single-line span (endLine === line) — type property on one line
121+
// 2. Dotted names (e.g. "Interface.prop") — child definitions of types
122+
const hasFuncBody = (d: {
123+
name: string;
124+
kind: string;
125+
line: number;
126+
endLine?: number | null;
127+
}) =>
128+
(d.kind === 'function' || d.kind === 'method') &&
129+
d.line > 0 &&
130+
d.endLine != null &&
131+
d.endLine > d.line &&
132+
!d.name.includes('.');
133+
117134
const needsComplexity =
118135
doComplexity &&
119136
COMPLEXITY_EXTENSIONS.has(ext) &&
120-
defs.some((d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity);
137+
defs.some((d) => hasFuncBody(d) && !d.complexity);
121138
const needsCfg =
122139
doCfg &&
123140
CFG_EXTENSIONS.has(ext) &&
124-
defs.some(
125-
(d) =>
126-
(d.kind === 'function' || d.kind === 'method') &&
127-
d.line &&
128-
d.cfg !== null &&
129-
!Array.isArray(d.cfg?.blocks),
130-
);
141+
defs.some((d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks));
131142
const needsDataflow = doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext);
132143

133144
if (needsComplexity || needsCfg || needsDataflow) {
@@ -186,8 +197,17 @@ function setupVisitors(
186197
const cRules = COMPLEXITY_RULES.get(langId);
187198
const hRules = HALSTEAD_RULES.get(langId);
188199
if (doComplexity && cRules) {
200+
// Only trigger WASM complexity for definitions with real function bodies.
201+
// Interface/type property signatures (dotted names, single-line span)
202+
// correctly lack native complexity data and should not trigger a fallback.
189203
const needsWasmComplexity = defs.some(
190-
(d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity,
204+
(d) =>
205+
(d.kind === 'function' || d.kind === 'method') &&
206+
d.line > 0 &&
207+
d.endLine != null &&
208+
d.endLine > d.line &&
209+
!d.name.includes('.') &&
210+
!d.complexity,
191211
);
192212
if (needsWasmComplexity) {
193213
complexityVisitor = createComplexityVisitor(cRules, hRules, { fileLevelWalk: true, langId });
@@ -213,7 +233,10 @@ function setupVisitors(
213233
const needsWasmCfg = defs.some(
214234
(d) =>
215235
(d.kind === 'function' || d.kind === 'method') &&
216-
d.line &&
236+
d.line > 0 &&
237+
d.endLine != null &&
238+
d.endLine > d.line &&
239+
!d.name.includes('.') &&
217240
d.cfg !== null &&
218241
!Array.isArray(d.cfg?.blocks),
219242
);

src/domain/graph/builder/helpers.ts

Lines changed: 57 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -208,23 +208,64 @@ export function purgeFilesFromGraph(
208208
}
209209

210210
/** Batch INSERT chunk size for multi-value INSERTs. */
211-
const BATCH_CHUNK = 200;
211+
const BATCH_CHUNK = 500;
212+
213+
// Statement caches keyed by chunk size — avoids recompiling for every batch.
214+
const nodeStmtCache = new WeakMap<BetterSqlite3.Database, Map<number, BetterSqlite3.Statement>>();
215+
const edgeStmtCache = new WeakMap<BetterSqlite3.Database, Map<number, BetterSqlite3.Statement>>();
216+
217+
function getNodeStmt(db: BetterSqlite3.Database, chunkSize: number): BetterSqlite3.Statement {
218+
let cache = nodeStmtCache.get(db);
219+
if (!cache) {
220+
cache = new Map();
221+
nodeStmtCache.set(db, cache);
222+
}
223+
let stmt = cache.get(chunkSize);
224+
if (!stmt) {
225+
const ph = '(?,?,?,?,?,?,?,?,?)';
226+
stmt = db.prepare(
227+
'INSERT OR IGNORE INTO nodes (name,kind,file,line,end_line,parent_id,qualified_name,scope,visibility) VALUES ' +
228+
Array.from({ length: chunkSize }, () => ph).join(','),
229+
);
230+
cache.set(chunkSize, stmt);
231+
}
232+
return stmt;
233+
}
234+
235+
function getEdgeStmt(db: BetterSqlite3.Database, chunkSize: number): BetterSqlite3.Statement {
236+
let cache = edgeStmtCache.get(db);
237+
if (!cache) {
238+
cache = new Map();
239+
edgeStmtCache.set(db, cache);
240+
}
241+
let stmt = cache.get(chunkSize);
242+
if (!stmt) {
243+
const ph = '(?,?,?,?,?)';
244+
stmt = db.prepare(
245+
'INSERT INTO edges (source_id,target_id,kind,confidence,dynamic) VALUES ' +
246+
Array.from({ length: chunkSize }, () => ph).join(','),
247+
);
248+
cache.set(chunkSize, stmt);
249+
}
250+
return stmt;
251+
}
212252

213253
/**
214254
* Batch-insert node rows via multi-value INSERT statements.
215255
* Each row: [name, kind, file, line, end_line, parent_id, qualified_name, scope, visibility]
216256
*/
217257
export function batchInsertNodes(db: BetterSqlite3.Database, rows: unknown[][]): void {
218258
if (!rows.length) return;
219-
const ph = '(?,?,?,?,?,?,?,?,?)';
220259
for (let i = 0; i < rows.length; i += BATCH_CHUNK) {
221-
const chunk = rows.slice(i, i + BATCH_CHUNK);
260+
const end = Math.min(i + BATCH_CHUNK, rows.length);
261+
const chunkSize = end - i;
262+
const stmt = getNodeStmt(db, chunkSize);
222263
const vals: unknown[] = [];
223-
for (const r of chunk) vals.push(r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8]);
224-
db.prepare(
225-
'INSERT OR IGNORE INTO nodes (name,kind,file,line,end_line,parent_id,qualified_name,scope,visibility) VALUES ' +
226-
chunk.map(() => ph).join(','),
227-
).run(...vals);
264+
for (let j = i; j < end; j++) {
265+
const r = rows[j] as unknown[];
266+
vals.push(r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8]);
267+
}
268+
stmt.run(...vals);
228269
}
229270
}
230271

@@ -234,14 +275,15 @@ export function batchInsertNodes(db: BetterSqlite3.Database, rows: unknown[][]):
234275
*/
235276
export function batchInsertEdges(db: BetterSqlite3.Database, rows: unknown[][]): void {
236277
if (!rows.length) return;
237-
const ph = '(?,?,?,?,?)';
238278
for (let i = 0; i < rows.length; i += BATCH_CHUNK) {
239-
const chunk = rows.slice(i, i + BATCH_CHUNK);
279+
const end = Math.min(i + BATCH_CHUNK, rows.length);
280+
const chunkSize = end - i;
281+
const stmt = getEdgeStmt(db, chunkSize);
240282
const vals: unknown[] = [];
241-
for (const r of chunk) vals.push(r[0], r[1], r[2], r[3], r[4]);
242-
db.prepare(
243-
'INSERT INTO edges (source_id,target_id,kind,confidence,dynamic) VALUES ' +
244-
chunk.map(() => ph).join(','),
245-
).run(...vals);
283+
for (let j = i; j < end; j++) {
284+
const r = rows[j] as unknown[];
285+
vals.push(r[0], r[1], r[2], r[3], r[4]);
286+
}
287+
stmt.run(...vals);
246288
}
247289
}

src/domain/graph/builder/stages/insert-nodes.ts

Lines changed: 55 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import path from 'node:path';
88
import { performance } from 'node:perf_hooks';
99
import type BetterSqlite3 from 'better-sqlite3';
1010
import { bulkNodeIdsByFile } from '../../../../db/index.js';
11-
import type { ExtractorOutput, MetadataUpdate, NodeIdRow } from '../../../../types.js';
11+
import type { ExtractorOutput, MetadataUpdate } from '../../../../types.js';
1212
import type { PipelineContext } from '../context.js';
1313
import {
1414
batchInsertEdges,
@@ -35,6 +35,7 @@ function insertDefinitionsAndExports(
3535
allSymbols: Map<string, ExtractorOutput>,
3636
): void {
3737
const phase1Rows: unknown[][] = [];
38+
const exportKeys: unknown[][] = [];
3839
for (const [relPath, symbols] of allSymbols) {
3940
phase1Rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]);
4041
for (const def of symbols.definitions) {
@@ -54,38 +55,68 @@ function insertDefinitionsAndExports(
5455
}
5556
for (const exp of symbols.exports) {
5657
phase1Rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]);
58+
exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
5759
}
5860
}
5961
batchInsertNodes(db, phase1Rows);
6062

61-
// Mark exported symbols
62-
const markExported = db.prepare(
63-
'UPDATE nodes SET exported = 1 WHERE name = ? AND kind = ? AND file = ? AND line = ?',
64-
);
65-
for (const [relPath, symbols] of allSymbols) {
66-
for (const exp of symbols.exports) {
67-
markExported.run(exp.name, exp.kind, relPath, exp.line);
63+
// Mark exported symbols in batches (cache prepared statements by chunk size)
64+
if (exportKeys.length > 0) {
65+
const EXPORT_CHUNK = 500;
66+
const exportStmtCache = new Map<number, BetterSqlite3.Statement>();
67+
for (let i = 0; i < exportKeys.length; i += EXPORT_CHUNK) {
68+
const end = Math.min(i + EXPORT_CHUNK, exportKeys.length);
69+
const chunkSize = end - i;
70+
let updateStmt = exportStmtCache.get(chunkSize);
71+
if (!updateStmt) {
72+
const conditions = Array.from(
73+
{ length: chunkSize },
74+
() => '(name = ? AND kind = ? AND file = ? AND line = ?)',
75+
).join(' OR ');
76+
updateStmt = db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`);
77+
exportStmtCache.set(chunkSize, updateStmt);
78+
}
79+
const vals: unknown[] = [];
80+
for (let j = i; j < end; j++) {
81+
const k = exportKeys[j] as unknown[];
82+
vals.push(k[0], k[1], k[2], k[3]);
83+
}
84+
updateStmt.run(...vals);
6885
}
6986
}
7087
}
7188

72-
// ── Phase 2: Insert children (needs parent IDs) ────────────────────────
89+
// ── Phase 2+3: Insert children and containment edges (two nodeIdMap passes) ──
7390

74-
function insertChildren(
91+
function insertChildrenAndEdges(
7592
db: BetterSqlite3.Database,
7693
allSymbols: Map<string, ExtractorOutput>,
7794
): void {
7895
const childRows: unknown[][] = [];
96+
const edgeRows: unknown[][] = [];
97+
7998
for (const [relPath, symbols] of allSymbols) {
99+
// First pass: collect file→def edges and child rows
80100
const nodeIdMap = new Map<string, number>();
81101
for (const row of bulkNodeIdsByFile(db, relPath)) {
82102
nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
83103
}
104+
105+
const fileId = nodeIdMap.get(`${relPath}|file|0`);
106+
84107
for (const def of symbols.definitions) {
85-
if (!def.children?.length) continue;
86108
const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`);
109+
110+
// Containment edge: file -> definition
111+
if (fileId && defId) {
112+
edgeRows.push([fileId, defId, 'contains', 1.0, 0]);
113+
}
114+
115+
if (!def.children?.length) continue;
87116
if (!defId) continue;
117+
88118
for (const child of def.children) {
119+
// Child node
89120
const qualifiedName = `${def.name}.${child.name}`;
90121
childRows.push([
91122
child.name,
@@ -101,40 +132,32 @@ function insertChildren(
101132
}
102133
}
103134
}
104-
batchInsertNodes(db, childRows);
105-
}
106135

107-
// ── Phase 3: Insert containment + parameter_of edges ────────────────────
136+
// Insert children first (so they exist for edge lookup)
137+
batchInsertNodes(db, childRows);
108138

109-
function insertContainmentEdges(
110-
db: BetterSqlite3.Database,
111-
allSymbols: Map<string, ExtractorOutput>,
112-
): void {
113-
const edgeRows: unknown[][] = [];
139+
// Now re-fetch IDs to include newly-inserted children, then add child edges
114140
for (const [relPath, symbols] of allSymbols) {
115141
const nodeIdMap = new Map<string, number>();
116142
for (const row of bulkNodeIdsByFile(db, relPath)) {
117143
nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
118144
}
119-
const fileId = nodeIdMap.get(`${relPath}|file|0`);
120145
for (const def of symbols.definitions) {
146+
if (!def.children?.length) continue;
121147
const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`);
122-
if (fileId && defId) {
123-
edgeRows.push([fileId, defId, 'contains', 1.0, 0]);
124-
}
125-
if (def.children?.length && defId) {
126-
for (const child of def.children) {
127-
const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`);
128-
if (childId) {
129-
edgeRows.push([defId, childId, 'contains', 1.0, 0]);
130-
if (child.kind === 'parameter') {
131-
edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]);
132-
}
148+
if (!defId) continue;
149+
for (const child of def.children) {
150+
const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`);
151+
if (childId) {
152+
edgeRows.push([defId, childId, 'contains', 1.0, 0]);
153+
if (child.kind === 'parameter') {
154+
edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]);
133155
}
134156
}
135157
}
136158
}
137159
}
160+
138161
batchInsertEdges(db, edgeRows);
139162
}
140163

@@ -217,8 +240,7 @@ export async function insertNodes(ctx: PipelineContext): Promise<void> {
217240

218241
const insertAll = db.transaction(() => {
219242
insertDefinitionsAndExports(db, allSymbols);
220-
insertChildren(db, allSymbols);
221-
insertContainmentEdges(db, allSymbols);
243+
insertChildrenAndEdges(db, allSymbols);
222244
updateFileHashes(db, allSymbols, precomputedData, metadataUpdates, rootDir, upsertHash);
223245
});
224246

src/features/cfg.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,14 @@ interface FileSymbols {
9191
*/
9292
function hasNativeCfgForFile(symbols: FileSymbols): boolean {
9393
return symbols.definitions
94-
.filter((d) => (d.kind === 'function' || d.kind === 'method') && d.line)
94+
.filter(
95+
(d) =>
96+
(d.kind === 'function' || d.kind === 'method') &&
97+
d.line > 0 &&
98+
d.endLine != null &&
99+
d.endLine > d.line &&
100+
!d.name.includes('.'),
101+
)
95102
.every((d) => d.cfg === null || (d.cfg?.blocks?.length ?? 0) > 0);
96103
}
97104

@@ -206,15 +213,15 @@ function buildVisitorCfgMap(
206213
return nameNode ? nameNode.text : null;
207214
},
208215
};
209-
const walkResults = walkWithVisitors(tree!.rootNode, [visitor], langId, walkerOpts);
216+
const walkResults = walkWithVisitors(tree?.rootNode, [visitor], langId, walkerOpts);
210217
// biome-ignore lint/complexity/useLiteralKeys: noPropertyAccessFromIndexSignature requires bracket notation
211218
const cfgResults = (walkResults['cfg'] || []) as VisitorCfgResult[];
212219
const visitorCfgByLine = new Map<number, VisitorCfgResult[]>();
213220
for (const r of cfgResults) {
214221
if (r.funcNode) {
215222
const line = r.funcNode.startPosition.row + 1;
216223
if (!visitorCfgByLine.has(line)) visitorCfgByLine.set(line, []);
217-
visitorCfgByLine.get(line)!.push(r);
224+
visitorCfgByLine.get(line)?.push(r);
218225
}
219226
}
220227
return visitorCfgByLine;

0 commit comments

Comments
 (0)