Skip to content

Commit 0809023

Browse files
authored
perf(queries): batched native Rust query methods for read path (#698)
* perf(queries): batched native Rust query methods for read path Add 4 batched NativeDatabase methods that run multiple SQLite queries in a single napi call, eliminating JS↔Rust boundary crossings: - getGraphStats: replaces ~11 separate queries in module-map statsData - getDataflowEdges: replaces 6 directional queries per node in dataflow - getHotspots: replaces 4 eagerly-prepared queries in structure-query - batchFanMetrics: replaces N*2 loop queries in branch-compare Add openReadonlyWithNative() connection helper that opens a NativeDatabase alongside better-sqlite3 for incremental adoption. Wire native fast paths with JS fallback in module-map.ts, dataflow.ts, structure-query.ts, and branch-compare.ts. * fix(native): move batched query methods into #[napi] impl block and address review feedback (#698) - Move get_graph_stats, get_dataflow_edges, get_hotspots, batch_fan_metrics into a #[napi]-annotated impl block so napi-rs can generate bindings for methods that take &self - Remove dead total_files computation from get_graph_stats (unused by TS caller) - Use SQL bind parameters for kind in query_outgoing/query_incoming instead of string interpolation - Replace unwrap_or(0) with map_err in batch_fan_metrics to propagate errors instead of silently returning zeros * fix(native): resolve Rust compile errors in batched query methods (#698) - Use fully-qualified rusqlite::Connection in inner function signatures since #[napi] macro generates a module scope that hides the type - Add explicit rusqlite::Row type annotations on closure parameters needed after switching from format!() to params![] bind parameters
1 parent f5d8059 commit 0809023

10 files changed

Lines changed: 969 additions & 26 deletions

File tree

crates/codegraph-core/src/native_db.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1043,7 +1043,7 @@ impl NativeDatabase {
10431043
}
10441044

10451045
/// Check if a table exists in the database.
1046-
fn has_table(conn: &Connection, table: &str) -> bool {
1046+
pub(crate) fn has_table(conn: &Connection, table: &str) -> bool {
10471047
conn.query_row(
10481048
"SELECT 1 FROM sqlite_master WHERE type='table' AND name=?1",
10491049
params![table],

crates/codegraph-core/src/read_queries.rs

Lines changed: 411 additions & 1 deletion
Large diffs are not rendered by default.

crates/codegraph-core/src/read_types.rs

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,3 +175,128 @@ pub struct NativeComplexityMetrics {
175175
pub maintainability_index: Option<f64>,
176176
pub halstead_volume: Option<f64>,
177177
}
178+
179+
// ── Batched query return types ─────────────────────────────────────────
180+
181+
/// Kind + count pair for GROUP BY queries.
///
/// Element type of both `GraphStats::nodes_by_kind` and
/// `GraphStats::edges_by_kind`.
182+
#[napi(object)]
183+
#[derive(Debug, Clone)]
184+
pub struct KindCount {
185+
/// The `kind` value this count belongs to.
pub kind: String,
186+
/// Count reported for that kind.
pub count: i32,
187+
}
188+
189+
/// Role + count pair for role distribution queries.
///
/// Element type of `GraphStats::role_counts`.
190+
#[napi(object)]
191+
#[derive(Debug, Clone)]
192+
pub struct RoleCount {
193+
/// The role label this count belongs to.
pub role: String,
194+
/// Count reported for that role.
pub count: i32,
195+
}
196+
197+
/// File hotspot entry with fan-in/fan-out.
///
/// Element type of `GraphStats::hotspots`.
198+
#[napi(object)]
199+
#[derive(Debug, Clone)]
200+
pub struct FileHotspot {
201+
/// File the fan-in/fan-out figures refer to.
pub file: String,
202+
/// Fan-in figure for `file`.
pub fan_in: i32,
203+
/// Fan-out figure for `file`.
pub fan_out: i32,
204+
}
205+
206+
/// Complexity summary statistics.
///
/// Carried as the optional `GraphStats::complexity` field.
// NOTE(review): "mi" presumably stands for maintainability index (compare
// `maintainability_index` on `NativeComplexityMetrics`) — confirm.
207+
#[napi(object)]
208+
#[derive(Debug, Clone)]
209+
pub struct ComplexitySummary {
210+
/// Value reported as `analyzed` by the summary.
pub analyzed: i32,
211+
/// Average cognitive complexity.
pub avg_cognitive: f64,
212+
/// Average cyclomatic complexity.
pub avg_cyclomatic: f64,
213+
/// Maximum cognitive complexity.
pub max_cognitive: i32,
214+
/// Maximum cyclomatic complexity.
pub max_cyclomatic: i32,
215+
/// Average "mi" value.
pub avg_mi: f64,
216+
/// Minimum "mi" value.
pub min_mi: f64,
217+
}
218+
219+
/// Embedding metadata.
///
/// Carried as the optional `GraphStats::embeddings` field. Only `count` is
/// mandatory; the remaining fields may each be absent.
220+
#[napi(object)]
221+
#[derive(Debug, Clone)]
222+
pub struct EmbeddingInfo {
223+
/// Embedding count.
pub count: i32,
224+
/// Model identifier, when available.
pub model: Option<String>,
225+
/// Dimension value, when available.
pub dim: Option<i32>,
226+
/// Build timestamp string, when available.
// NOTE(review): the timestamp format is not visible here — confirm at the producer.
pub built_at: Option<String>,
227+
}
228+
229+
/// Quality metrics for graph stats.
///
/// Raw counters only; carried as `GraphStats::quality`. Any ratios or scores
/// are derived by the consumer, not stored here.
230+
#[napi(object)]
231+
#[derive(Debug, Clone)]
232+
pub struct QualityMetrics {
233+
/// Total callable count.
pub callable_total: i32,
234+
/// Callable count that has callers.
pub callable_with_callers: i32,
235+
/// Call edge count.
pub call_edges: i32,
236+
/// High-confidence call edge count.
// NOTE(review): the confidence threshold is defined by the query, not here — confirm.
pub high_conf_call_edges: i32,
237+
}
238+
239+
/// Combined graph statistics — replaces ~11 separate queries in module-map.ts.
///
/// Aggregates totals, per-kind and per-role breakdowns, quality counters,
/// file hotspots, and two optional sections (`complexity`, `embeddings`)
/// into one value.
240+
#[napi(object)]
241+
#[derive(Debug, Clone)]
242+
pub struct GraphStats {
243+
/// Total node count.
pub total_nodes: i32,
244+
/// Total edge count.
pub total_edges: i32,
245+
/// Node counts broken down by kind.
pub nodes_by_kind: Vec<KindCount>,
246+
/// Edge counts broken down by kind.
pub edges_by_kind: Vec<KindCount>,
247+
/// Counts broken down by role.
pub role_counts: Vec<RoleCount>,
248+
/// Raw quality counters.
pub quality: QualityMetrics,
249+
/// File hotspot entries.
pub hotspots: Vec<FileHotspot>,
250+
/// Complexity summary; `None` when not available.
pub complexity: Option<ComplexitySummary>,
251+
/// Embedding metadata; `None` when not available.
pub embeddings: Option<EmbeddingInfo>,
252+
}
253+
254+
/// Dataflow edge with joined node info.
///
/// `name`, `kind` and `file` are always present; the remaining fields are
/// optional. Element type of every list in `DataflowEdgesResult`.
255+
#[napi(object)]
256+
#[derive(Debug, Clone)]
257+
pub struct DataflowQueryEdge {
258+
/// Name of the joined node.
pub name: String,
259+
/// Kind of the joined node.
pub kind: String,
260+
/// File of the joined node.
pub file: String,
261+
/// Line number, when available.
pub line: Option<i32>,
262+
/// Parameter index, when available.
pub param_index: Option<i32>,
263+
/// Expression text, when available.
pub expression: Option<String>,
264+
/// Confidence value, when available.
pub confidence: Option<f64>,
265+
}
266+
267+
/// All 6 directional dataflow edge sets for a node.
///
/// One list per combination of three edge groups (`flows_to`, `returns`,
/// `mutates`) and two directions (`_out`, `_in`).
// NOTE(review): presumably `_out` holds edges leaving the node and `_in`
// edges arriving at it — confirm against the query that fills this struct.
268+
#[napi(object)]
269+
#[derive(Debug, Clone)]
270+
pub struct DataflowEdgesResult {
271+
/// `flows_to` edges, `out` direction.
pub flows_to_out: Vec<DataflowQueryEdge>,
272+
/// `flows_to` edges, `in` direction.
pub flows_to_in: Vec<DataflowQueryEdge>,
273+
/// `returns` edges, `out` direction.
pub returns_out: Vec<DataflowQueryEdge>,
274+
/// `returns` edges, `in` direction.
pub returns_in: Vec<DataflowQueryEdge>,
275+
/// `mutates` edges, `out` direction.
pub mutates_out: Vec<DataflowQueryEdge>,
276+
/// `mutates` edges, `in` direction.
pub mutates_in: Vec<DataflowQueryEdge>,
277+
}
278+
279+
/// Hotspot row from node_metrics join.
///
/// `name` and `kind` are always present; every metric column is optional.
// NOTE(review): presumably a metric is `None` when the joined node_metrics
// row or column is NULL — confirm against the producing query.
280+
#[napi(object)]
281+
#[derive(Debug, Clone)]
282+
pub struct NativeHotspotRow {
283+
/// Name of the row's node.
pub name: String,
284+
/// Kind of the row's node.
pub kind: String,
285+
/// Line count metric, when available.
pub line_count: Option<i32>,
286+
/// Symbol count metric, when available.
pub symbol_count: Option<i32>,
287+
/// Import count metric, when available.
pub import_count: Option<i32>,
288+
/// Export count metric, when available.
pub export_count: Option<i32>,
289+
/// Fan-in metric, when available.
pub fan_in: Option<i32>,
290+
/// Fan-out metric, when available.
pub fan_out: Option<i32>,
291+
/// Cohesion metric, when available.
pub cohesion: Option<f64>,
292+
/// File count metric, when available.
pub file_count: Option<i32>,
293+
}
294+
295+
/// Fan-in/fan-out metrics for a single node.
// NOTE(review): presumably the element type returned by the batched fan
// metrics query — confirm at the call site.
296+
#[napi(object)]
297+
#[derive(Debug, Clone)]
298+
pub struct FanMetric {
299+
/// Identifier of the node the metrics belong to.
pub node_id: i32,
300+
/// Fan-in figure for the node.
pub fan_in: i32,
301+
/// Fan-out figure for the node.
pub fan_out: i32,
302+
}

src/db/connection.ts

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,3 +399,44 @@ export function openRepo(
399399
},
400400
};
401401
}
402+
403+
/**
 * Open a readonly DB with an optional NativeDatabase alongside it.
 *
 * Returns the better-sqlite3 handle (for backwards compat) plus an optional
 * NativeDatabase for modules that can use batched Rust query methods.
 * Callers should use nativeDb when available and fall back to db.prepare().
 *
 * @param customPath Optional database path, forwarded to both
 *   `openReadonlyOrFail` and `findDbPath`.
 * @returns An object with:
 *   - `db`: the better-sqlite3 handle (always present);
 *   - `nativeDb`: the native handle, or `undefined` when the native addon is
 *     unavailable or opening it failed (the failure is logged via `debug`);
 *   - `close()`: closes both handles.
 * @throws Whatever `openReadonlyOrFail` throws. Failures on the native side
 *   never propagate from this function.
 */
export function openReadonlyWithNative(customPath?: string): {
  db: BetterSqlite3Database;
  nativeDb: NativeDatabase | undefined;
  close(): void;
} {
  const db = openReadonlyOrFail(customPath);

  let nativeDb: NativeDatabase | undefined;
  if (isNativeAvailable()) {
    try {
      const dbPath = findDbPath(customPath);
      const native = getNative();
      nativeDb = native.NativeDatabase.openReadonly(dbPath);
    } catch (e) {
      // The native handle is strictly optional: log and continue with JS only.
      // Guard against non-Error throwables so the log never prints "undefined".
      const reason = e instanceof Error ? e.message : String(e);
      debug(`openReadonlyWithNative: native path failed: ${reason}`);
    }
  }

  return {
    db,
    nativeDb,
    close() {
      try {
        db.close();
      } finally {
        // Release the native handle even when db.close() throws; otherwise
        // the native connection would leak. The original error from
        // db.close() still propagates once this block has run.
        if (nativeDb) {
          try {
            nativeDb.close();
          } catch {
            // already closed or not closeable
          }
        }
      }
    },
  };
}

src/db/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ export {
1111
flushDeferredClose,
1212
openDb,
1313
openReadonlyOrFail,
14+
openReadonlyWithNative,
1415
openRepo,
1516
} from './connection.js';
1617
export { getBuildMeta, initSchema, MIGRATIONS, setBuildMeta } from './migrations.js';

src/domain/analysis/module-map.ts

Lines changed: 103 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import path from 'node:path';
2-
import { openReadonlyOrFail, testFilterSQL } from '../../db/index.js';
2+
import { openReadonlyOrFail, openReadonlyWithNative, testFilterSQL } from '../../db/index.js';
33
import { cachedStmt } from '../../db/repository/cached-stmt.js';
44
import { loadConfig } from '../../infrastructure/config.js';
55
import { debug } from '../../infrastructure/logger.js';
@@ -381,20 +381,115 @@ export function moduleMapData(customDbPath: string, limit = 20, opts: { noTests?
381381
}
382382

383383
export function statsData(customDbPath: string, opts: { noTests?: boolean; config?: any } = {}) {
384-
const db = openReadonlyOrFail(customDbPath);
384+
const { db, nativeDb, close } = openReadonlyWithNative(customDbPath);
385385
try {
386386
const noTests = opts.noTests || false;
387387
const config = opts.config || loadConfig();
388-
const testFilter = testFilterSQL('n.file', noTests);
389388

389+
// These always need JS (non-SQL logic)
390+
const files = countFilesByLanguage(db, noTests);
391+
const fileCycles = findCycles(db, { fileLevel: true, noTests });
392+
const fnCycles = findCycles(db, { fileLevel: false, noTests });
393+
394+
// ── Native fast path: batch all SQL aggregations in one napi call ──
395+
if (nativeDb?.getGraphStats) {
396+
const s = nativeDb.getGraphStats(noTests);
397+
const nodesByKind: Record<string, number> = {};
398+
for (const k of s.nodesByKind) nodesByKind[k.kind] = k.count;
399+
const edgesByKind: Record<string, number> = {};
400+
for (const k of s.edgesByKind) edgesByKind[k.kind] = k.count;
401+
const roles: Record<string, number> & { dead?: number } = {};
402+
let deadTotal = 0;
403+
for (const r of s.roleCounts) {
404+
roles[r.role] = r.count;
405+
if (r.role.startsWith(DEAD_ROLE_PREFIX)) deadTotal += r.count;
406+
}
407+
if (deadTotal > 0) roles.dead = deadTotal;
408+
409+
const callerCoverage =
410+
s.quality.callableTotal > 0 ? s.quality.callableWithCallers / s.quality.callableTotal : 0;
411+
const callConfidence =
412+
s.quality.callEdges > 0 ? s.quality.highConfCallEdges / s.quality.callEdges : 0;
413+
414+
// False-positive analysis still uses JS (needs FALSE_POSITIVE_NAMES set)
415+
const fpThreshold = config.analysis?.falsePositiveCallers ?? FALSE_POSITIVE_CALLER_THRESHOLD;
416+
const fpRows = db
417+
.prepare(`
418+
SELECT n.name, n.file, n.line, COUNT(e.source_id) as caller_count
419+
FROM nodes n
420+
LEFT JOIN edges e ON n.id = e.target_id AND e.kind = 'calls'
421+
WHERE n.kind IN ('function', 'method')
422+
GROUP BY n.id
423+
HAVING caller_count > ?
424+
ORDER BY caller_count DESC
425+
`)
426+
.all(fpThreshold) as Array<{
427+
name: string;
428+
file: string;
429+
line: number;
430+
caller_count: number;
431+
}>;
432+
const falsePositiveWarnings = fpRows
433+
.filter((r) =>
434+
FALSE_POSITIVE_NAMES.has(r.name.includes('.') ? r.name.split('.').pop()! : r.name),
435+
)
436+
.map((r) => ({ name: r.name, file: r.file, line: r.line, callerCount: r.caller_count }));
437+
let fpEdgeCount = 0;
438+
for (const fp of falsePositiveWarnings) fpEdgeCount += fp.callerCount;
439+
const falsePositiveRatio = s.quality.callEdges > 0 ? fpEdgeCount / s.quality.callEdges : 0;
440+
const score = Math.round(
441+
callerCoverage * 40 + callConfidence * 40 + (1 - falsePositiveRatio) * 20,
442+
);
443+
444+
return {
445+
nodes: { total: s.totalNodes, byKind: nodesByKind },
446+
edges: { total: s.totalEdges, byKind: edgesByKind },
447+
files,
448+
cycles: { fileLevel: fileCycles.length, functionLevel: fnCycles.length },
449+
hotspots: s.hotspots.map((h) => ({ file: h.file, fanIn: h.fanIn, fanOut: h.fanOut })),
450+
embeddings: s.embeddings
451+
? {
452+
count: s.embeddings.count,
453+
model: s.embeddings.model,
454+
dim: s.embeddings.dim,
455+
builtAt: s.embeddings.builtAt,
456+
}
457+
: null,
458+
quality: {
459+
score,
460+
callerCoverage: {
461+
ratio: callerCoverage,
462+
covered: s.quality.callableWithCallers,
463+
total: s.quality.callableTotal,
464+
},
465+
callConfidence: {
466+
ratio: callConfidence,
467+
highConf: s.quality.highConfCallEdges,
468+
total: s.quality.callEdges,
469+
},
470+
falsePositiveWarnings,
471+
},
472+
roles,
473+
complexity: s.complexity
474+
? {
475+
analyzed: s.complexity.analyzed,
476+
avgCognitive: s.complexity.avgCognitive,
477+
avgCyclomatic: s.complexity.avgCyclomatic,
478+
maxCognitive: s.complexity.maxCognitive,
479+
maxCyclomatic: s.complexity.maxCyclomatic,
480+
avgMI: s.complexity.avgMi,
481+
minMI: s.complexity.minMi,
482+
}
483+
: null,
484+
};
485+
}
486+
487+
// ── JS fallback ───────────────────────────────────────────────────
488+
const testFilter = testFilterSQL('n.file', noTests);
390489
const testFileIds = noTests ? buildTestFileIds(db) : null;
391490

392491
const { total: totalNodes, byKind: nodesByKind } = countNodesByKind(db, testFileIds);
393492
const { total: totalEdges, byKind: edgesByKind } = countEdgesByKind(db, testFileIds);
394-
const files = countFilesByLanguage(db, noTests);
395-
396-
const fileCycles = findCycles(db, { fileLevel: true, noTests });
397-
const fnCycles = findCycles(db, { fileLevel: false, noTests });
398493

399494
const hotspots = findHotspots(db, noTests, 5);
400495
const embeddings = getEmbeddingsInfo(db);
@@ -415,6 +510,6 @@ export function statsData(customDbPath: string, opts: { noTests?: boolean; confi
415510
complexity,
416511
};
417512
} finally {
418-
db.close();
513+
close();
419514
}
420515
}

0 commit comments

Comments
 (0)