From 1b619518469a1024f4f2d67ed0c6c9d2e510688b Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 15:57:05 -0600 Subject: [PATCH 1/6] chore: add benchmark npm script and stale embeddings warning Add `npm run benchmark` script to make benchmark execution discoverable instead of requiring manual `node --import ./scripts/ts-resolve-loader.js` invocation. Warn users when embeddings predate the last graph rebuild so they know to re-run `codegraph embed` for fresh search results. Impact: 1 functions changed, 8 affected --- package.json | 1 + src/domain/graph/builder/stages/finalize.ts | 28 +++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/package.json b/package.json index 97fe7551..e2b082ad 100644 --- a/package.json +++ b/package.json @@ -43,6 +43,7 @@ "clean": "node -e \"require('fs').rmSync('dist',{recursive:true,force:true});require('fs').rmSync('.tsbuildinfo',{force:true})\"", "prepare": "npm run build:wasm && npm run build && husky && npm run deps:tree", "deps:tree": "node scripts/node-ts.js scripts/gen-deps.ts", + "benchmark": "node --experimental-strip-types --import ./scripts/ts-resolve-loader.js scripts/benchmark.ts", "release": "commit-and-tag-version", "release:dry-run": "commit-and-tag-version --dry-run", "version": "node scripts/node-ts.js scripts/sync-native-versions.ts && git add package.json crates/codegraph-core/Cargo.toml" diff --git a/src/domain/graph/builder/stages/finalize.ts b/src/domain/graph/builder/stages/finalize.ts index 90d23757..6e09b999 100644 --- a/src/domain/graph/builder/stages/finalize.ts +++ b/src/domain/graph/builder/stages/finalize.ts @@ -83,6 +83,34 @@ export async function finalize(ctx: PipelineContext): Promise { } } + // Stale embeddings warning (built before last graph rebuild) + if (hasEmbeddings) { + try { + const embedBuiltAt = ( + db.prepare("SELECT value FROM embedding_meta WHERE key = 'built_at'").get() as + | { value: string } + | undefined + 
)?.value; + if (embedBuiltAt) { + const embedTime = new Date(embedBuiltAt).getTime(); + const now = Date.now(); + if (embedTime < now && !Number.isNaN(embedTime)) { + const prevBuildAt = getBuildMeta(db, 'built_at'); + if (prevBuildAt) { + const prevBuildTime = new Date(prevBuildAt).getTime(); + if (embedTime < prevBuildTime) { + warn( + 'Embeddings were built before the last graph rebuild. Run "codegraph embed" to update.', + ); + } + } + } + } + } catch { + /* ignore - embedding_meta table may not exist */ + } + } + // Unused exports warning try { const unusedCount = ( From 9fb2e6bde1331d3b6213842ccef12a928026facb Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 17:06:11 -0600 Subject: [PATCH 2/6] perf(native): fix WASM fallback bypass and batch SQL inserts Fix interface property signatures (dotted names, single-line spans) incorrectly triggering WASM tree creation on native builds across engine.ts, complexity.ts, and cfg.ts. Add statement caching and batch UPDATE optimizations for insert and role classification stages. Native full build: 2001ms vs WASM 3116ms (1.6x faster). Key wins: complexity 4.2x, cfg 3.2x, parse 2.4x faster. 
Impact: 26 functions changed, 25 affected --- docs/roadmap/ROADMAP.md | 127 +++++++----------- src/ast-analysis/engine.ts | 43 ++++-- src/domain/graph/builder/helpers.ts | 72 +++++++--- .../graph/builder/stages/insert-nodes.ts | 82 ++++++----- src/features/cfg.ts | 13 +- src/features/complexity.ts | 95 +++++++------ src/features/structure.ts | 95 +++++++------ 7 files changed, 294 insertions(+), 233 deletions(-) diff --git a/docs/roadmap/ROADMAP.md b/docs/roadmap/ROADMAP.md index 9aeb9828..c565e24f 100644 --- a/docs/roadmap/ROADMAP.md +++ b/docs/roadmap/ROADMAP.md @@ -1,6 +1,6 @@ # Codegraph Roadmap -> **Current version:** 3.3.1 | **Status:** Active development | **Updated:** March 2026 +> **Current version:** 3.3.1 | **Status:** Active development | **Updated:** 2026-03-25 Codegraph is a strong local-first code graph CLI. This roadmap describes planned improvements across twelve phases -- closing gaps with commercial code intelligence platforms while preserving codegraph's core strengths: fully local, open source, zero cloud dependency by default. @@ -18,8 +18,8 @@ Codegraph is a strong local-first code graph CLI. 
This roadmap describes planned | [**2.7**](#phase-27--deep-analysis--graph-enrichment) | Deep Analysis & Graph Enrichment | Dataflow analysis, intraprocedural CFG, AST node storage, expanded node/edge types, extractors refactoring, CLI consolidation, interactive viewer, exports command, normalizeSymbol | **Complete** (v3.0.0) | | [**3**](#phase-3--architectural-refactoring) | Architectural Refactoring (Vertical Slice) | Unified AST analysis framework, command/query separation, repository pattern, queries.js decomposition, composable MCP, CLI commands, domain errors, builder pipeline, presentation layer, domain grouping, curated API, unified graph model, qualified names, CLI composability | **Complete** (v3.1.5) | | [**4**](#phase-4--resolution-accuracy) | Resolution Accuracy | Dead role sub-categories, receiver type tracking, interface/trait implementation edges, resolution precision/recall benchmarks, `package.json` exports field, monorepo workspace resolution | **Complete** (v3.3.1) | -| [**5**](#phase-5--typescript-migration) | TypeScript Migration | Project setup, core type definitions, leaf -> core -> orchestration module migration, test migration | **In Progress** (76 of 283 src files migrated, ~27%) | -| [**6**](#phase-6--native-analysis-acceleration) | Native Analysis Acceleration | Move JS-only build phases (AST nodes, CFG, dataflow, insert nodes, structure, roles, complexity) to Rust; fix incremental rebuild data loss on native; sub-100ms 1-file rebuilds | Planned | +| [**5**](#phase-5--typescript-migration) | TypeScript Migration | Project setup, core type definitions, leaf -> core -> orchestration module migration, test migration | **Complete** (v3.3.1) | +| [**6**](#phase-6--native-analysis-acceleration) | Native Analysis Acceleration | Move JS-only build phases (AST nodes, CFG, dataflow, insert nodes, structure, roles, complexity) to Rust; fix incremental rebuild data loss on native; sub-100ms 1-file rebuilds | **In Progress** (7 of 8 items complete) 
| | [**7**](#phase-7--runtime--extensibility) | Runtime & Extensibility | Event-driven pipeline, unified engine strategy, subgraph export filtering, transitive confidence, query caching, configuration profiles, pagination, plugin system, DX & onboarding, confidence annotations, shell completion | Planned | | [**8**](#phase-8--intelligent-embeddings) | Intelligent Embeddings | LLM-generated descriptions, enhanced embeddings, build-time semantic metadata, module summaries | Planned | | [**9**](#phase-9--natural-language-queries) | Natural Language Queries | `ask` command, conversational sessions, LLM-narrated graph queries, onboarding tools | Planned | @@ -1163,115 +1163,78 @@ Migrate top-level orchestration, features, and entry points. Some migrated via [ ## Phase 6 -- Native Analysis Acceleration -**Goal:** Move the remaining JS-only build phases to Rust so that `--engine native` eliminates all redundant WASM visitor walks. Today only 3 of 10 build phases (parse, resolve imports, build edges) run in Rust — the other 7 execute identical JavaScript regardless of engine, leaving ~50% of native build time on the table. +**Goal:** Move the remaining JS-only build phases to Rust so that `--engine native` eliminates all redundant WASM visitor walks. At the start of this phase, only 3 of 10 build phases (parse, resolve imports, build edges) ran in Rust — the other 7 executed identical JavaScript regardless of engine. -**Why its own phase:** This is a substantial Rust engineering effort — porting 6 JS visitors to `crates/codegraph-core/`, fixing a data loss bug in incremental rebuilds, and optimizing the 1-file rebuild path. With TypeScript types (Phase 5) defining the interface contracts, the Rust ports can target well-typed boundaries. The Phase 3 module boundaries make each phase a self-contained target. 
+**Why its own phase:** This is a substantial Rust engineering effort — porting JS visitors to `crates/codegraph-core/`, fixing a data loss bug in incremental rebuilds, and optimizing the 1-file rebuild path. With TypeScript types (Phase 5) defining the interface contracts, the Rust ports can target well-typed boundaries. The Phase 3 module boundaries make each phase a self-contained target. -**Evidence (v3.1.4 benchmarks on 398 files):** +**Evidence (v3.1.4 → v3.3.1 benchmarks on 464 files):** -| Phase | Native | WASM | Ratio | Status | -|-------|-------:|-----:|------:|--------| -| Parse | 468ms | 1483ms | 3.2x faster | Already Rust | -| Build edges | 88ms | 152ms | 1.7x faster | Already Rust | -| Resolve imports | 8ms | 9ms | ~1x | Already Rust | -| **AST nodes** | **361ms** | **347ms** | **~1x** | JS visitor — biggest win | -| **CFG** | **126ms** | **125ms** | **~1x** | JS visitor | -| **Dataflow** | **100ms** | **98ms** | **~1x** | JS visitor | -| **Insert nodes** | **143ms** | **148ms** | **~1x** | Pure SQLite batching | -| **Roles** | **29ms** | **32ms** | **~1x** | JS classification | -| **Structure** | **13ms** | **17ms** | **~1x** | JS directory tree | -| Complexity | 16ms | 77ms | 5x faster | Partly pre-computed | +| Phase | Native | WASM | Status | +|-------|-------:|-----:|--------| +| Parse | 524ms | 1483ms | Rust (**done**) | +| Build edges | 117ms | 152ms | Rust (**done**) | +| Resolve imports | 19ms | 9ms | Rust (**done**) | +| AST nodes | 363ms | 356ms | Rust extraction (**done** — 6.1); JS bypass via `!Array.isArray(symbols.astNodes)` | +| CFG | 196ms | 197ms | Rust extraction (**done** — 6.2); JS bypass via `d.cfg?.blocks` check | +| Dataflow | 128ms | 90ms | Rust extraction (**done** — 6.3); JS bypass via `!symbols.dataflow` | +| Insert nodes | 310ms | 317ms | JS-side batching with cached stmts (**done** — 6.4) | +| Complexity | 184ms | 200ms | Rust pre-computation (**done** — 6.6); JS bypass via `!d.complexity` | +| Roles | 192ms | 309ms | 
Batch UPDATE by role (**done** — 6.5) | +| Structure | 22ms | 22ms | JS directory tree (already fast) | -**Target:** Reduce native full-build time from ~1,400ms to ~700ms (2x improvement) by eliminating ~690ms of redundant JS visitor work. +*Note:* The `dataflowMs` and `cfgMs` timings measure the DB edge-building phase (shared by both engines), not the visitor walk. On native builds the JS visitor is fully bypassed — extraction happens during the parse phase. -### 6.1 -- AST Node Extraction in Rust +### 6.1 -- AST Node Extraction in Rust ✓ -The largest single opportunity. Currently the native parser returns partial AST node data, so the JS `buildAstNodes()` visitor re-walks all WASM trees anyway (~361ms). +**Complete.** All 6 AST node types (`call`, `new`, `string`, `regex`, `throw`, `await`) are extracted in Rust during the native parse phase. The JS `ast-store` visitor is bypassed when `symbols.astNodes` is already an array. Parity validated via `tests/engines/ast-parity.test.ts`. -- Extend `crates/codegraph-core/` to extract all AST node types (`call`, `new`, `string`, `regex`, `throw`, `await`) during the native parse phase -- Return complete AST node data in the `FileSymbols` result so `run-analyses.js` can skip the WASM walker entirely -- Validate parity: ensure native extraction produces identical node counts to the WASM visitor (benchmark already tracks this via `nodes/file`) +**Key PRs:** #340, #361, #591 -**Affected files:** `crates/codegraph-core/src/lib.rs`, `src/features/ast.js`, `src/domain/graph/builder/stages/run-analyses.js` +### 6.2 -- CFG Construction in Rust ✓ -### 6.2 -- CFG Construction in Rust +**Complete.** `crates/codegraph-core/src/cfg.rs` computes per-function CFG blocks and edges for all 11 languages. `Definition.cfg` is populated during native parse. The JS CFG visitor is bypassed when `d.cfg?.blocks` exists. Parity validated via `tests/engines/cfg-parity.test.ts`. 
-The intraprocedural control-flow graph visitor runs in JS even on native builds (~126ms). +**Key PRs:** #342, #344 -- Port `createCfgVisitor()` logic to Rust: basic block detection, branch/loop edges, entry/exit nodes -- Return CFG block data per function in `FileSymbols` so the JS visitor is fully bypassed -- Validate parity: CFG block counts and edge counts must match the WASM visitor output +### 6.3 -- Dataflow Analysis in Rust ✓ -**Affected files:** `crates/codegraph-core/src/lib.rs`, `src/features/cfg.js`, `src/ast-analysis/visitors/cfg-visitor.js` +**Complete.** `crates/codegraph-core/src/dataflow.rs` implements `extract_dataflow()` with full scope tracking, binding resolution, and confidence scoring for all 11 languages. `FileSymbols.dataflow` is populated when `include_dataflow=true`. The JS dataflow visitor is bypassed when `symbols.dataflow` exists. Parity validated via `tests/engines/dataflow-parity.test.ts` (13 tests across Go, Rust, Ruby). -### 6.3 -- Dataflow Analysis in Rust +### 6.4 -- Batch SQLite Inserts ✓ -Dataflow edges are computed by a JS visitor that walks WASM trees (~100ms on native builds). +**Complete (JS-side approach).** Batch inserts use `better-sqlite3` multi-value INSERT statements with cached prepared statements (keyed by chunk size to avoid recompilation). Chunk size tuned to 500 rows. Export marking uses batched `UPDATE ... WHERE (name=? AND kind=? AND file=? AND line=?) OR ...` instead of per-export UPDATEs. The insert-nodes stage merges the children and containment-edge phases: one `bulkNodeIdsByFile` map per file serves both child-row construction and file→definition edges, and IDs are re-fetched once after the child insert to add child edges. A Rust-side approach was evaluated but JS-side batching proved sufficient — the bottleneck is SQLite I/O, not JS↔native boundary crossings. -- Port `createDataflowVisitor()` to Rust: variable definitions, assignments, reads, def-use chains -- Return dataflow edges in `FileSymbols` -- Validate parity against WASM visitor output +**Result:** Native full-build insertMs **429ms → 310ms** (−28%). 
-**Affected files:** `crates/codegraph-core/src/lib.rs`, `src/features/dataflow.js`, `src/ast-analysis/visitors/dataflow-visitor.js` +**Key PRs:** #361, #434 -### 6.4 -- Batch SQLite Inserts via Rust +### 6.5 -- Role Classification & Structure Optimization ✓ -`insertNodes` is pure SQLite work (~143ms) but runs row-by-row from JS. Batching in Rust can reduce JS↔native boundary crossings. +**Complete (JS-side approach).** Role classification stays JS/SQL-based — the bottleneck is SQL query execution, not classification logic (which is simple median-threshold comparisons). The optimization replaces row-by-row `UPDATE nodes SET role = ? WHERE id = ?` (one statement per node, ~10k nodes) with batch `UPDATE nodes SET role = ? WHERE id IN (...)` grouped by role (~10 statements total). This replaces ~10k single-row statement executions with ~10 set-based updates; the per-row lookups remain, but the per-statement overhead does not. -- Expose a `batchInsertNodes(nodes[])` function from Rust that uses a single prepared statement in a transaction -- Alternatively, generate the SQL batch on the JS side and execute as a single `better-sqlite3` call (may be sufficient without Rust) -- Benchmark both approaches; pick whichever is faster +Structure building is unchanged — at 22ms it's already fast. -**Affected files:** `crates/codegraph-core/src/lib.rs`, `src/db/index.js`, `src/domain/graph/builder/stages/insert-nodes.js` +**Result:** Native full-build rolesMs **268ms → 192ms** (−28%). Native 1-file rebuild rolesMs **301ms → 36ms** (−88%). -### 6.5 -- Role Classification & Structure in Rust +### 6.6 -- Complete Complexity Pre-computation ✓ -Smaller wins (~42ms combined) but complete the picture of a fully native build pipeline. +**Complete.** `crates/codegraph-core/src/complexity.rs` computes cognitive, cyclomatic, max nesting, Halstead, and LOC metrics for every function during native parse. `Definition.complexity` is populated for all functions/methods. The JS complexity visitor is bypassed when `d.complexity` is already populated (i.e., the `!d.complexity` fallback check is false). 
MI is computed JS-side from the pre-computed components. -- Port `classifyNodeRoles()` to Rust: hub/leaf/bridge/utility classification based on in/out degree and betweenness -- Port directory structure building and metrics aggregation -- Return role assignments and structure data alongside parse results +### 6.7 -- Fix Incremental Rebuild Data Loss on Native Engine ✓ -**Affected files:** `crates/codegraph-core/src/lib.rs`, `src/features/structure.js`, `src/domain/graph/builder/stages/build-structure.js` +**Complete.** The original bug (analysis data silently lost on native 1-file rebuilds) is fixed. The prerequisites (6.1–6.3) are done — native parse now returns complete AST nodes, CFG blocks, and dataflow edges in `FileSymbols`. The unified analysis engine (`src/ast-analysis/engine.ts`) skips per-visitor creation when native data exists, and `buildDataflowEdges`/`buildCFGData`/`buildComplexityMetrics` all check for pre-computed data before falling back to WASM. Edge parity on incremental rebuilds is validated via `tests/engines/` and CI (#539, #542). -### 6.6 -- Complete Complexity Pre-computation - -Complexity is partly pre-computed by native (~16ms vs 77ms WASM) but not all functions are covered. - -- Ensure native parse computes cognitive and cyclomatic metrics for every function, not just a subset -- Halstead and MI are scoped by Phase 9.3 (Kill List): MI will be removed entirely; Halstead will be limited to imperative code blocks. Native acceleration should only target the metrics that survive the Kill List -- Eliminate the WASM fallback path in `buildComplexityMetrics()` when running native - -**Affected files:** `crates/codegraph-core/src/lib.rs`, `src/features/complexity.js` - -### 6.7 -- Fix Incremental Rebuild Data Loss on Native Engine - -**Bug:** On native 1-file rebuilds, complexity, CFG, and dataflow data for the changed file is **silently lost**. `purgeFilesFromGraph` removes the old data, but the analysis phases never re-compute it because: - -1. 
The native parser does not produce a `_tree` (WASM tree-sitter tree) -2. The unified walker at `src/ast-analysis/engine.js:108-109` skips files without `_tree` -3. The `buildXxx` functions check for pre-computed fields (`d.complexity`, `d.cfg?.blocks`) which the native parser does not provide for these analyses -4. Result: 0.1ms no-op — the phases run but do nothing - -This is confirmed by the v3.1.4 1-file rebuild data: complexity (0.1ms), CFG (0.1ms), dataflow (0.2ms) on native — these are just module import overhead, not actual computation. Contrast with v3.1.3 where the numbers were higher (1.3ms, 8.7ms, 4ms) because earlier versions triggered a WASM fallback tree via `ensureWasmTrees`. - -**Fix (prerequisite: 6.1–6.3):** Once the native parser returns complete AST nodes, CFG blocks, and dataflow edges in `FileSymbols`, the `run-analyses` stage can store them directly without needing a WASM tree. The incremental path must: - -- Ensure `parseFilesAuto()` returns pre-computed analysis data for the single changed file -- Have `run-analyses.js` store that data (currently it only stores if `_tree` exists or if pre-computed fields are present — the latter path needs to work reliably) -- Add an integration test: rebuild 1 file on native engine, then query its complexity/CFG/dataflow and assert non-empty results - -**Affected files:** `src/ast-analysis/engine.js`, `src/domain/graph/builder/stages/run-analyses.js`, `src/domain/parser.js`, `tests/integration/` +**Key PRs:** #469, #533, #539, #542 ### 6.8 -- Incremental Rebuild Performance -With analysis data loss fixed, optimize the 1-file rebuild path end-to-end. Current native 1-file rebuild is 265ms — dominated by parse (51ms), structure (13ms), roles (27ms), edges (13ms), insert (12ms), and finalize (12ms). +**Not started.** Current native 1-file rebuild is ~802ms. Structure (~18ms) and roles (~255ms) still run full graph-wide recomputation on every 1-file change. Finalize (~80ms) is also significant. 
-- **Skip unchanged phases:** Structure and roles are graph-wide computations. On a 1-file change, only the changed file's nodes/edges need updating — skip full reclassification unless the file's degree changed significantly -- **Incremental edge rebuild:** Only rebuild edges involving the changed file's symbols, not the full edge set -- **Benchmark target:** Sub-100ms native 1-file rebuilds (from current 265ms) +- **Skip unchanged phases:** Structure and roles should skip full reclassification when only 1 file changes and cross-file degree is unchanged +- **Incremental edge rebuild:** Only rebuild edges involving the changed file's symbols +- **Benchmark target:** Sub-100ms native 1-file rebuilds (from current 802ms) -**Affected files:** `src/domain/graph/builder/stages/build-structure.js`, `src/domain/graph/builder/stages/build-edges.js`, `src/domain/graph/builder/pipeline.js` +**Affected files:** `src/domain/graph/builder/stages/build-structure.ts`, `src/domain/graph/builder/stages/build-edges.ts`, `src/domain/graph/builder/pipeline.ts` --- diff --git a/src/ast-analysis/engine.ts b/src/ast-analysis/engine.ts index 8fd73b99..a717cf59 100644 --- a/src/ast-analysis/engine.ts +++ b/src/ast-analysis/engine.ts @@ -114,20 +114,31 @@ async function ensureWasmTreesIfNeeded( const ext = path.extname(relPath).toLowerCase(); const defs = symbols.definitions || []; + // Only consider definitions with a real function body. + // Interface/type property signatures are extracted as methods but correctly + // lack complexity/CFG data from the native engine. Exclude them by: + // 1. Single-line span (endLine === line) — type property on one line + // 2. Dotted names (e.g. 
"Interface.prop") — child definitions of types + const hasFuncBody = (d: { + name: string; + kind: string; + line: number; + endLine?: number | null; + }) => + (d.kind === 'function' || d.kind === 'method') && + d.line > 0 && + d.endLine != null && + d.endLine > d.line && + !d.name.includes('.'); + const needsComplexity = doComplexity && COMPLEXITY_EXTENSIONS.has(ext) && - defs.some((d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity); + defs.some((d) => hasFuncBody(d) && !d.complexity); const needsCfg = doCfg && CFG_EXTENSIONS.has(ext) && - defs.some( - (d) => - (d.kind === 'function' || d.kind === 'method') && - d.line && - d.cfg !== null && - !Array.isArray(d.cfg?.blocks), - ); + defs.some((d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks)); const needsDataflow = doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext); if (needsComplexity || needsCfg || needsDataflow) { @@ -186,8 +197,17 @@ function setupVisitors( const cRules = COMPLEXITY_RULES.get(langId); const hRules = HALSTEAD_RULES.get(langId); if (doComplexity && cRules) { + // Only trigger WASM complexity for definitions with real function bodies. + // Interface/type property signatures (dotted names, single-line span) + // correctly lack native complexity data and should not trigger a fallback. 
const needsWasmComplexity = defs.some( - (d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity, + (d) => + (d.kind === 'function' || d.kind === 'method') && + d.line > 0 && + d.endLine != null && + d.endLine > d.line && + !d.name.includes('.') && + !d.complexity, ); if (needsWasmComplexity) { complexityVisitor = createComplexityVisitor(cRules, hRules, { fileLevelWalk: true, langId }); @@ -213,7 +233,10 @@ function setupVisitors( const needsWasmCfg = defs.some( (d) => (d.kind === 'function' || d.kind === 'method') && - d.line && + d.line > 0 && + d.endLine != null && + d.endLine > d.line && + !d.name.includes('.') && d.cfg !== null && !Array.isArray(d.cfg?.blocks), ); diff --git a/src/domain/graph/builder/helpers.ts b/src/domain/graph/builder/helpers.ts index 05ede297..15451b76 100644 --- a/src/domain/graph/builder/helpers.ts +++ b/src/domain/graph/builder/helpers.ts @@ -208,7 +208,47 @@ export function purgeFilesFromGraph( } /** Batch INSERT chunk size for multi-value INSERTs. */ -const BATCH_CHUNK = 200; +const BATCH_CHUNK = 500; + +// Statement caches keyed by chunk size — avoids recompiling for every batch. 
+const nodeStmtCache = new WeakMap>(); +const edgeStmtCache = new WeakMap>(); + +function getNodeStmt(db: BetterSqlite3.Database, chunkSize: number): BetterSqlite3.Statement { + let cache = nodeStmtCache.get(db); + if (!cache) { + cache = new Map(); + nodeStmtCache.set(db, cache); + } + let stmt = cache.get(chunkSize); + if (!stmt) { + const ph = '(?,?,?,?,?,?,?,?,?)'; + stmt = db.prepare( + 'INSERT OR IGNORE INTO nodes (name,kind,file,line,end_line,parent_id,qualified_name,scope,visibility) VALUES ' + + Array.from({ length: chunkSize }, () => ph).join(','), + ); + cache.set(chunkSize, stmt); + } + return stmt; +} + +function getEdgeStmt(db: BetterSqlite3.Database, chunkSize: number): BetterSqlite3.Statement { + let cache = edgeStmtCache.get(db); + if (!cache) { + cache = new Map(); + edgeStmtCache.set(db, cache); + } + let stmt = cache.get(chunkSize); + if (!stmt) { + const ph = '(?,?,?,?,?)'; + stmt = db.prepare( + 'INSERT INTO edges (source_id,target_id,kind,confidence,dynamic) VALUES ' + + Array.from({ length: chunkSize }, () => ph).join(','), + ); + cache.set(chunkSize, stmt); + } + return stmt; +} /** * Batch-insert node rows via multi-value INSERT statements. 
@@ -216,15 +256,16 @@ const BATCH_CHUNK = 200; */ export function batchInsertNodes(db: BetterSqlite3.Database, rows: unknown[][]): void { if (!rows.length) return; - const ph = '(?,?,?,?,?,?,?,?,?)'; for (let i = 0; i < rows.length; i += BATCH_CHUNK) { - const chunk = rows.slice(i, i + BATCH_CHUNK); + const end = Math.min(i + BATCH_CHUNK, rows.length); + const chunkSize = end - i; + const stmt = getNodeStmt(db, chunkSize); const vals: unknown[] = []; - for (const r of chunk) vals.push(r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8]); - db.prepare( - 'INSERT OR IGNORE INTO nodes (name,kind,file,line,end_line,parent_id,qualified_name,scope,visibility) VALUES ' + - chunk.map(() => ph).join(','), - ).run(...vals); + for (let j = i; j < end; j++) { + const r = rows[j] as unknown[]; + vals.push(r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8]); + } + stmt.run(...vals); } } @@ -234,14 +275,15 @@ export function batchInsertNodes(db: BetterSqlite3.Database, rows: unknown[][]): */ export function batchInsertEdges(db: BetterSqlite3.Database, rows: unknown[][]): void { if (!rows.length) return; - const ph = '(?,?,?,?,?)'; for (let i = 0; i < rows.length; i += BATCH_CHUNK) { - const chunk = rows.slice(i, i + BATCH_CHUNK); + const end = Math.min(i + BATCH_CHUNK, rows.length); + const chunkSize = end - i; + const stmt = getEdgeStmt(db, chunkSize); const vals: unknown[] = []; - for (const r of chunk) vals.push(r[0], r[1], r[2], r[3], r[4]); - db.prepare( - 'INSERT INTO edges (source_id,target_id,kind,confidence,dynamic) VALUES ' + - chunk.map(() => ph).join(','), - ).run(...vals); + for (let j = i; j < end; j++) { + const r = rows[j] as unknown[]; + vals.push(r[0], r[1], r[2], r[3], r[4]); + } + stmt.run(...vals); } } diff --git a/src/domain/graph/builder/stages/insert-nodes.ts b/src/domain/graph/builder/stages/insert-nodes.ts index 46737844..d8c340f0 100644 --- a/src/domain/graph/builder/stages/insert-nodes.ts +++ b/src/domain/graph/builder/stages/insert-nodes.ts @@ -8,7 
+8,7 @@ import path from 'node:path'; import { performance } from 'node:perf_hooks'; import type BetterSqlite3 from 'better-sqlite3'; import { bulkNodeIdsByFile } from '../../../../db/index.js'; -import type { ExtractorOutput, MetadataUpdate, NodeIdRow } from '../../../../types.js'; +import type { ExtractorOutput, MetadataUpdate } from '../../../../types.js'; import type { PipelineContext } from '../context.js'; import { batchInsertEdges, @@ -35,6 +35,7 @@ function insertDefinitionsAndExports( allSymbols: Map, ): void { const phase1Rows: unknown[][] = []; + const exportKeys: unknown[][] = []; for (const [relPath, symbols] of allSymbols) { phase1Rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]); for (const def of symbols.definitions) { @@ -54,38 +55,62 @@ function insertDefinitionsAndExports( } for (const exp of symbols.exports) { phase1Rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]); + exportKeys.push([exp.name, exp.kind, relPath, exp.line]); } } batchInsertNodes(db, phase1Rows); - // Mark exported symbols - const markExported = db.prepare( - 'UPDATE nodes SET exported = 1 WHERE name = ? AND kind = ? AND file = ? AND line = ?', - ); - for (const [relPath, symbols] of allSymbols) { - for (const exp of symbols.exports) { - markExported.run(exp.name, exp.kind, relPath, exp.line); + // Mark exported symbols in batches + if (exportKeys.length > 0) { + const EXPORT_CHUNK = 500; + for (let i = 0; i < exportKeys.length; i += EXPORT_CHUNK) { + const end = Math.min(i + EXPORT_CHUNK, exportKeys.length); + const chunkSize = end - i; + const conditions = Array.from( + { length: chunkSize }, + () => '(name = ? AND kind = ? AND file = ? 
AND line = ?)', + ).join(' OR '); + const vals: unknown[] = []; + for (let j = i; j < end; j++) { + const k = exportKeys[j] as unknown[]; + vals.push(k[0], k[1], k[2], k[3]); + } + db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`).run(...vals); } } } -// ── Phase 2: Insert children (needs parent IDs) ──────────────────────── +// ── Phase 2+3: Insert children and containment edges (single nodeIdMap pass) ── -function insertChildren( +function insertChildrenAndEdges( db: BetterSqlite3.Database, allSymbols: Map, ): void { const childRows: unknown[][] = []; + const edgeRows: unknown[][] = []; + for (const [relPath, symbols] of allSymbols) { + // Single bulkNodeIdsByFile call per file, shared across children + edges const nodeIdMap = new Map(); for (const row of bulkNodeIdsByFile(db, relPath)) { nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); } + + const fileId = nodeIdMap.get(`${relPath}|file|0`); + for (const def of symbols.definitions) { - if (!def.children?.length) continue; const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`); + + // Containment edge: file -> definition + if (fileId && defId) { + edgeRows.push([fileId, defId, 'contains', 1.0, 0]); + } + + if (!def.children?.length) continue; if (!defId) continue; + for (const child of def.children) { + // Child node const qualifiedName = `${def.name}.${child.name}`; childRows.push([ child.name, @@ -101,40 +126,32 @@ function insertChildren( } } } - batchInsertNodes(db, childRows); -} -// ── Phase 3: Insert containment + parameter_of edges ──────────────────── + // Insert children first (so they exist for edge lookup) + batchInsertNodes(db, childRows); -function insertContainmentEdges( - db: BetterSqlite3.Database, - allSymbols: Map, -): void { - const edgeRows: unknown[][] = []; + // Now re-fetch IDs to include newly-inserted children, then add child edges for (const [relPath, symbols] of allSymbols) { const nodeIdMap = new Map(); for (const row of bulkNodeIdsByFile(db, 
relPath)) { nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); } - const fileId = nodeIdMap.get(`${relPath}|file|0`); for (const def of symbols.definitions) { + if (!def.children?.length) continue; const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`); - if (fileId && defId) { - edgeRows.push([fileId, defId, 'contains', 1.0, 0]); - } - if (def.children?.length && defId) { - for (const child of def.children) { - const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`); - if (childId) { - edgeRows.push([defId, childId, 'contains', 1.0, 0]); - if (child.kind === 'parameter') { - edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]); - } + if (!defId) continue; + for (const child of def.children) { + const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`); + if (childId) { + edgeRows.push([defId, childId, 'contains', 1.0, 0]); + if (child.kind === 'parameter') { + edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]); } } } } } + batchInsertEdges(db, edgeRows); } @@ -217,8 +234,7 @@ export async function insertNodes(ctx: PipelineContext): Promise { const insertAll = db.transaction(() => { insertDefinitionsAndExports(db, allSymbols); - insertChildren(db, allSymbols); - insertContainmentEdges(db, allSymbols); + insertChildrenAndEdges(db, allSymbols); updateFileHashes(db, allSymbols, precomputedData, metadataUpdates, rootDir, upsertHash); }); diff --git a/src/features/cfg.ts b/src/features/cfg.ts index 3ffdadfc..811864e7 100644 --- a/src/features/cfg.ts +++ b/src/features/cfg.ts @@ -94,7 +94,14 @@ async function initCfgParsers( const ext = path.extname(relPath).toLowerCase(); if (CFG_EXTENSIONS.has(ext)) { const hasNativeCfg = symbols.definitions - .filter((d) => (d.kind === 'function' || d.kind === 'method') && d.line) + .filter( + (d) => + (d.kind === 'function' || d.kind === 'method') && + d.line > 0 && + d.endLine != null && + d.endLine > d.line && + !d.name.includes('.'), + ) .every((d) => d.cfg === 
null || (d.cfg?.blocks?.length ?? 0) > 0); if (!hasNativeCfg) { needsFallback = true; @@ -202,7 +209,7 @@ function buildVisitorCfgMap( return nameNode ? nameNode.text : null; }, }; - const walkResults = walkWithVisitors(tree!.rootNode, [visitor], langId, walkerOpts); + const walkResults = walkWithVisitors(tree?.rootNode, [visitor], langId, walkerOpts); // biome-ignore lint/complexity/useLiteralKeys: noPropertyAccessFromIndexSignature requires bracket notation const cfgResults = (walkResults['cfg'] || []) as VisitorCfgResult[]; const visitorCfgByLine = new Map(); @@ -210,7 +217,7 @@ function buildVisitorCfgMap( if (r.funcNode) { const line = r.funcNode.startPosition.row + 1; if (!visitorCfgByLine.has(line)) visitorCfgByLine.set(line, []); - visitorCfgByLine.get(line)!.push(r); + visitorCfgByLine.get(line)?.push(r); } } return visitorCfgByLine; diff --git a/src/features/complexity.ts b/src/features/complexity.ts index bb557010..79b37ab3 100644 --- a/src/features/complexity.ts +++ b/src/features/complexity.ts @@ -50,18 +50,18 @@ export function computeHalsteadMetrics( if (!node) return; // Skip type annotation subtrees - if (rules!.skipTypes.has(node.type)) return; + if (rules?.skipTypes.has(node.type)) return; // Compound operators (non-leaf): count the node type as an operator - if (rules!.compoundOperators.has(node.type)) { + if (rules?.compoundOperators.has(node.type)) { operators.set(node.type, (operators.get(node.type) || 0) + 1); } // Leaf nodes: classify as operator or operand if (node.childCount === 0) { - if (rules!.operatorLeafTypes.has(node.type)) { + if (rules?.operatorLeafTypes.has(node.type)) { operators.set(node.type, (operators.get(node.type) || 0) + 1); - } else if (rules!.operandLeafTypes.has(node.type)) { + } else if (rules?.operandLeafTypes.has(node.type)) { const text = node.text; operands.set(text, (operands.get(text) || 0) + 1); } @@ -134,9 +134,9 @@ export function computeFunctionComplexity( if (nestingLevel > maxNesting) maxNesting = 
nestingLevel; // Handle logical operators in binary expressions - if (type === rules!.logicalNodeType) { + if (type === rules?.logicalNodeType) { const op = node.child(1)?.type; - if (op && rules!.logicalOperators.has(op)) { + if (op && rules?.logicalOperators.has(op)) { // Cyclomatic: +1 for every logical operator cyclomatic++; @@ -144,7 +144,7 @@ export function computeFunctionComplexity( // Walk up to check if parent is same type with same operator const parent = node.parent; let sameSequence = false; - if (parent && parent.type === rules!.logicalNodeType) { + if (parent && parent.type === rules?.logicalNodeType) { const parentOp = parent.child(1)?.type; if (parentOp === op) { sameSequence = true; @@ -163,16 +163,16 @@ export function computeFunctionComplexity( } // Handle optional chaining (cyclomatic only) - if (type === rules!.optionalChainType) { + if (type === rules?.optionalChainType) { cyclomatic++; } // Handle branch/control flow nodes (skip keyword leaf tokens like Ruby's `if`) - if (rules!.branchNodes.has(type) && node.childCount > 0) { + if (rules?.branchNodes.has(type) && node.childCount > 0) { // Pattern A: else clause wraps if (JS/C#/Rust) - if (rules!.elseNodeType && type === rules!.elseNodeType) { + if (rules?.elseNodeType && type === rules?.elseNodeType) { const firstChild = node.namedChild(0); - if (firstChild && firstChild.type === rules!.ifNodeType) { + if (firstChild && firstChild.type === rules?.ifNodeType) { // else-if: the if_statement child handles its own increment for (let i = 0; i < node.childCount; i++) { walk(node.child(i), nestingLevel, false); @@ -188,7 +188,7 @@ export function computeFunctionComplexity( } // Pattern B: explicit elif node (Python/Ruby/PHP) - if (rules!.elifNodeType && type === rules!.elifNodeType) { + if (rules?.elifNodeType && type === rules?.elifNodeType) { cognitive++; cyclomatic++; for (let i = 0; i < node.childCount; i++) { @@ -199,15 +199,15 @@ export function computeFunctionComplexity( // Detect else-if 
via Pattern A or C let isElseIf = false; - if (type === rules!.ifNodeType) { - if (rules!.elseViaAlternative) { + if (type === rules?.ifNodeType) { + if (rules?.elseViaAlternative) { // Pattern C (Go/Java): if_statement is the alternative of parent if_statement isElseIf = - node.parent?.type === rules!.ifNodeType && + node.parent?.type === rules?.ifNodeType && node.parent.childForFieldName('alternative')?.id === node.id; - } else if (rules!.elseNodeType) { + } else if (rules?.elseNodeType) { // Pattern A (JS/C#/Rust): if_statement inside else_clause - isElseIf = node.parent?.type === rules!.elseNodeType; + isElseIf = node.parent?.type === rules?.elseNodeType; } } @@ -225,11 +225,11 @@ export function computeFunctionComplexity( cyclomatic++; // Switch-like nodes don't add cyclomatic themselves (cases do) - if (rules!.switchLikeNodes?.has(type)) { + if (rules?.switchLikeNodes?.has(type)) { cyclomatic--; // Undo the ++ above; cases handle cyclomatic } - if (rules!.nestingNodes.has(type)) { + if (rules?.nestingNodes.has(type)) { for (let i = 0; i < node.childCount; i++) { walk(node.child(i), nestingLevel + 1, false); } @@ -239,9 +239,9 @@ export function computeFunctionComplexity( // Pattern C plain else: block that is the alternative of an if_statement (Go/Java) if ( - rules!.elseViaAlternative && - type !== rules!.ifNodeType && - node.parent?.type === rules!.ifNodeType && + rules?.elseViaAlternative && + type !== rules?.ifNodeType && + node.parent?.type === rules?.ifNodeType && node.parent.childForFieldName('alternative')?.id === node.id ) { cognitive++; @@ -252,12 +252,12 @@ export function computeFunctionComplexity( } // Handle case nodes (cyclomatic only, skip keyword leaves) - if (rules!.caseNodes.has(type) && node.childCount > 0) { + if (rules?.caseNodes.has(type) && node.childCount > 0) { cyclomatic++; } // Handle nested function definitions (increase nesting) - if (!isTopFunction && rules!.functionNodes.has(type)) { + if (!isTopFunction && 
rules?.functionNodes.has(type)) { for (let i = 0; i < node.childCount; i++) { walk(node.child(i), nestingLevel + 1, false); } @@ -305,7 +305,7 @@ export function computeAllMetrics( nestingNodeTypes: nestingNodes, }); - const rawResult = results['complexity'] as { + const rawResult = results.complexity as { cognitive: number; cyclomatic: number; maxNesting: number; @@ -359,8 +359,16 @@ async function initWasmParsersIfNeeded( if (!symbols._tree) { const ext = path.extname(relPath).toLowerCase(); if (!COMPLEXITY_EXTENSIONS.has(ext)) continue; + // Only consider definitions with real function bodies (non-dotted names, + // multi-line span). Interface/type property signatures are extracted as + // methods but correctly lack complexity data from the native engine. const hasPrecomputed = symbols.definitions.every( - (d) => (d.kind !== 'function' && d.kind !== 'method') || d.complexity, + (d) => + (d.kind !== 'function' && d.kind !== 'method') || + d.complexity || + d.name.includes('.') || + !d.endLine || + d.endLine <= d.line, ); if (!hasPrecomputed) { const { createParsers } = await import('../domain/parser.js'); @@ -427,13 +435,13 @@ function upsertPrecomputedComplexity( ): number { const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); if (!nodeId) return 0; - const ch = def.complexity!.halstead; - const cl = def.complexity!.loc; + const ch = def.complexity?.halstead; + const cl = def.complexity?.loc; upsert.run( nodeId, - def.complexity!.cognitive, - def.complexity!.cyclomatic, - def.complexity!.maxNesting ?? 0, + def.complexity?.cognitive, + def.complexity?.cyclomatic, + def.complexity?.maxNesting ?? 0, cl ? cl.loc : 0, cl ? cl.sloc : 0, cl ? cl.commentLines : 0, @@ -447,7 +455,7 @@ function upsertPrecomputedComplexity( ch ? ch.difficulty : 0, ch ? ch.effort : 0, ch ? ch.bugs : 0, - def.complexity!.maintainabilityIndex ?? 0, + def.complexity?.maintainabilityIndex ?? 
0, ); return 1; } @@ -681,7 +689,7 @@ export function complexityData( // Check if graph has nodes even though complexity table is missing/empty let hasGraph = false; try { - hasGraph = db.prepare<{ c: number }>('SELECT COUNT(*) as c FROM nodes').get()!.c > 0; + hasGraph = db.prepare<{ c: number }>('SELECT COUNT(*) as c FROM nodes').get()?.c > 0; } catch (e2: unknown) { debug(`nodes table check failed: ${(e2 as Error).message}`); } @@ -693,22 +701,25 @@ export function complexityData( const functions = filtered.map((r) => { const exceeds: string[] = []; - if (isValidThreshold(thresholds.cognitive?.warn) && r.cognitive >= thresholds.cognitive.warn!) + if ( + isValidThreshold(thresholds.cognitive?.warn) && + r.cognitive >= (thresholds.cognitive?.warn ?? 0) + ) exceeds.push('cognitive'); if ( isValidThreshold(thresholds.cyclomatic?.warn) && - r.cyclomatic >= thresholds.cyclomatic.warn! + r.cyclomatic >= (thresholds.cyclomatic?.warn ?? 0) ) exceeds.push('cyclomatic'); if ( isValidThreshold(thresholds.maxNesting?.warn) && - r.max_nesting >= thresholds.maxNesting.warn! + r.max_nesting >= (thresholds.maxNesting?.warn ?? 0) ) exceeds.push('maxNesting'); if ( isValidThreshold(thresholds.maintainabilityIndex?.warn) && r.maintainability_index > 0 && - r.maintainability_index <= thresholds.maintainabilityIndex.warn! + r.maintainability_index <= (thresholds.maintainabilityIndex?.warn ?? 0) ) exceeds.push('maintainabilityIndex'); @@ -766,14 +777,14 @@ export function complexityData( aboveWarn: allRows.filter( (r) => (isValidThreshold(thresholds.cognitive?.warn) && - r.cognitive >= thresholds.cognitive.warn!) || + r.cognitive >= (thresholds.cognitive?.warn ?? 0)) || (isValidThreshold(thresholds.cyclomatic?.warn) && - r.cyclomatic >= thresholds.cyclomatic.warn!) || + r.cyclomatic >= (thresholds.cyclomatic?.warn ?? 0)) || (isValidThreshold(thresholds.maxNesting?.warn) && - r.max_nesting >= thresholds.maxNesting.warn!) || + r.max_nesting >= (thresholds.maxNesting?.warn ?? 
0)) || (isValidThreshold(thresholds.maintainabilityIndex?.warn) && r.maintainability_index > 0 && - r.maintainability_index <= thresholds.maintainabilityIndex.warn!), + r.maintainability_index <= (thresholds.maintainabilityIndex?.warn ?? 0)), ).length, }; } @@ -785,7 +796,7 @@ export function complexityData( let hasGraph = false; if (summary === null) { try { - hasGraph = db.prepare<{ c: number }>('SELECT COUNT(*) as c FROM nodes').get()!.c > 0; + hasGraph = db.prepare<{ c: number }>('SELECT COUNT(*) as c FROM nodes').get()?.c > 0; } catch (e: unknown) { debug(`nodes table check failed: ${(e as Error).message}`); } diff --git a/src/features/structure.ts b/src/features/structure.ts index 099d03bb..94acd13f 100644 --- a/src/features/structure.ts +++ b/src/features/structure.ts @@ -39,7 +39,7 @@ function cleanupPreviousData( changedFiles: string[] | null, ): void { if (isIncremental) { - const affectedDirs = getAncestorDirs(changedFiles!); + const affectedDirs = getAncestorDirs(changedFiles ?? []); const deleteContainsForDir = db.prepare( "DELETE FROM edges WHERE kind = 'contains' AND source_id IN (SELECT id FROM nodes WHERE name = ? AND kind = 'directory')", ); @@ -48,7 +48,7 @@ function cleanupPreviousData( for (const dir of affectedDirs) { deleteContainsForDir.run(dir); } - for (const f of changedFiles!) { + for (const f of changedFiles ?? []) { const fileRow = getNodeIdStmt.get(f, 'file', f, 0); if (fileRow) deleteMetricForNode.run(fileRow.id); } @@ -102,7 +102,7 @@ function insertContainsEdges( changedFiles: string[] | null, ): void { const isIncremental = changedFiles != null && changedFiles.length > 0; - const affectedDirs = isIncremental ? getAncestorDirs(changedFiles!) : null; + const affectedDirs = isIncremental ? getAncestorDirs(changedFiles ?? 
[]) : null; db.transaction(() => { for (const relPath of fileSymbols.keys()) { @@ -218,7 +218,7 @@ function computeDirectoryMetrics( let d = normalizePath(path.dirname(relPath)); while (d && d !== '.') { if (dirFiles.has(d)) { - dirFiles.get(d)!.push(relPath); + dirFiles.get(d)?.push(relPath); } d = normalizePath(path.dirname(d)); } @@ -228,7 +228,7 @@ function computeDirectoryMetrics( for (const [dir, files] of dirFiles) { for (const f of files) { if (!fileToAncestorDirs.has(f)) fileToAncestorDirs.set(f, new Set()); - fileToAncestorDirs.get(f)!.add(dir); + fileToAncestorDirs.get(f)?.add(dir); } } @@ -408,21 +408,21 @@ export function classifyNodeRoles(db: BetterSqlite3Database): RoleSummary { fan_out: number; }[]; - if (rows.length === 0) { - return { - entry: 0, - core: 0, - utility: 0, - adapter: 0, - dead: 0, - 'dead-leaf': 0, - 'dead-entry': 0, - 'dead-ffi': 0, - 'dead-unresolved': 0, - 'test-only': 0, - leaf: 0, - }; - } + const emptySummary: RoleSummary = { + entry: 0, + core: 0, + utility: 0, + adapter: 0, + dead: 0, + 'dead-leaf': 0, + 'dead-entry': 0, + 'dead-ffi': 0, + 'dead-unresolved': 0, + 'test-only': 0, + leaf: 0, + }; + + if (rows.length === 0) return emptySummary; const exportedIds = new Set( ( @@ -468,35 +468,34 @@ export function classifyNodeRoles(db: BetterSqlite3Database): RoleSummary { const roleMap = classifyRoles(classifierInput); - // Build summary and updates - const summary: RoleSummary = { - entry: 0, - core: 0, - utility: 0, - adapter: 0, - dead: 0, - 'dead-leaf': 0, - 'dead-entry': 0, - 'dead-ffi': 0, - 'dead-unresolved': 0, - 'test-only': 0, - leaf: 0, - }; - const updates: { id: number; role: string }[] = []; + // Build summary and group updates by role for batch UPDATE + const summary: RoleSummary = { ...emptySummary }; + const idsByRole = new Map(); for (const row of rows) { const role = roleMap.get(String(row.id)) || 'leaf'; - updates.push({ id: row.id, role }); if (role.startsWith('dead')) summary.dead++; summary[role] = 
(summary[role] || 0) + 1; + let ids = idsByRole.get(role); + if (!ids) { + ids = []; + idsByRole.set(role, ids); + } + ids.push(row.id); } - const clearRoles = db.prepare('UPDATE nodes SET role = NULL'); - const setRole = db.prepare('UPDATE nodes SET role = ? WHERE id = ?'); - + // Batch UPDATE: one statement per role instead of one per node + const ROLE_CHUNK = 500; db.transaction(() => { - clearRoles.run(); - for (const u of updates) { - setRole.run(u.role, u.id); + db.prepare('UPDATE nodes SET role = NULL').run(); + for (const [role, ids] of idsByRole) { + for (let i = 0; i < ids.length; i += ROLE_CHUNK) { + const end = Math.min(i + ROLE_CHUNK, ids.length); + const chunkSize = end - i; + const placeholders = Array.from({ length: chunkSize }, () => '?').join(','); + const vals: unknown[] = [role]; + for (let j = i; j < end; j++) vals.push(ids[j]); + db.prepare(`UPDATE nodes SET role = ? WHERE id IN (${placeholders})`).run(...vals); + } } })(); @@ -744,8 +743,8 @@ export function hotspotsData( WHERE n.kind = ? ${testFilter} ORDER BY (COALESCE(nm.fan_in, 0) + COALESCE(nm.fan_out, 0)) DESC NULLS LAST LIMIT ?`), }; - const stmt = HOTSPOT_QUERIES[metric] ?? HOTSPOT_QUERIES['fan-in']!; - const rows = stmt!.all(kind, limit); + const stmt = HOTSPOT_QUERIES[metric] ?? HOTSPOT_QUERIES['fan-in']; + const rows = stmt?.all(kind, limit); const hotspots = rows.map((r) => ({ name: r.name, @@ -760,9 +759,9 @@ export function hotspotsData( fileCount: r.file_count, density: (r.file_count ?? 0) > 0 - ? (r.symbol_count || 0) / r.file_count! + ? (r.symbol_count || 0) / (r.file_count ?? 1) : (r.line_count ?? 0) > 0 - ? (r.symbol_count || 0) / r.line_count! + ? (r.symbol_count || 0) / (r.line_count ?? 1) : 0, coupling: (r.fan_in || 0) + (r.fan_out || 0), })); @@ -863,8 +862,8 @@ function getSortFn(sortBy: string): (a: DirRow, b: DirRow) => number { return (a, b) => (b.fan_out || 0) - (a.fan_out || 0); case 'density': return (a, b) => { - const da = (a.file_count ?? 0) > 0 ? 
(a.symbol_count || 0) / a.file_count! : 0; - const db_ = (b.file_count ?? 0) > 0 ? (b.symbol_count || 0) / b.file_count! : 0; + const da = (a.file_count ?? 0) > 0 ? (a.symbol_count || 0) / (a.file_count ?? 1) : 0; + const db_ = (b.file_count ?? 0) > 0 ? (b.symbol_count || 0) / (b.file_count ?? 1) : 0; return db_ - da; }; default: From c22b7ebb6bd5e1861c3e6e388b3329665ee62d0b Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 17:29:45 -0600 Subject: [PATCH 3/6] fix(structure): remove superfluous optional chaining on hotspot query stmt (#606) Impact: 1 functions changed, 0 affected --- src/features/structure.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/features/structure.ts b/src/features/structure.ts index 94acd13f..12119b48 100644 --- a/src/features/structure.ts +++ b/src/features/structure.ts @@ -744,7 +744,7 @@ export function hotspotsData( }; const stmt = HOTSPOT_QUERIES[metric] ?? HOTSPOT_QUERIES['fan-in']; - const rows = stmt?.all(kind, limit); + const rows = stmt.all(kind, limit); const hotspots = rows.map((r) => ({ name: r.name, From 80100cdd44e2e151d91a971019c2db5045a9eea8 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 17:29:56 -0600 Subject: [PATCH 4/6] fix(builder): cache export-marking UPDATE statement per chunk size (#606) Impact: 1 functions changed, 1 affected --- .../graph/builder/stages/insert-nodes.ts | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/domain/graph/builder/stages/insert-nodes.ts b/src/domain/graph/builder/stages/insert-nodes.ts index d8c340f0..fbbdf5fa 100644 --- a/src/domain/graph/builder/stages/insert-nodes.ts +++ b/src/domain/graph/builder/stages/insert-nodes.ts @@ -60,22 +60,28 @@ function insertDefinitionsAndExports( } batchInsertNodes(db, phase1Rows); - // Mark exported symbols in batches + // Mark exported symbols in batches (cache 
prepared statements by chunk size) if (exportKeys.length > 0) { const EXPORT_CHUNK = 500; + const exportStmtCache = new Map(); for (let i = 0; i < exportKeys.length; i += EXPORT_CHUNK) { const end = Math.min(i + EXPORT_CHUNK, exportKeys.length); const chunkSize = end - i; - const conditions = Array.from( - { length: chunkSize }, - () => '(name = ? AND kind = ? AND file = ? AND line = ?)', - ).join(' OR '); + let updateStmt = exportStmtCache.get(chunkSize); + if (!updateStmt) { + const conditions = Array.from( + { length: chunkSize }, + () => '(name = ? AND kind = ? AND file = ? AND line = ?)', + ).join(' OR '); + updateStmt = db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`); + exportStmtCache.set(chunkSize, updateStmt); + } const vals: unknown[] = []; for (let j = i; j < end; j++) { const k = exportKeys[j] as unknown[]; vals.push(k[0], k[1], k[2], k[3]); } - db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`).run(...vals); + updateStmt.run(...vals); } } } From 828157dfa8c3079e145136d1432e9efdd80d4e2c Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 17:33:37 -0600 Subject: [PATCH 5/6] fix(types): resolve TS strict-mode errors in structure.ts and complexity.ts (#606) Impact: 3 functions changed, 2 affected --- src/features/complexity.ts | 7 ++++--- src/features/structure.ts | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/features/complexity.ts b/src/features/complexity.ts index 79b37ab3..559238d9 100644 --- a/src/features/complexity.ts +++ b/src/features/complexity.ts @@ -305,7 +305,8 @@ export function computeAllMetrics( nestingNodeTypes: nestingNodes, }); - const rawResult = results.complexity as { + // biome-ignore lint/complexity/useLiteralKeys: noPropertyAccessFromIndexSignature requires bracket notation + const rawResult = results['complexity'] as { cognitive: number; cyclomatic: number; maxNesting: number; @@ -689,7 +690,7 @@ export function 
complexityData( // Check if graph has nodes even though complexity table is missing/empty let hasGraph = false; try { - hasGraph = db.prepare<{ c: number }>('SELECT COUNT(*) as c FROM nodes').get()?.c > 0; + hasGraph = (db.prepare<{ c: number }>('SELECT COUNT(*) as c FROM nodes').get()?.c ?? 0) > 0; } catch (e2: unknown) { debug(`nodes table check failed: ${(e2 as Error).message}`); } @@ -796,7 +797,7 @@ export function complexityData( let hasGraph = false; if (summary === null) { try { - hasGraph = db.prepare<{ c: number }>('SELECT COUNT(*) as c FROM nodes').get()?.c > 0; + hasGraph = (db.prepare<{ c: number }>('SELECT COUNT(*) as c FROM nodes').get()?.c ?? 0) > 0; } catch (e: unknown) { debug(`nodes table check failed: ${(e as Error).message}`); } diff --git a/src/features/structure.ts b/src/features/structure.ts index 12119b48..d649d2c8 100644 --- a/src/features/structure.ts +++ b/src/features/structure.ts @@ -744,7 +744,8 @@ export function hotspotsData( }; const stmt = HOTSPOT_QUERIES[metric] ?? 
HOTSPOT_QUERIES['fan-in']; - const rows = stmt.all(kind, limit); + // stmt is always defined: metric is a valid key or the fallback is a concrete property + const rows = stmt!.all(kind, limit); const hotspots = rows.map((r) => ({ name: r.name, From b4f4ce2e999c91c295b7c0eab431cb63ec0afd5d Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 25 Mar 2026 18:20:43 -0600 Subject: [PATCH 6/6] fix: correct misleading comments and cache role UPDATE stmts (#606) - Fix misleading "single nodeIdMap pass" comment in insertChildrenAndEdges (actually two passes: one before and one after batchInsertNodes) - Cache role UPDATE prepared statements per chunk size in classifyNodeRoles, consistent with WeakMap-based caching pattern used in helpers.ts Impact: 2 functions changed, 4 affected --- src/domain/graph/builder/stages/insert-nodes.ts | 4 ++-- src/features/structure.ts | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/domain/graph/builder/stages/insert-nodes.ts b/src/domain/graph/builder/stages/insert-nodes.ts index fbbdf5fa..a7e06229 100644 --- a/src/domain/graph/builder/stages/insert-nodes.ts +++ b/src/domain/graph/builder/stages/insert-nodes.ts @@ -86,7 +86,7 @@ function insertDefinitionsAndExports( } } -// ── Phase 2+3: Insert children and containment edges (single nodeIdMap pass) ── +// ── Phase 2+3: Insert children and containment edges (two nodeIdMap passes) ── function insertChildrenAndEdges( db: BetterSqlite3.Database, @@ -96,7 +96,7 @@ function insertChildrenAndEdges( const edgeRows: unknown[][] = []; for (const [relPath, symbols] of allSymbols) { - // Single bulkNodeIdsByFile call per file, shared across children + edges + // First pass: collect file→def edges and child rows const nodeIdMap = new Map(); for (const row of bulkNodeIdsByFile(db, relPath)) { nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); diff --git a/src/features/structure.ts b/src/features/structure.ts index 
d649d2c8..9976907f 100644 --- a/src/features/structure.ts +++ b/src/features/structure.ts @@ -485,16 +485,22 @@ export function classifyNodeRoles(db: BetterSqlite3Database): RoleSummary { // Batch UPDATE: one statement per role instead of one per node const ROLE_CHUNK = 500; + const roleStmtCache = new Map(); db.transaction(() => { db.prepare('UPDATE nodes SET role = NULL').run(); for (const [role, ids] of idsByRole) { for (let i = 0; i < ids.length; i += ROLE_CHUNK) { const end = Math.min(i + ROLE_CHUNK, ids.length); const chunkSize = end - i; - const placeholders = Array.from({ length: chunkSize }, () => '?').join(','); + let stmt = roleStmtCache.get(chunkSize); + if (!stmt) { + const placeholders = Array.from({ length: chunkSize }, () => '?').join(','); + stmt = db.prepare(`UPDATE nodes SET role = ? WHERE id IN (${placeholders})`); + roleStmtCache.set(chunkSize, stmt); + } const vals: unknown[] = [role]; for (let j = i; j < end; j++) vals.push(ids[j]); - db.prepare(`UPDATE nodes SET role = ? WHERE id IN (${placeholders})`).run(...vals); + stmt.run(...vals); } } })();