From e95a6de9a674bf399a06929f85172bc0ae4148dc Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 27 Mar 2026 15:55:44 -0600 Subject: [PATCH 1/8] perf(build): native Rust/rusqlite for roles classification and edge insertion (6.12) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Roles: move classifyNodeRolesFull/Incremental SQL + classification logic to Rust (roles_db.rs). Single rusqlite connection runs fan-in/fan-out queries, computes medians, classifies roles, and batch-updates nodes — eliminates ~10 JS<->SQLite round-trips. Edges: add bulk_insert_edges (edges_db.rs) that writes computed edges directly to SQLite via rusqlite instead of marshaling back to JS. Restructure buildEdges to run edge computation in better-sqlite3 transaction, then native insert outside to avoid connection contention. 1-file regression fix: skip native call-edge path for small incremental builds (≤3 files) where napi-rs marshaling overhead exceeds savings. Both paths fall back gracefully to JS when native is unavailable. --- crates/codegraph-core/src/edges_db.rs | 79 +++ crates/codegraph-core/src/lib.rs | 2 + crates/codegraph-core/src/roles_db.rs | 558 ++++++++++++++++++ .../graph/builder/stages/build-edges.ts | 43 +- .../graph/builder/stages/build-structure.ts | 48 +- src/types.ts | 39 ++ 6 files changed, 754 insertions(+), 15 deletions(-) create mode 100644 crates/codegraph-core/src/edges_db.rs create mode 100644 crates/codegraph-core/src/roles_db.rs diff --git a/crates/codegraph-core/src/edges_db.rs b/crates/codegraph-core/src/edges_db.rs new file mode 100644 index 00000000..ceb5d36b --- /dev/null +++ b/crates/codegraph-core/src/edges_db.rs @@ -0,0 +1,79 @@ +//! Bulk edge insertion via rusqlite — native replacement for the JS +//! `batchInsertEdges` helper. +//! +//! Used by the build-edges stage to write computed call/receiver/extends/ +//! 
implements edges directly to SQLite without marshaling back to JS. + +use napi_derive::napi; +use rusqlite::{params, Connection, OpenFlags}; + +/// A single edge row to insert: [source_id, target_id, kind, confidence, dynamic]. +#[napi(object)] +#[derive(Debug, Clone)] +pub struct EdgeRow { + #[napi(js_name = "sourceId")] + pub source_id: u32, + #[napi(js_name = "targetId")] + pub target_id: u32, + pub kind: String, + pub confidence: f64, + pub dynamic: u32, +} + +/// Bulk-insert edge rows into the database via rusqlite. +/// Runs all writes in a single SQLite transaction with chunked multi-value +/// INSERT statements for maximum throughput. +/// +/// Returns `true` on success, `false` on any error so the JS caller can +/// fall back to the JS batch insert path. +#[napi] +pub fn bulk_insert_edges(db_path: String, edges: Vec<EdgeRow>) -> bool { + if edges.is_empty() { + return true; + } + let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX; + let mut conn = match Connection::open_with_flags(&db_path, flags) { + Ok(c) => c, + Err(_) => return false, + }; + let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000"); + do_insert(&mut conn, &edges).is_ok() +} + +const CHUNK: usize = 200; + +fn do_insert(conn: &mut Connection, edges: &[EdgeRow]) -> rusqlite::Result<()> { + let tx = conn.transaction()?; + + for chunk in edges.chunks(CHUNK) { + let placeholders: Vec<String> = (0..chunk.len()) + .map(|i| { + let base = i * 5; + format!( + "(?{},?{},?{},?{},?{})", + base + 1, + base + 2, + base + 3, + base + 4, + base + 5 + ) + }) + .collect(); + let sql = format!( + "INSERT OR IGNORE INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES {}", + placeholders.join(",") + ); + let mut stmt = tx.prepare_cached(&sql)?; + for (i, edge) in chunk.iter().enumerate() { + let base = i * 5; + stmt.raw_bind_parameter(base as i32 + 1, edge.source_id)?; + stmt.raw_bind_parameter(base as i32 + 2, edge.target_id)?; +
stmt.raw_bind_parameter(base as i32 + 3, edge.kind.as_str())?; + stmt.raw_bind_parameter(base as i32 + 4, edge.confidence)?; + stmt.raw_bind_parameter(base as i32 + 5, edge.dynamic)?; + } + stmt.raw_execute()?; + } + + tx.commit() +} diff --git a/crates/codegraph-core/src/lib.rs b/crates/codegraph-core/src/lib.rs index 533fb2df..00da9ec7 100644 --- a/crates/codegraph-core/src/lib.rs +++ b/crates/codegraph-core/src/lib.rs @@ -5,12 +5,14 @@ pub mod constants; pub mod cycles; pub mod dataflow; pub mod edge_builder; +pub mod edges_db; pub mod extractors; pub mod import_resolution; pub mod incremental; pub mod insert_nodes; pub mod parallel; pub mod parser_registry; +pub mod roles_db; pub mod types; use napi_derive::napi; diff --git a/crates/codegraph-core/src/roles_db.rs b/crates/codegraph-core/src/roles_db.rs new file mode 100644 index 00000000..e1a49207 --- /dev/null +++ b/crates/codegraph-core/src/roles_db.rs @@ -0,0 +1,558 @@ +//! Native role classification via rusqlite. +//! +//! Replaces the JS `classifyNodeRolesFull` / `classifyNodeRolesIncremental` +//! functions: runs fan-in/fan-out queries, computes medians, classifies roles, +//! and batch-updates nodes — all in a single Rust function with one DB +//! connection, eliminating JS<->SQLite round-trips. 
+ +use std::collections::HashMap; + +use napi_derive::napi; +use rusqlite::{params, Connection, OpenFlags}; + +// ── Constants ──────────────────────────────────────────────────────── + +const FRAMEWORK_ENTRY_PREFIXES: &[&str] = &["route:", "event:", "command:"]; + +const LEAF_KINDS: &[&str] = &["parameter", "property", "constant"]; + +const TEST_FILE_PATTERNS: &[&str] = &[ + "%.test.%", + "%.spec.%", + "%__test__%", + "%__tests__%", + "%.stories.%", +]; + +// ── Output types ───────────────────────────────────────────────────── + +#[napi(object)] +#[derive(Debug, Clone, Default)] +pub struct RoleSummary { + pub entry: u32, + pub core: u32, + pub utility: u32, + pub adapter: u32, + pub dead: u32, + #[napi(js_name = "deadLeaf")] + pub dead_leaf: u32, + #[napi(js_name = "deadEntry")] + pub dead_entry: u32, + #[napi(js_name = "deadFfi")] + pub dead_ffi: u32, + #[napi(js_name = "deadUnresolved")] + pub dead_unresolved: u32, + #[napi(js_name = "testOnly")] + pub test_only: u32, + pub leaf: u32, +} + +// ── Public napi entry points ───────────────────────────────────────── + +/// Full role classification: queries all nodes, computes fan-in/fan-out, +/// classifies roles, and batch-updates the `role` column. +/// Returns a summary of role counts, or null on failure. +#[napi] +pub fn classify_roles_full(db_path: String) -> Option { + let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX; + let mut conn = Connection::open_with_flags(&db_path, flags).ok()?; + let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000"); + do_classify_full(&mut conn).ok() +} + +/// Incremental role classification: only reclassifies nodes from changed files +/// plus their immediate edge neighbours. +/// Returns a summary of role counts for the affected nodes, or null on failure. 
+#[napi] +pub fn classify_roles_incremental( + db_path: String, + changed_files: Vec, +) -> Option { + let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX; + let mut conn = Connection::open_with_flags(&db_path, flags).ok()?; + let _ = conn.execute_batch("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000"); + do_classify_incremental(&mut conn, &changed_files).ok() +} + +// ── Shared helpers ─────────────────────────────────────────────────── + +fn median(sorted: &[u32]) -> u32 { + if sorted.is_empty() { + return 0; + } + let mid = sorted.len() / 2; + if sorted.len() % 2 == 0 { + (sorted[mid - 1] + sorted[mid]) / 2 + } else { + sorted[mid] + } +} + +/// Dead sub-role classification matching JS `classifyDeadSubRole`. +fn classify_dead_sub_role(name: &str, kind: &str, file: &str) -> &'static str { + // Leaf kinds + if LEAF_KINDS.iter().any(|k| *k == kind) { + return "dead-leaf"; + } + // FFI boundary + let ffi_exts = [".rs", ".c", ".cpp", ".h", ".go", ".java", ".cs"]; + if ffi_exts.iter().any(|ext| file.ends_with(ext)) { + return "dead-ffi"; + } + // Framework entry points + if FRAMEWORK_ENTRY_PREFIXES + .iter() + .any(|p| name.starts_with(p)) + { + return "dead-entry"; + } + "dead-unresolved" +} + +/// Classify a single node into a role. 
+fn classify_node( + name: &str, + kind: &str, + file: &str, + fan_in: u32, + fan_out: u32, + is_exported: bool, + production_fan_in: u32, + median_fan_in: u32, + median_fan_out: u32, +) -> &'static str { + // Framework entry + if FRAMEWORK_ENTRY_PREFIXES + .iter() + .any(|p| name.starts_with(p)) + { + return "entry"; + } + + if fan_in == 0 && !is_exported { + // Test-only check: if node has test fan-in but zero total fan-in it's + // classified in the dead sub-role path (JS mirrors this) + return classify_dead_sub_role(name, kind, file); + } + + if fan_in == 0 && is_exported { + return "entry"; + } + + // Test-only: has callers but all are in test files + if fan_in > 0 && production_fan_in == 0 { + return "test-only"; + } + + let high_in = fan_in >= median_fan_in && fan_in > 0; + let high_out = fan_out >= median_fan_out && fan_out > 0; + + if high_in && !high_out { + "core" + } else if high_in && high_out { + "utility" + } else if !high_in && high_out { + "adapter" + } else { + "leaf" + } +} + +fn increment_summary(summary: &mut RoleSummary, role: &str) { + match role { + "entry" => summary.entry += 1, + "core" => summary.core += 1, + "utility" => summary.utility += 1, + "adapter" => summary.adapter += 1, + "leaf" => summary.leaf += 1, + "test-only" => summary.test_only += 1, + "dead-leaf" => { + summary.dead += 1; + summary.dead_leaf += 1; + } + "dead-ffi" => { + summary.dead += 1; + summary.dead_ffi += 1; + } + "dead-entry" => { + summary.dead += 1; + summary.dead_entry += 1; + } + "dead-unresolved" => { + summary.dead += 1; + summary.dead_unresolved += 1; + } + _ => summary.leaf += 1, + } +} + +/// Batch UPDATE nodes SET role = ? WHERE id IN (...) using chunked statements. 
+fn batch_update_roles( + tx: &rusqlite::Transaction, + ids_by_role: &HashMap<&str, Vec>, +) -> rusqlite::Result<()> { + const CHUNK: usize = 500; + + for (role, ids) in ids_by_role { + for chunk in ids.chunks(CHUNK) { + let placeholders: String = chunk.iter().map(|_| "?").collect::>().join(","); + let sql = format!("UPDATE nodes SET role = ?1 WHERE id IN ({})", placeholders); + let mut stmt = tx.prepare_cached(&sql)?; + // Bind role as param 1, then each id + stmt.raw_bind_parameter(1, *role)?; + for (i, id) in chunk.iter().enumerate() { + stmt.raw_bind_parameter(i as i32 + 2, *id)?; + } + stmt.raw_execute()?; + } + } + Ok(()) +} + +// ── Full classification ────────────────────────────────────────────── + +fn do_classify_full(conn: &mut Connection) -> rusqlite::Result { + let tx = conn.transaction()?; + let mut summary = RoleSummary::default(); + + // 1. Leaf kinds → dead-leaf (skip expensive fan-in/fan-out JOINs) + let leaf_ids: Vec = { + let mut stmt = + tx.prepare("SELECT id FROM nodes WHERE kind IN ('parameter', 'property')")?; + let rows = stmt.query_map([], |row| row.get::<_, i64>(0))?; + rows.filter_map(|r| r.ok()).collect() + }; + + // 2. 
Fan-in/fan-out for callable nodes + let rows: Vec<(i64, String, String, String, u32, u32)> = { + let mut stmt = tx.prepare( + "SELECT n.id, n.name, n.kind, n.file, + COALESCE(fi.cnt, 0) AS fan_in, + COALESCE(fo.cnt, 0) AS fan_out + FROM nodes n + LEFT JOIN ( + SELECT target_id, COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY target_id + ) fi ON n.id = fi.target_id + LEFT JOIN ( + SELECT source_id, COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY source_id + ) fo ON n.id = fo.source_id + WHERE n.kind NOT IN ('file', 'directory', 'parameter', 'property')", + )?; + let mapped = stmt.query_map([], |row| { + Ok(( + row.get::<_, i64>(0)?, + row.get::<_, String>(1)?, + row.get::<_, String>(2)?, + row.get::<_, String>(3)?, + row.get::<_, u32>(4)?, + row.get::<_, u32>(5)?, + )) + })?; + mapped.filter_map(|r| r.ok()).collect() + }; + + if rows.is_empty() && leaf_ids.is_empty() { + tx.commit()?; + return Ok(summary); + } + + // 3. Exported IDs (cross-file callers) + let exported_ids: std::collections::HashSet = { + let mut stmt = tx.prepare( + "SELECT DISTINCT e.target_id + FROM edges e + JOIN nodes caller ON e.source_id = caller.id + JOIN nodes target ON e.target_id = target.id + WHERE e.kind = 'calls' AND caller.file != target.file", + )?; + let rows = stmt.query_map([], |row| row.get::<_, i64>(0))?; + rows.filter_map(|r| r.ok()).collect() + }; + + // 4. Production fan-in (excluding test files) + let prod_fan_in: HashMap = { + let test_filter = TEST_FILE_PATTERNS + .iter() + .map(|p| format!("AND caller.file NOT LIKE '{}'", p)) + .collect::>() + .join(" "); + let sql = format!( + "SELECT e.target_id, COUNT(*) AS cnt + FROM edges e + JOIN nodes caller ON e.source_id = caller.id + WHERE e.kind = 'calls' {} + GROUP BY e.target_id", + test_filter + ); + let mut stmt = tx.prepare(&sql)?; + let mapped = stmt.query_map([], |row| Ok((row.get::<_, i64>(0)?, row.get::<_, u32>(1)?)))?; + mapped.filter_map(|r| r.ok()).collect() + }; + + // 5. 
Compute medians from non-zero values + let mut fan_in_vals: Vec = rows.iter().map(|r| r.4).filter(|&v| v > 0).collect(); + let mut fan_out_vals: Vec = rows.iter().map(|r| r.5).filter(|&v| v > 0).collect(); + fan_in_vals.sort_unstable(); + fan_out_vals.sort_unstable(); + let median_fan_in = median(&fan_in_vals); + let median_fan_out = median(&fan_out_vals); + + // 6. Classify and collect IDs by role + let mut ids_by_role: HashMap<&str, Vec> = HashMap::new(); + + if !leaf_ids.is_empty() { + summary.dead += leaf_ids.len() as u32; + summary.dead_leaf += leaf_ids.len() as u32; + ids_by_role.insert("dead-leaf", leaf_ids); + } + + for (id, name, kind, file, fan_in, fan_out) in &rows { + let is_exported = exported_ids.contains(id); + let prod_fi = prod_fan_in.get(id).copied().unwrap_or(0); + let role = classify_node( + name, + kind, + file, + *fan_in, + *fan_out, + is_exported, + prod_fi, + median_fan_in, + median_fan_out, + ); + increment_summary(&mut summary, role); + ids_by_role.entry(role).or_default().push(*id); + } + + // 7. 
Batch UPDATE: reset all roles then set per-role + tx.execute("UPDATE nodes SET role = NULL", [])?; + batch_update_roles(&tx, &ids_by_role)?; + + tx.commit()?; + Ok(summary) +} + +// ── Incremental classification ─────────────────────────────────────── + +fn do_classify_incremental( + conn: &mut Connection, + changed_files: &[String], +) -> rusqlite::Result { + let tx = conn.transaction()?; + let mut summary = RoleSummary::default(); + + // Build placeholders for changed files + let seed_ph: String = changed_files.iter().map(|_| "?").collect::>().join(","); + + // Expand affected set: include edge neighbours + let neighbour_sql = format!( + "SELECT DISTINCT n2.file FROM edges e + JOIN nodes n1 ON (e.source_id = n1.id OR e.target_id = n1.id) + JOIN nodes n2 ON (e.source_id = n2.id OR e.target_id = n2.id) + WHERE e.kind = 'calls' + AND n1.file IN ({}) + AND n2.file NOT IN ({}) + AND n2.kind NOT IN ('file', 'directory')", + seed_ph, seed_ph + ); + let neighbour_files: Vec = { + let mut stmt = tx.prepare(&neighbour_sql)?; + // Bind changed_files twice (for both IN clauses) + let mut idx = 1; + for f in changed_files { + stmt.raw_bind_parameter(idx, f.as_str())?; + idx += 1; + } + for f in changed_files { + stmt.raw_bind_parameter(idx, f.as_str())?; + idx += 1; + } + let rows = stmt.raw_query(); + let mut result = Vec::new(); + let mut rows = rows; + while let Some(row) = rows.next()? { + result.push(row.get::<_, String>(0)?); + } + result + }; + + let mut all_affected: Vec<&str> = changed_files.iter().map(|s| s.as_str()).collect(); + for f in &neighbour_files { + all_affected.push(f.as_str()); + } + let affected_ph: String = all_affected.iter().map(|_| "?").collect::>().join(","); + + // 1. Global medians from edge distribution + let median_fan_in = { + let mut stmt = tx.prepare( + "SELECT COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY target_id", + )?; + let mut vals: Vec = stmt + .query_map([], |row| row.get::<_, u32>(0))? 
+ .filter_map(|r| r.ok()) + .collect(); + vals.sort_unstable(); + median(&vals) + }; + let median_fan_out = { + let mut stmt = tx.prepare( + "SELECT COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY source_id", + )?; + let mut vals: Vec = stmt + .query_map([], |row| row.get::<_, u32>(0))? + .filter_map(|r| r.ok()) + .collect(); + vals.sort_unstable(); + median(&vals) + }; + + // 2a. Leaf kinds in affected files + let leaf_sql = format!( + "SELECT id FROM nodes WHERE kind IN ('parameter', 'property') AND file IN ({})", + affected_ph + ); + let leaf_ids: Vec = { + let mut stmt = tx.prepare(&leaf_sql)?; + for (i, f) in all_affected.iter().enumerate() { + stmt.raw_bind_parameter(i as i32 + 1, *f)?; + } + let mut rows = stmt.raw_query(); + let mut result = Vec::new(); + while let Some(row) = rows.next()? { + result.push(row.get::<_, i64>(0)?); + } + result + }; + + // 2b. Callable nodes with correlated subquery fan-in/fan-out + let rows_sql = format!( + "SELECT n.id, n.name, n.kind, n.file, + (SELECT COUNT(*) FROM edges WHERE kind = 'calls' AND target_id = n.id) AS fan_in, + (SELECT COUNT(*) FROM edges WHERE kind = 'calls' AND source_id = n.id) AS fan_out + FROM nodes n + WHERE n.kind NOT IN ('file', 'directory', 'parameter', 'property') + AND n.file IN ({})", + affected_ph + ); + let rows: Vec<(i64, String, String, String, u32, u32)> = { + let mut stmt = tx.prepare(&rows_sql)?; + for (i, f) in all_affected.iter().enumerate() { + stmt.raw_bind_parameter(i as i32 + 1, *f)?; + } + let mut qrows = stmt.raw_query(); + let mut result = Vec::new(); + while let Some(row) = qrows.next()? { + result.push(( + row.get::<_, i64>(0)?, + row.get::<_, String>(1)?, + row.get::<_, String>(2)?, + row.get::<_, String>(3)?, + row.get::<_, u32>(4)?, + row.get::<_, u32>(5)?, + )); + } + result + }; + + if rows.is_empty() && leaf_ids.is_empty() { + tx.commit()?; + return Ok(summary); + } + + // 3. 
Exported IDs for affected nodes + let exported_sql = format!( + "SELECT DISTINCT e.target_id + FROM edges e + JOIN nodes caller ON e.source_id = caller.id + JOIN nodes target ON e.target_id = target.id + WHERE e.kind = 'calls' AND caller.file != target.file + AND target.file IN ({})", + affected_ph + ); + let exported_ids: std::collections::HashSet = { + let mut stmt = tx.prepare(&exported_sql)?; + for (i, f) in all_affected.iter().enumerate() { + stmt.raw_bind_parameter(i as i32 + 1, *f)?; + } + let mut qrows = stmt.raw_query(); + let mut result = std::collections::HashSet::new(); + while let Some(row) = qrows.next()? { + result.insert(row.get::<_, i64>(0)?); + } + result + }; + + // 4. Production fan-in for affected nodes + let test_filter = TEST_FILE_PATTERNS + .iter() + .map(|p| format!("AND caller.file NOT LIKE '{}'", p)) + .collect::>() + .join(" "); + let prod_sql = format!( + "SELECT e.target_id, COUNT(*) AS cnt + FROM edges e + JOIN nodes caller ON e.source_id = caller.id + JOIN nodes target ON e.target_id = target.id + WHERE e.kind = 'calls' + AND target.file IN ({}) + {} + GROUP BY e.target_id", + affected_ph, test_filter + ); + let prod_fan_in: HashMap = { + let mut stmt = tx.prepare(&prod_sql)?; + for (i, f) in all_affected.iter().enumerate() { + stmt.raw_bind_parameter(i as i32 + 1, *f)?; + } + let mut qrows = stmt.raw_query(); + let mut result = HashMap::new(); + while let Some(row) = qrows.next()? { + result.insert(row.get::<_, i64>(0)?, row.get::<_, u32>(1)?); + } + result + }; + + // 5. 
Classify + let mut ids_by_role: HashMap<&str, Vec> = HashMap::new(); + + if !leaf_ids.is_empty() { + summary.dead += leaf_ids.len() as u32; + summary.dead_leaf += leaf_ids.len() as u32; + ids_by_role.insert("dead-leaf", leaf_ids); + } + + for (id, name, kind, file, fan_in, fan_out) in &rows { + let is_exported = exported_ids.contains(id); + let prod_fi = prod_fan_in.get(id).copied().unwrap_or(0); + let role = classify_node( + name, + kind, + file, + *fan_in, + *fan_out, + is_exported, + prod_fi, + median_fan_in, + median_fan_out, + ); + increment_summary(&mut summary, role); + ids_by_role.entry(role).or_default().push(*id); + } + + // 6. Reset roles for affected files only, then update + let reset_sql = format!( + "UPDATE nodes SET role = NULL WHERE file IN ({}) AND kind NOT IN ('file', 'directory')", + affected_ph + ); + { + let mut stmt = tx.prepare(&reset_sql)?; + for (i, f) in all_affected.iter().enumerate() { + stmt.raw_bind_parameter(i as i32 + 1, *f)?; + } + stmt.raw_execute()?; + } + batch_update_roles(&tx, &ids_by_role)?; + + tx.commit()?; + Ok(summary) +} diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index 4d452128..086c073d 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -639,7 +639,11 @@ export async function buildEdges(ctx: PipelineContext): Promise { addLazyFallback(ctx, scopedLoad); const t0 = performance.now(); - const buildEdgesTx = db.transaction(() => { + const native = engineName === 'native' ? loadNative() : null; + + // Phase 1: Compute edges (inside better-sqlite3 transaction for barrel cleanup atomicity) + const allEdgeRows: EdgeRowTuple[] = []; + const computeEdgesTx = db.transaction(() => { // Delete stale outgoing edges for barrel-only files inside the transaction // so that deletion and re-creation are atomic (no edge loss on mid-build crash). 
if (ctx.barrelOnlyFiles.size > 0) { @@ -651,19 +655,42 @@ export async function buildEdges(ctx: PipelineContext): Promise { } } - const allEdgeRows: EdgeRowTuple[] = []; - buildImportEdges(ctx, getNodeIdStmt, allEdgeRows); - const native = engineName === 'native' ? loadNative() : null; - if (native?.buildCallEdges) { - buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodesBefore, native); + // Skip native call-edge path for small incremental builds (≤3 files): + // napi-rs marshaling overhead for allNodes exceeds computation savings. + const useNativeCallEdges = + native?.buildCallEdges && (ctx.isFullBuild || ctx.fileSymbols.size > 3); + if (useNativeCallEdges) { + buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodesBefore, native!); } else { buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows); } - batchInsertEdges(db, allEdgeRows); + // When using native edge insert, skip JS insert here — do it after tx commits. + // Otherwise insert edges within this transaction for atomicity. 
+ if (!native?.bulkInsertEdges) { + batchInsertEdges(db, allEdgeRows); + } }); - buildEdgesTx(); + computeEdgesTx(); + + // Phase 2: Native rusqlite bulk insert (outside better-sqlite3 transaction + // since rusqlite opens its own connection — avoids SQLITE_BUSY contention) + if (native?.bulkInsertEdges && allEdgeRows.length > 0) { + const nativeEdges = allEdgeRows.map((r) => ({ + sourceId: r[0], + targetId: r[1], + kind: r[2], + confidence: r[3], + dynamic: r[4], + })); + const ok = native.bulkInsertEdges(db.name, nativeEdges); + if (!ok) { + // Fall back to JS path on native failure + batchInsertEdges(db, allEdgeRows); + } + } + ctx.timing.edgesMs = performance.now() - t0; } diff --git a/src/domain/graph/builder/stages/build-structure.ts b/src/domain/graph/builder/stages/build-structure.ts index a3ea2d9b..5dfa44f2 100644 --- a/src/domain/graph/builder/stages/build-structure.ts +++ b/src/domain/graph/builder/stages/build-structure.ts @@ -6,6 +6,7 @@ import path from 'node:path'; import { performance } from 'node:perf_hooks'; import { debug } from '../../../../infrastructure/logger.js'; +import { loadNative } from '../../../../infrastructure/native.js'; import { normalizePath } from '../../../../shared/constants.js'; import type { ExtractorOutput } from '../../../../types.js'; import type { PipelineContext } from '../context.js'; @@ -86,13 +87,46 @@ export async function buildStructure(ctx: PipelineContext): Promise { // Classify node roles (incremental: only reclassify changed files' nodes) const t1 = performance.now(); try { - const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as { - classifyNodeRoles: ( - db: PipelineContext['db'], - changedFiles?: string[] | null, - ) => Record; - }; - const roleSummary = classifyNodeRoles(db, changedFileList); + let roleSummary: Record | null = null; + + // Try native rusqlite path first (eliminates JS<->SQLite round-trips) + if (ctx.engineName === 'native') { + const native = loadNative(); + if 
(native?.classifyRolesFull) { + const dbPath = db.name; + const nativeResult = + changedFileList && changedFileList.length > 0 + ? native.classifyRolesIncremental(dbPath, changedFileList) + : native.classifyRolesFull(dbPath); + if (nativeResult) { + roleSummary = { + entry: nativeResult.entry, + core: nativeResult.core, + utility: nativeResult.utility, + adapter: nativeResult.adapter, + dead: nativeResult.dead, + 'dead-leaf': nativeResult.deadLeaf, + 'dead-entry': nativeResult.deadEntry, + 'dead-ffi': nativeResult.deadFfi, + 'dead-unresolved': nativeResult.deadUnresolved, + 'test-only': nativeResult.testOnly, + leaf: nativeResult.leaf, + }; + } + } + } + + // Fall back to JS path + if (!roleSummary) { + const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as { + classifyNodeRoles: ( + db: PipelineContext['db'], + changedFiles?: string[] | null, + ) => Record; + }; + roleSummary = classifyNodeRoles(db, changedFileList); + } + debug( `Roles${changedFileList ? ` (incremental, ${changedFileList.length} files)` : ''}: ${Object.entries( roleSummary, diff --git a/src/types.ts b/src/types.ts index f593ae37..4939ff8a 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1838,6 +1838,45 @@ export interface NativeAddon { fileHashes: Array<{ file: string; hash: string; mtime: number; size: number }>, removedFiles: string[], ): boolean; + bulkInsertEdges( + dbPath: string, + edges: Array<{ + sourceId: number; + targetId: number; + kind: string; + confidence: number; + dynamic: number; + }>, + ): boolean; + classifyRolesFull(dbPath: string): { + entry: number; + core: number; + utility: number; + adapter: number; + dead: number; + deadLeaf: number; + deadEntry: number; + deadFfi: number; + deadUnresolved: number; + testOnly: number; + leaf: number; + } | null; + classifyRolesIncremental( + dbPath: string, + changedFiles: string[], + ): { + entry: number; + core: number; + utility: number; + adapter: number; + dead: number; + deadLeaf: number; + deadEntry: 
number; + deadFfi: number; + deadUnresolved: number; + testOnly: number; + leaf: number; + } | null; engineVersion(): string; ParseTreeCache: new () => NativeParseTreeCache; } From 0849b36e6b1081599696277cca7c1a40645f64ee Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 27 Mar 2026 16:13:16 -0600 Subject: [PATCH 2/8] fix(rust): use usize for raw_bind_parameter index, remove unused params import --- crates/codegraph-core/src/edges_db.rs | 12 ++++++------ crates/codegraph-core/src/roles_db.rs | 14 +++++++------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/crates/codegraph-core/src/edges_db.rs b/crates/codegraph-core/src/edges_db.rs index ceb5d36b..7bcbf6fc 100644 --- a/crates/codegraph-core/src/edges_db.rs +++ b/crates/codegraph-core/src/edges_db.rs @@ -5,7 +5,7 @@ //! implements edges directly to SQLite without marshaling back to JS. use napi_derive::napi; -use rusqlite::{params, Connection, OpenFlags}; +use rusqlite::{Connection, OpenFlags}; /// A single edge row to insert: [source_id, target_id, kind, confidence, dynamic]. 
#[napi(object)] @@ -66,11 +66,11 @@ fn do_insert(conn: &mut Connection, edges: &[EdgeRow]) -> rusqlite::Result<()> { let mut stmt = tx.prepare_cached(&sql)?; for (i, edge) in chunk.iter().enumerate() { let base = i * 5; - stmt.raw_bind_parameter(base as i32 + 1, edge.source_id)?; - stmt.raw_bind_parameter(base as i32 + 2, edge.target_id)?; - stmt.raw_bind_parameter(base as i32 + 3, edge.kind.as_str())?; - stmt.raw_bind_parameter(base as i32 + 4, edge.confidence)?; - stmt.raw_bind_parameter(base as i32 + 5, edge.dynamic)?; + stmt.raw_bind_parameter(base +1, edge.source_id)?; + stmt.raw_bind_parameter(base +2, edge.target_id)?; + stmt.raw_bind_parameter(base +3, edge.kind.as_str())?; + stmt.raw_bind_parameter(base +4, edge.confidence)?; + stmt.raw_bind_parameter(base +5, edge.dynamic)?; } stmt.raw_execute()?; } diff --git a/crates/codegraph-core/src/roles_db.rs b/crates/codegraph-core/src/roles_db.rs index e1a49207..925896c8 100644 --- a/crates/codegraph-core/src/roles_db.rs +++ b/crates/codegraph-core/src/roles_db.rs @@ -8,7 +8,7 @@ use std::collections::HashMap; use napi_derive::napi; -use rusqlite::{params, Connection, OpenFlags}; +use rusqlite::{Connection, OpenFlags}; // ── Constants ──────────────────────────────────────────────────────── @@ -201,7 +201,7 @@ fn batch_update_roles( // Bind role as param 1, then each id stmt.raw_bind_parameter(1, *role)?; for (i, id) in chunk.iter().enumerate() { - stmt.raw_bind_parameter(i as i32 + 2, *id)?; + stmt.raw_bind_parameter(i + 2, *id)?; } stmt.raw_execute()?; } @@ -414,7 +414,7 @@ fn do_classify_incremental( let leaf_ids: Vec = { let mut stmt = tx.prepare(&leaf_sql)?; for (i, f) in all_affected.iter().enumerate() { - stmt.raw_bind_parameter(i as i32 + 1, *f)?; + stmt.raw_bind_parameter(i + 1, *f)?; } let mut rows = stmt.raw_query(); let mut result = Vec::new(); @@ -437,7 +437,7 @@ fn do_classify_incremental( let rows: Vec<(i64, String, String, String, u32, u32)> = { let mut stmt = tx.prepare(&rows_sql)?; for (i, f) in 
all_affected.iter().enumerate() { - stmt.raw_bind_parameter(i as i32 + 1, *f)?; + stmt.raw_bind_parameter(i + 1, *f)?; } let mut qrows = stmt.raw_query(); let mut result = Vec::new(); @@ -472,7 +472,7 @@ fn do_classify_incremental( let exported_ids: std::collections::HashSet = { let mut stmt = tx.prepare(&exported_sql)?; for (i, f) in all_affected.iter().enumerate() { - stmt.raw_bind_parameter(i as i32 + 1, *f)?; + stmt.raw_bind_parameter(i + 1, *f)?; } let mut qrows = stmt.raw_query(); let mut result = std::collections::HashSet::new(); @@ -502,7 +502,7 @@ fn do_classify_incremental( let prod_fan_in: HashMap = { let mut stmt = tx.prepare(&prod_sql)?; for (i, f) in all_affected.iter().enumerate() { - stmt.raw_bind_parameter(i as i32 + 1, *f)?; + stmt.raw_bind_parameter(i + 1, *f)?; } let mut qrows = stmt.raw_query(); let mut result = HashMap::new(); @@ -547,7 +547,7 @@ fn do_classify_incremental( { let mut stmt = tx.prepare(&reset_sql)?; for (i, f) in all_affected.iter().enumerate() { - stmt.raw_bind_parameter(i as i32 + 1, *f)?; + stmt.raw_bind_parameter(i + 1, *f)?; } stmt.raw_execute()?; } From 54d66a5901f9c72c32a20deaed676a9c446c61d8 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 27 Mar 2026 16:37:14 -0600 Subject: [PATCH 3/8] fix(rust): port file-path dead-entry detection from JS to native classify_dead_sub_role (#658) --- crates/codegraph-core/src/roles_db.rs | 31 +++++++++++++++++++++------ 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/crates/codegraph-core/src/roles_db.rs b/crates/codegraph-core/src/roles_db.rs index 925896c8..784787d7 100644 --- a/crates/codegraph-core/src/roles_db.rs +++ b/crates/codegraph-core/src/roles_db.rs @@ -16,6 +16,25 @@ const FRAMEWORK_ENTRY_PREFIXES: &[&str] = &["route:", "event:", "command:"]; const LEAF_KINDS: &[&str] = &["parameter", "property", "constant"]; +/// Path patterns indicating framework-dispatched entry points (matches JS +/// 
`ENTRY_PATH_PATTERNS` in `graph/classifiers/roles.ts`). +const ENTRY_PATH_PATTERNS: &[&str] = &[ + "cli/commands/", + "cli\\commands\\", + "mcp/", + "mcp\\", + "routes/", + "routes\\", + "route/", + "route\\", + "handlers/", + "handlers\\", + "handler/", + "handler\\", + "middleware/", + "middleware\\", +]; + const TEST_FILE_PATTERNS: &[&str] = &[ "%.test.%", "%.spec.%", @@ -89,21 +108,19 @@ fn median(sorted: &[u32]) -> u32 { } /// Dead sub-role classification matching JS `classifyDeadSubRole`. -fn classify_dead_sub_role(name: &str, kind: &str, file: &str) -> &'static str { +fn classify_dead_sub_role(_name: &str, kind: &str, file: &str) -> &'static str { // Leaf kinds if LEAF_KINDS.iter().any(|k| *k == kind) { return "dead-leaf"; } - // FFI boundary + // FFI boundary (checked before dead-entry — an FFI boundary is a more + // fundamental classification than a path-based hint, matching JS priority) let ffi_exts = [".rs", ".c", ".cpp", ".h", ".go", ".java", ".cs"]; if ffi_exts.iter().any(|ext| file.ends_with(ext)) { return "dead-ffi"; } - // Framework entry points - if FRAMEWORK_ENTRY_PREFIXES - .iter() - .any(|p| name.starts_with(p)) - { + // Framework-dispatched entry points (CLI commands, MCP tools, routes) + if ENTRY_PATH_PATTERNS.iter().any(|p| file.contains(p)) { return "dead-entry"; } "dead-unresolved" From 59efd5387d36e8b88840c8690bc0f826da0b33b3 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 27 Mar 2026 16:37:32 -0600 Subject: [PATCH 4/8] fix(build): add optional-chaining guard for classifyRolesIncremental call (#658) --- src/domain/graph/builder/stages/build-structure.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/domain/graph/builder/stages/build-structure.ts b/src/domain/graph/builder/stages/build-structure.ts index 5dfa44f2..212ce367 100644 --- a/src/domain/graph/builder/stages/build-structure.ts +++ b/src/domain/graph/builder/stages/build-structure.ts @@ -96,7 +96,7 @@ 
export async function buildStructure(ctx: PipelineContext): Promise { const dbPath = db.name; const nativeResult = changedFileList && changedFileList.length > 0 - ? native.classifyRolesIncremental(dbPath, changedFileList) + ? native.classifyRolesIncremental?.(dbPath, changedFileList) : native.classifyRolesFull(dbPath); if (nativeResult) { roleSummary = { From e8c848fa934c141c82e361140ea1cdde84f18e15 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 27 Mar 2026 21:35:21 -0600 Subject: [PATCH 5/8] fix(build): correct crash-atomicity comment for native edge insert path (#658) The comment claimed barrel-edge deletion and re-insertion were atomic, but with the native rusqlite path the insertion happens in Phase 2 on a separate connection. Updated the comment to accurately describe the atomicity guarantee: JS path is fully atomic; native path has a transient gap that self-heals on next incremental rebuild. --- src/domain/graph/builder/stages/build-edges.ts | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index 086c073d..9819ad21 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -641,11 +641,14 @@ export async function buildEdges(ctx: PipelineContext): Promise { const t0 = performance.now(); const native = engineName === 'native' ? loadNative() : null; - // Phase 1: Compute edges (inside better-sqlite3 transaction for barrel cleanup atomicity) + // Phase 1: Compute edges inside a better-sqlite3 transaction. + // Barrel-edge deletion lives here so that the JS path (which also inserts + // edges in this transaction) keeps deletion + insertion atomic. 
+ // When using the native rusqlite path, insertion happens in Phase 2 on a + // separate connection — a crash between Phase 1 and Phase 2 would leave + // barrel edges missing until the next incremental rebuild re-creates them. const allEdgeRows: EdgeRowTuple[] = []; const computeEdgesTx = db.transaction(() => { - // Delete stale outgoing edges for barrel-only files inside the transaction - // so that deletion and re-creation are atomic (no edge loss on mid-build crash). if (ctx.barrelOnlyFiles.size > 0) { const deleteOutgoingEdges = db.prepare( 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', From 53d04a697bfa74500a05a146aab98be7b0fbe8e6 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 27 Mar 2026 21:53:03 -0600 Subject: [PATCH 6/8] fix(rust): reduce edge insert CHUNK from 200 to 199 for SQLite bind param safety (#658) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 200 rows × 5 params = 1000 bind parameters, which exceeds the legacy SQLITE_MAX_VARIABLE_NUMBER default of 999. While bundled SQLite 3.43+ raises the limit, reducing to 199 (995 params) removes the risk for any SQLite build with the old default. --- crates/codegraph-core/src/edges_db.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/codegraph-core/src/edges_db.rs b/crates/codegraph-core/src/edges_db.rs index 7bcbf6fc..25f1ae51 100644 --- a/crates/codegraph-core/src/edges_db.rs +++ b/crates/codegraph-core/src/edges_db.rs @@ -40,7 +40,9 @@ pub fn bulk_insert_edges(db_path: String, edges: Vec) -> bool { do_insert(&mut conn, &edges).is_ok() } -const CHUNK: usize = 200; +/// 199 rows × 5 params = 995 bind parameters per statement, safely under +/// the legacy `SQLITE_MAX_VARIABLE_NUMBER` default of 999. 
+const CHUNK: usize = 199; fn do_insert(conn: &mut Connection, edges: &[EdgeRow]) -> rusqlite::Result<()> { let tx = conn.transaction()?; From 2281c0030ba5ecf2f6509e84ccb399d1c46367aa Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 27 Mar 2026 21:53:24 -0600 Subject: [PATCH 7/8] fix(build): add debug log when native bulkInsertEdges falls back to JS (#658) The native edge insert fallback path was silent, making it hard to diagnose when the native path fails. Added a debug() call so the fallback is visible in verbose/debug output. --- src/domain/graph/builder/stages/build-edges.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index 9819ad21..2869ada5 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -7,6 +7,7 @@ import path from 'node:path'; import { performance } from 'node:perf_hooks'; import { getNodeId } from '../../../../db/index.js'; +import { debug } from '../../../../infrastructure/logger.js'; import { loadNative } from '../../../../infrastructure/native.js'; import type { BetterSqlite3Database, @@ -690,7 +691,7 @@ export async function buildEdges(ctx: PipelineContext): Promise { })); const ok = native.bulkInsertEdges(db.name, nativeEdges); if (!ok) { - // Fall back to JS path on native failure + debug('Native bulkInsertEdges failed — falling back to JS batchInsertEdges'); batchInsertEdges(db, allEdgeRows); } } From 889226030e19363eff96bfe96437226c42ffc77b Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sun, 29 Mar 2026 03:53:35 -0600 Subject: [PATCH 8/8] docs(competitive): add CodeGraphContext as Tier 1 #11 competitor (score 3.8) Add CodeGraphContext/CodeGraphContext (2,664 stars, Python, MIT) to the competitive analysis. 
Tree-sitter + graph DB (KuzuDB/FalkorDB/Neo4j), 14 languages, CLI + MCP, bundle registry, 10+ IDE setup wizard. Strong community traction but shallow analysis depth vs codegraph. --- generated/competitive/COMPETITIVE_ANALYSIS.md | 229 +++++++++--------- 1 file changed, 120 insertions(+), 109 deletions(-) diff --git a/generated/competitive/COMPETITIVE_ANALYSIS.md b/generated/competitive/COMPETITIVE_ANALYSIS.md index 3b03e1b9..5aefada1 100644 --- a/generated/competitive/COMPETITIVE_ANALYSIS.md +++ b/generated/competitive/COMPETITIVE_ANALYSIS.md @@ -1,7 +1,7 @@ # Competitive Analysis — Code Graph / Code Intelligence Tools -**Date:** 2026-03-21 (updated from 2026-02-25) -**Scope:** 140+ code analysis tools evaluated, 85+ ranked against `@optave/codegraph` +**Date:** 2026-03-29 (updated from 2026-03-21) +**Scope:** 140+ code analysis tools evaluated, 86+ ranked against `@optave/codegraph` --- @@ -23,87 +23,88 @@ Ranked by weighted score across 6 dimensions (each 1–5): | 8 | 4.2 | [Anandb71/arbor](https://github.com/Anandb71/arbor) | 85 | Rust | MIT | Native GUI, confidence scoring, architectural role classification, fuzzy search, MCP | | 9 | 4.0 | [SimplyLiz/CodeMCP (CKB)](https://github.com/SimplyLiz/CodeMCP) | 77 | Go | Custom | SCIP-based indexing, compound operations (83% token savings), CODEOWNERS, secret scanning, impact analysis, architecture mapping (v8.1.0) | | 10 | 3.8 | [harshkedia177/axon](https://github.com/harshkedia177/axon) | 577 | Python | MIT | 11-phase pipeline, KuzuDB, Leiden community detection, dead code, change coupling, MCP + CLI, hit v1.0 milestone | -| 11 | 3.8 | [seatedro/glimpse](https://github.com/seatedro/glimpse) | 349 | Rust | MIT | Clipboard-first codebase-to-LLM tool with call graphs, token counting, LSP resolution. 
**Stagnant since Jan 2026** | -| 12 | 3.8 | [ShiftLeftSecurity/codepropertygraph](https://github.com/ShiftLeftSecurity/codepropertygraph) | 564 | Scala | Apache-2.0 | CPG specification + Tinkergraph library, Scala query DSL, protobuf serialization (Joern foundation) | -| 13 | 3.8 | [Jakedismo/codegraph-rust](https://github.com/Jakedismo/codegraph-rust) | 142 | Rust | None | 100% Rust GraphRAG, SurrealDB, LSP-powered dataflow analysis, architecture boundary enforcement | -| 14 | 3.7 | [JudiniLabs/mcp-code-graph](https://github.com/JudiniLabs/mcp-code-graph) | 380 | JavaScript | MIT | Cloud-hosted MCP server by CodeGPT, semantic search, dependency links (requires account) | -| 15 | 3.7 | [entrepeneur4lyf/code-graph-mcp](https://github.com/entrepeneur4lyf/code-graph-mcp) | 83 | Python | MIT | ast-grep for 25+ languages, complexity metrics, code smells, circular dependency detection. **Stagnant since Jul 2025** | -| 16 | 3.7 | [cs-au-dk/jelly](https://github.com/cs-au-dk/jelly) | 423 | TypeScript | BSD-3 | Academic-grade JS/TS points-to analysis, call graphs, vulnerability exposure, 5 published papers | -| 17 | 3.7 | [colbymchenry/codegraph](https://github.com/colbymchenry/codegraph) | 308 | TypeScript | MIT | tree-sitter + SQLite + MCP, Claude Code token reduction benchmarks, npx installer. 
**Nearly doubled since Feb — naming competitor** | -| 18 | 3.5 | [er77/code-graph-rag-mcp](https://github.com/er77/code-graph-rag-mcp) | 89 | TypeScript | MIT | 26 MCP methods, 11 languages, tree-sitter, semantic search, hotspot analysis, clone detection | -| 19 | 3.5 | [MikeRecognex/mcp-codebase-index](https://github.com/MikeRecognex/mcp-codebase-index) | 25 | Python | AGPL-3.0 | 18 MCP tools, zero runtime deps, auto-incremental reindexing via git diff | -| 20 | 3.5 | [nahisaho/CodeGraphMCPServer](https://github.com/nahisaho/CodeGraphMCPServer) | 7 | Python | MIT | GraphRAG with Louvain community detection, 16 languages, 14 MCP tools, 334 tests | -| 21 | 3.5 | [dundalek/stratify](https://github.com/dundalek/stratify) | 102 | Clojure | MIT | Multi-backend extraction (LSP/SCIP/Joern), 10 languages, DGML/CodeCharta output, architecture linting | -| 22 | 3.5 | [kraklabs/cie](https://github.com/kraklabs/cie) | 9 | Go | AGPL-3.0 | Code Intelligence Engine: 20+ MCP tools, tree-sitter, semantic search (Ollama), Homebrew, single Go binary | -| 23 | 3.5 | [NeuralRays/codexray](https://github.com/NeuralRays/codexray) | 2 | TypeScript | MIT | 16 MCP tools, TF-IDF semantic search (~50MB), dead code, complexity, path finding | -| 24 | 3.3 | [anrgct/autodev-codebase](https://github.com/anrgct/autodev-codebase) | 111 | TypeScript | None | 40+ languages, 7 embedding providers, Cytoscape.js visualization, LLM reranking. 
**Stagnant since Jan 2026** | -| 25 | 3.3 | [DucPhamNgoc08/CodeVisualizer](https://github.com/DucPhamNgoc08/CodeVisualizer) | 475 | TypeScript | MIT | VS Code extension, tree-sitter WASM, flowcharts + dependency graphs, 5 AI providers, 9 themes | -| 26 | 3.3 | [helabenkhalfallah/code-health-meter](https://github.com/helabenkhalfallah/code-health-meter) | 34 | JavaScript | MIT | Formal health metrics (MI, CC, Louvain modularity), published in ACM TOSEM 2025 | -| 27 | 3.3 | [JohT/code-graph-analysis-pipeline](https://github.com/JohT/code-graph-analysis-pipeline) | 27 | Cypher | GPL-3.0 | 200+ CSV reports, ML anomaly detection, Leiden/HashGNN, jQAssistant + Neo4j for Java | -| 28 | 3.3 | [Lekssays/codebadger](https://github.com/Lekssays/codebadger) | 43 | Python | GPL-3.0 | Containerized MCP server using Joern CPG, 12+ languages | -| 29 | 3.3 | [Vasu014/loregrep](https://github.com/Vasu014/loregrep) | 12 | Rust | Apache-2.0 | In-memory index library, Rust + Python bindings, AI-tool-ready schemas | -| 30 | 3.3 | [Durafen/Claude-code-memory](https://github.com/Durafen/Claude-code-memory) | 72 | Python | None | Memory Guard quality gate, persistent codebase memory, Voyage AI + Qdrant | -| 31 | 3.2 | [anasdayeh/claude-context-local](https://github.com/anasdayeh/claude-context-local) | 0 | Python | None | 100% local, Merkle DAG incremental indexing, sharded FAISS, hybrid BM25+vector, GPU accel | -| 32 | 3.0 | [al1-nasir/codegraph-cli](https://github.com/al1-nasir/codegraph-cli) | 11 | Python | MIT | CrewAI multi-agent system, 6 LLM providers, browser explorer, DOCX export | -| 33 | 3.0 | [xnuinside/codegraph](https://github.com/xnuinside/codegraph) | 438 | Python | MIT | Python-only interactive HTML dependency diagrams with zoom/pan/search | -| 34 | 3.0 | [Adrninistrator/java-all-call-graph](https://github.com/Adrninistrator/java-all-call-graph) | 551 | Java | Apache-2.0 | Complete Java bytecode call graphs, Spring/MyBatis-aware, SQL-queryable DB | -| 35 | 3.0 | 
[Technologicat/pyan](https://github.com/Technologicat/pyan) | 395 | Python | GPL-2.0 | Python 3 call graph generator, module import analysis, cycle detection, interactive HTML | -| 36 | 3.0 | [clouditor/cloud-property-graph](https://github.com/clouditor/cloud-property-graph) | 28 | Kotlin | Apache-2.0 | Connects code property graphs with cloud runtime security assessment | +| 11 | 3.8 | [CodeGraphContext/CodeGraphContext](https://github.com/CodeGraphContext/CodeGraphContext) | 2,664 | Python | MIT | Tree-sitter + graph DB (KuzuDB/FalkorDB/Neo4j), 14 languages, CLI + MCP dual mode, interactive HTML viz, pre-indexed `.cgc` bundle registry for popular repos, setup wizard for 10+ IDEs, live file watching. **2,664 stars, 30 contributors — likely Hacktoberfest-boosted community** | +| 12 | 3.8 | [seatedro/glimpse](https://github.com/seatedro/glimpse) | 349 | Rust | MIT | Clipboard-first codebase-to-LLM tool with call graphs, token counting, LSP resolution. **Stagnant since Jan 2026** | +| 13 | 3.8 | [ShiftLeftSecurity/codepropertygraph](https://github.com/ShiftLeftSecurity/codepropertygraph) | 564 | Scala | Apache-2.0 | CPG specification + Tinkergraph library, Scala query DSL, protobuf serialization (Joern foundation) | +| 14 | 3.8 | [Jakedismo/codegraph-rust](https://github.com/Jakedismo/codegraph-rust) | 142 | Rust | None | 100% Rust GraphRAG, SurrealDB, LSP-powered dataflow analysis, architecture boundary enforcement | +| 15 | 3.7 | [JudiniLabs/mcp-code-graph](https://github.com/JudiniLabs/mcp-code-graph) | 380 | JavaScript | MIT | Cloud-hosted MCP server by CodeGPT, semantic search, dependency links (requires account) | +| 16 | 3.7 | [entrepeneur4lyf/code-graph-mcp](https://github.com/entrepeneur4lyf/code-graph-mcp) | 83 | Python | MIT | ast-grep for 25+ languages, complexity metrics, code smells, circular dependency detection. 
**Stagnant since Jul 2025** | +| 17 | 3.7 | [cs-au-dk/jelly](https://github.com/cs-au-dk/jelly) | 423 | TypeScript | BSD-3 | Academic-grade JS/TS points-to analysis, call graphs, vulnerability exposure, 5 published papers | +| 18 | 3.7 | [colbymchenry/codegraph](https://github.com/colbymchenry/codegraph) | 308 | TypeScript | MIT | tree-sitter + SQLite + MCP, Claude Code token reduction benchmarks, npx installer. **Nearly doubled since Feb — naming competitor** | +| 19 | 3.5 | [er77/code-graph-rag-mcp](https://github.com/er77/code-graph-rag-mcp) | 89 | TypeScript | MIT | 26 MCP methods, 11 languages, tree-sitter, semantic search, hotspot analysis, clone detection | +| 20 | 3.5 | [MikeRecognex/mcp-codebase-index](https://github.com/MikeRecognex/mcp-codebase-index) | 25 | Python | AGPL-3.0 | 18 MCP tools, zero runtime deps, auto-incremental reindexing via git diff | +| 21 | 3.5 | [nahisaho/CodeGraphMCPServer](https://github.com/nahisaho/CodeGraphMCPServer) | 7 | Python | MIT | GraphRAG with Louvain community detection, 16 languages, 14 MCP tools, 334 tests | +| 22 | 3.5 | [dundalek/stratify](https://github.com/dundalek/stratify) | 102 | Clojure | MIT | Multi-backend extraction (LSP/SCIP/Joern), 10 languages, DGML/CodeCharta output, architecture linting | +| 23 | 3.5 | [kraklabs/cie](https://github.com/kraklabs/cie) | 9 | Go | AGPL-3.0 | Code Intelligence Engine: 20+ MCP tools, tree-sitter, semantic search (Ollama), Homebrew, single Go binary | +| 24 | 3.5 | [NeuralRays/codexray](https://github.com/NeuralRays/codexray) | 2 | TypeScript | MIT | 16 MCP tools, TF-IDF semantic search (~50MB), dead code, complexity, path finding | +| 25 | 3.3 | [anrgct/autodev-codebase](https://github.com/anrgct/autodev-codebase) | 111 | TypeScript | None | 40+ languages, 7 embedding providers, Cytoscape.js visualization, LLM reranking. 
**Stagnant since Jan 2026** | +| 26 | 3.3 | [DucPhamNgoc08/CodeVisualizer](https://github.com/DucPhamNgoc08/CodeVisualizer) | 475 | TypeScript | MIT | VS Code extension, tree-sitter WASM, flowcharts + dependency graphs, 5 AI providers, 9 themes | +| 27 | 3.3 | [helabenkhalfallah/code-health-meter](https://github.com/helabenkhalfallah/code-health-meter) | 34 | JavaScript | MIT | Formal health metrics (MI, CC, Louvain modularity), published in ACM TOSEM 2025 | +| 28 | 3.3 | [JohT/code-graph-analysis-pipeline](https://github.com/JohT/code-graph-analysis-pipeline) | 27 | Cypher | GPL-3.0 | 200+ CSV reports, ML anomaly detection, Leiden/HashGNN, jQAssistant + Neo4j for Java | +| 29 | 3.3 | [Lekssays/codebadger](https://github.com/Lekssays/codebadger) | 43 | Python | GPL-3.0 | Containerized MCP server using Joern CPG, 12+ languages | +| 30 | 3.3 | [Vasu014/loregrep](https://github.com/Vasu014/loregrep) | 12 | Rust | Apache-2.0 | In-memory index library, Rust + Python bindings, AI-tool-ready schemas | +| 31 | 3.3 | [Durafen/Claude-code-memory](https://github.com/Durafen/Claude-code-memory) | 72 | Python | None | Memory Guard quality gate, persistent codebase memory, Voyage AI + Qdrant | +| 32 | 3.2 | [anasdayeh/claude-context-local](https://github.com/anasdayeh/claude-context-local) | 0 | Python | None | 100% local, Merkle DAG incremental indexing, sharded FAISS, hybrid BM25+vector, GPU accel | +| 33 | 3.0 | [al1-nasir/codegraph-cli](https://github.com/al1-nasir/codegraph-cli) | 11 | Python | MIT | CrewAI multi-agent system, 6 LLM providers, browser explorer, DOCX export | +| 34 | 3.0 | [xnuinside/codegraph](https://github.com/xnuinside/codegraph) | 438 | Python | MIT | Python-only interactive HTML dependency diagrams with zoom/pan/search | +| 35 | 3.0 | [Adrninistrator/java-all-call-graph](https://github.com/Adrninistrator/java-all-call-graph) | 551 | Java | Apache-2.0 | Complete Java bytecode call graphs, Spring/MyBatis-aware, SQL-queryable DB | +| 36 | 3.0 | 
[Technologicat/pyan](https://github.com/Technologicat/pyan) | 395 | Python | GPL-2.0 | Python 3 call graph generator, module import analysis, cycle detection, interactive HTML | +| 37 | 3.0 | [clouditor/cloud-property-graph](https://github.com/clouditor/cloud-property-graph) | 28 | Kotlin | Apache-2.0 | Connects code property graphs with cloud runtime security assessment | ### Tier 2: Niche & Single-Language Tools (score 2.0–2.9) | # | Score | Project | Stars | Lang | License | Summary | |---|-------|---------|-------|------|---------|---------| -| 37 | 2.9 | [rahulvgmail/CodeInteliMCP](https://github.com/rahulvgmail/CodeInteliMCP) | 8 | Python | None | DuckDB + ChromaDB (zero Docker), multi-repo, lightweight embedded DBs | -| 38 | 2.8 | [Aider-AI/aider](https://github.com/Aider-AI/aider) | 42,198 | Python | Apache-2.0 | AI pair programming CLI; tree-sitter repo map with PageRank-style graph ranking for LLM context selection, 100+ languages, multi-provider LLM support, git-integrated auto-commits. Moved to Aider-AI org | -| 39 | 2.8 | [scottrogowski/code2flow](https://github.com/scottrogowski/code2flow) | 4,528 | Python | MIT | Call graphs for Python/JS/Ruby/PHP via AST, DOT output, 100% test coverage | -| 40 | 2.8 | [ysk8hori/typescript-graph](https://github.com/ysk8hori/typescript-graph) | 200 | TypeScript | None | TypeScript file-level dependency Mermaid diagrams, code metrics (MI, CC), watch mode | -| 41 | 2.8 | [nuanced-dev/nuanced-py](https://github.com/nuanced-dev/nuanced-py) | 126 | Python | MIT | Python call graph enrichment designed for AI agent consumption | -| 42 | 2.8 | [sdsrss/code-graph-mcp](https://github.com/sdsrss/code-graph-mcp) | 16 | TypeScript | MIT | AST knowledge graph MCP server with tree-sitter, 10 languages. 
New entrant | -| 43 | 2.8 | [Bikach/codeGraph](https://github.com/Bikach/codeGraph) | 6 | TypeScript | MIT | Neo4j graph, Claude Code slash commands, Kotlin support, 40-50% cost reduction | -| 44 | 2.8 | [ChrisRoyse/CodeGraph](https://github.com/ChrisRoyse/CodeGraph) | 65 | TypeScript | None | Neo4j + MCP, multi-language, framework detection (React, Tailwind, Supabase) | -| 45 | 2.8 | [Symbolk/Code2Graph](https://github.com/Symbolk/Code2Graph) | 48 | Java | None | Multilingual code → language-agnostic graph representation | -| 46 | 2.7 | [yumeiriowl/repo-graphrag-mcp](https://github.com/yumeiriowl/repo-graphrag-mcp) | 3 | Python | MIT | LightRAG + tree-sitter, entity merge (code ↔ docs), implementation planning tool | -| 47 | 2.7 | [davidfraser/pyan](https://github.com/davidfraser/pyan) | 712 | Python | GPL-2.0 | Python call graph generator (stable fork), DOT/SVG/HTML output, Sphinx integration | -| 48 | 2.7 | [mamuz/PhpDependencyAnalysis](https://github.com/mamuz/PhpDependencyAnalysis) | 572 | PHP | MIT | PHP dependency graphs, cycle detection, architecture verification against defined layers | -| 49 | 2.7 | [faraazahmad/graphsense](https://github.com/faraazahmad/graphsense) | 35 | TypeScript | MIT | MCP server providing code intelligence via static analysis | -| 50 | 2.7 | [JonnoC/CodeRAG](https://github.com/JonnoC/CodeRAG) | 14 | TypeScript | MIT | Enterprise code intelligence with CK metrics, Neo4j, 23 analysis tools, MCP server | -| 51 | 2.6 | [0xjcf/MCP_CodeAnalysis](https://github.com/0xjcf/MCP_CodeAnalysis) | 7 | Python/TS | None | Stateful tools (XState), Redis sessions, socio-technical analysis, dual language impl | -| 52 | 2.5 | [koknat/callGraph](https://github.com/koknat/callGraph) | 325 | Perl | GPL-3.0 | Multi-language (22+) call graph generator via regex, GraphViz output | -| 53 | 2.5 | [RaheesAhmed/code-context-mcp](https://github.com/RaheesAhmed/code-context-mcp) | 0 | Python | MIT | Security pattern detection, auto architecture diagrams, code 
flow tracing | -| 54 | 2.5 | [league1991/CodeAtlasVsix](https://github.com/league1991/CodeAtlasVsix) | 265 | C# | GPL-2.0 | Visual Studio plugin, Doxygen-based call graph navigation (VS 2010-2015 era) | -| 55 | 2.5 | [beicause/call-graph](https://github.com/beicause/call-graph) | 105 | TypeScript | Apache-2.0 | VS Code extension generating call graphs via LSP call hierarchy API | -| 56 | 2.5 | [Thibault-Knobloch/codebase-intelligence](https://github.com/Thibault-Knobloch/codebase-intelligence) | 44 | Python | None | Code indexing + call graph + vector DB + natural language queries (requires OpenAI) | -| 57 | 2.5 | [darkmacheken/wasmati](https://github.com/darkmacheken/wasmati) | 31 | C++ | Apache-2.0 | CPG infrastructure for scanning vulnerabilities in WebAssembly | -| 58 | 2.5 | [sutragraph/sutracli](https://github.com/sutragraph/sutracli) | 28 | Python | GPL-3.0 | AI-powered cross-repo dependency graphs for coding agents | -| 59 | 2.5 | [julianjensen/ast-flow-graph](https://github.com/julianjensen/ast-flow-graph) | 69 | JavaScript | Other | JavaScript control flow graphs from AST analysis | -| 60 | 2.5 | [yoanbernabeu/grepai-skills](https://github.com/yoanbernabeu/grepai-skills) | 14 | — | MIT | 27 AI agent skills for semantic code search and call graph analysis | -| 61 | 2.5 | [GaloisInc/MATE](https://github.com/GaloisInc/MATE) | 194 | Python | BSD-3 | DARPA-funded interactive CPG-based bug hunting for C/C++ via LLVM | -| 62 | 2.4 | [shantham/codegraph](https://github.com/shantham/codegraph) | 0 | TypeScript | MIT | Polished `npx` one-command installer, sqlite-vss, 7 MCP tools | -| 63 | 2.3 | [ozyyshr/RepoGraph](https://github.com/ozyyshr/RepoGraph) | 251 | Python | Apache-2.0 | SWE-bench code graph research (ctags + networkx for LLM context) | -| 64 | 2.3 | [emad-elsaid/rubrowser](https://github.com/emad-elsaid/rubrowser) | 644 | Ruby | MIT | Ruby-only interactive D3 force-directed dependency graph | -| 65 | 2.3 | 
[Chentai-Kao/call-graph-plugin](https://github.com/Chentai-Kao/call-graph-plugin) | 87 | Kotlin | None | IntelliJ plugin for visualizing call graphs in IDE | -| 66 | 2.3 | [ehabterra/apispec](https://github.com/ehabterra/apispec) | 72 | Go | Apache-2.0 | OpenAPI 3.1 spec generator from Go code via call graph analysis | -| 67 | 2.3 | [huoyo/ko-time](https://github.com/huoyo/ko-time) | 61 | Java | LGPL-2.1 | Spring Boot call graph with runtime durations | -| 68 | 2.3 | [Fraunhofer-AISEC/codyze](https://github.com/Fraunhofer-AISEC/codyze) | 91 | Kotlin | None | CPG-based analyzer for cryptographic API misuse (archived, merged into cpg repo) | -| 69 | 2.3 | [CartographAI/mcp-server-codegraph](https://github.com/CartographAI/mcp-server-codegraph) | 17 | JavaScript | MIT | Lightweight MCP code graph (3 tools only, Python/JS/Rust) | -| 70 | 2.3 | [YounesBensafia/DevLens](https://github.com/YounesBensafia/DevLens) | 21 | Python | None | Repo scanner with AI summaries, dead code detection (dep graph not yet implemented) | -| 71 | 2.3 | [0xd219b/codegraph](https://github.com/0xd219b/codegraph) | 0 | Rust | None | Pure Rust, HTTP server mode, Java + Go support | -| 72 | 2.3 | [aryx/codegraph](https://github.com/aryx/codegraph) | 6 | OCaml | Other | Multi-language source code dependency visualizer (the original "codegraph" name) | -| 73 | 2.2 | [jmarkowski/codeviz](https://github.com/jmarkowski/codeviz) | 144 | Python | MIT | C/C++ `#include` header dependency graph visualization | -| 74 | 2.2 | [juanallo/vscode-dependency-cruiser](https://github.com/juanallo/vscode-dependency-cruiser) | 76 | JavaScript | MIT | VS Code wrapper for dependency-cruiser (JS/TS) | -| 75 | 2.2 | [hidva/as2cfg](https://github.com/hidva/as2cfg) | 63 | Rust | GPL-3.0 | Intel assembly → control flow graph | -| 76 | 2.2 | [microsoft/cmd-call-graph](https://github.com/microsoft/cmd-call-graph) | 55 | Python | MIT | Call graphs for Windows CMD batch files | -| 77 | 2.2 | 
[siggy/gographs](https://github.com/siggy/gographs) | 52 | Go | MIT | Go package dependency graph generator | -| 78 | 2.2 | [henryhale/depgraph](https://github.com/henryhale/depgraph) | 33 | Go | MIT | Go-focused codebase dependency analysis | -| 79 | 2.2 | [2015xli/clangd-graph-rag](https://github.com/2015xli/clangd-graph-rag) | 28 | Python | Apache-2.0 | C/C++ Neo4j GraphRAG via clangd (scales to Linux kernel) | -| 80 | 2.1 | [floydw1234/badger-graph](https://github.com/floydw1234/badger-graph) | 0 | Python | None | Dgraph backend (Docker), C struct field access tracking | -| 81 | 2.0 | [crubier/code-to-graph](https://github.com/crubier/code-to-graph) | 382 | JavaScript | None | JS code → Mermaid flowchart (single-function, web demo) | -| 82 | 2.0 | [khushil/code-graph-rag](https://github.com/khushil/code-graph-rag) | 0 | Python | MIT | Fork of vitali87/code-graph-rag with no modifications | -| 83 | 2.0 | [FalkorDB/code-graph-backend](https://github.com/FalkorDB/code-graph-backend) | 26 | Python | MIT | FalkorDB (Redis-based graph) code analysis demo | -| 84 | 2.0 | [jillesvangurp/spring-depend](https://github.com/jillesvangurp/spring-depend) | 46 | Java | MIT | Spring bean dependency graph extraction | -| 85 | 2.0 | [ivan-m/SourceGraph](https://github.com/ivan-m/SourceGraph) | 27 | Haskell | GPL-3.0 | Haskell graph-theoretic code analysis (last updated 2022) | -| 86 | 2.0 | [brutski/go-code-graph](https://github.com/brutski/go-code-graph) | 13 | Go | MIT | Go codebase analyzer with MCP integration | +| 38 | 2.9 | [rahulvgmail/CodeInteliMCP](https://github.com/rahulvgmail/CodeInteliMCP) | 8 | Python | None | DuckDB + ChromaDB (zero Docker), multi-repo, lightweight embedded DBs | +| 39 | 2.8 | [Aider-AI/aider](https://github.com/Aider-AI/aider) | 42,198 | Python | Apache-2.0 | AI pair programming CLI; tree-sitter repo map with PageRank-style graph ranking for LLM context selection, 100+ languages, multi-provider LLM support, git-integrated auto-commits. 
Moved to Aider-AI org | +| 40 | 2.8 | [scottrogowski/code2flow](https://github.com/scottrogowski/code2flow) | 4,528 | Python | MIT | Call graphs for Python/JS/Ruby/PHP via AST, DOT output, 100% test coverage | +| 41 | 2.8 | [ysk8hori/typescript-graph](https://github.com/ysk8hori/typescript-graph) | 200 | TypeScript | None | TypeScript file-level dependency Mermaid diagrams, code metrics (MI, CC), watch mode | +| 42 | 2.8 | [nuanced-dev/nuanced-py](https://github.com/nuanced-dev/nuanced-py) | 126 | Python | MIT | Python call graph enrichment designed for AI agent consumption | +| 43 | 2.8 | [sdsrss/code-graph-mcp](https://github.com/sdsrss/code-graph-mcp) | 16 | TypeScript | MIT | AST knowledge graph MCP server with tree-sitter, 10 languages. New entrant | +| 44 | 2.8 | [Bikach/codeGraph](https://github.com/Bikach/codeGraph) | 6 | TypeScript | MIT | Neo4j graph, Claude Code slash commands, Kotlin support, 40-50% cost reduction | +| 45 | 2.8 | [ChrisRoyse/CodeGraph](https://github.com/ChrisRoyse/CodeGraph) | 65 | TypeScript | None | Neo4j + MCP, multi-language, framework detection (React, Tailwind, Supabase) | +| 46 | 2.8 | [Symbolk/Code2Graph](https://github.com/Symbolk/Code2Graph) | 48 | Java | None | Multilingual code → language-agnostic graph representation | +| 47 | 2.7 | [yumeiriowl/repo-graphrag-mcp](https://github.com/yumeiriowl/repo-graphrag-mcp) | 3 | Python | MIT | LightRAG + tree-sitter, entity merge (code ↔ docs), implementation planning tool | +| 48 | 2.7 | [davidfraser/pyan](https://github.com/davidfraser/pyan) | 712 | Python | GPL-2.0 | Python call graph generator (stable fork), DOT/SVG/HTML output, Sphinx integration | +| 49 | 2.7 | [mamuz/PhpDependencyAnalysis](https://github.com/mamuz/PhpDependencyAnalysis) | 572 | PHP | MIT | PHP dependency graphs, cycle detection, architecture verification against defined layers | +| 50 | 2.7 | [faraazahmad/graphsense](https://github.com/faraazahmad/graphsense) | 35 | TypeScript | MIT | MCP server providing code 
intelligence via static analysis | +| 51 | 2.7 | [JonnoC/CodeRAG](https://github.com/JonnoC/CodeRAG) | 14 | TypeScript | MIT | Enterprise code intelligence with CK metrics, Neo4j, 23 analysis tools, MCP server | +| 52 | 2.6 | [0xjcf/MCP_CodeAnalysis](https://github.com/0xjcf/MCP_CodeAnalysis) | 7 | Python/TS | None | Stateful tools (XState), Redis sessions, socio-technical analysis, dual language impl | +| 53 | 2.5 | [koknat/callGraph](https://github.com/koknat/callGraph) | 325 | Perl | GPL-3.0 | Multi-language (22+) call graph generator via regex, GraphViz output | +| 54 | 2.5 | [RaheesAhmed/code-context-mcp](https://github.com/RaheesAhmed/code-context-mcp) | 0 | Python | MIT | Security pattern detection, auto architecture diagrams, code flow tracing | +| 55 | 2.5 | [league1991/CodeAtlasVsix](https://github.com/league1991/CodeAtlasVsix) | 265 | C# | GPL-2.0 | Visual Studio plugin, Doxygen-based call graph navigation (VS 2010-2015 era) | +| 56 | 2.5 | [beicause/call-graph](https://github.com/beicause/call-graph) | 105 | TypeScript | Apache-2.0 | VS Code extension generating call graphs via LSP call hierarchy API | +| 57 | 2.5 | [Thibault-Knobloch/codebase-intelligence](https://github.com/Thibault-Knobloch/codebase-intelligence) | 44 | Python | None | Code indexing + call graph + vector DB + natural language queries (requires OpenAI) | +| 58 | 2.5 | [darkmacheken/wasmati](https://github.com/darkmacheken/wasmati) | 31 | C++ | Apache-2.0 | CPG infrastructure for scanning vulnerabilities in WebAssembly | +| 59 | 2.5 | [sutragraph/sutracli](https://github.com/sutragraph/sutracli) | 28 | Python | GPL-3.0 | AI-powered cross-repo dependency graphs for coding agents | +| 60 | 2.5 | [julianjensen/ast-flow-graph](https://github.com/julianjensen/ast-flow-graph) | 69 | JavaScript | Other | JavaScript control flow graphs from AST analysis | +| 61 | 2.5 | [yoanbernabeu/grepai-skills](https://github.com/yoanbernabeu/grepai-skills) | 14 | — | MIT | 27 AI agent skills for semantic 
code search and call graph analysis | +| 62 | 2.5 | [GaloisInc/MATE](https://github.com/GaloisInc/MATE) | 194 | Python | BSD-3 | DARPA-funded interactive CPG-based bug hunting for C/C++ via LLVM | +| 63 | 2.4 | [shantham/codegraph](https://github.com/shantham/codegraph) | 0 | TypeScript | MIT | Polished `npx` one-command installer, sqlite-vss, 7 MCP tools | +| 64 | 2.3 | [ozyyshr/RepoGraph](https://github.com/ozyyshr/RepoGraph) | 251 | Python | Apache-2.0 | SWE-bench code graph research (ctags + networkx for LLM context) | +| 65 | 2.3 | [emad-elsaid/rubrowser](https://github.com/emad-elsaid/rubrowser) | 644 | Ruby | MIT | Ruby-only interactive D3 force-directed dependency graph | +| 66 | 2.3 | [Chentai-Kao/call-graph-plugin](https://github.com/Chentai-Kao/call-graph-plugin) | 87 | Kotlin | None | IntelliJ plugin for visualizing call graphs in IDE | +| 67 | 2.3 | [ehabterra/apispec](https://github.com/ehabterra/apispec) | 72 | Go | Apache-2.0 | OpenAPI 3.1 spec generator from Go code via call graph analysis | +| 68 | 2.3 | [huoyo/ko-time](https://github.com/huoyo/ko-time) | 61 | Java | LGPL-2.1 | Spring Boot call graph with runtime durations | +| 69 | 2.3 | [Fraunhofer-AISEC/codyze](https://github.com/Fraunhofer-AISEC/codyze) | 91 | Kotlin | None | CPG-based analyzer for cryptographic API misuse (archived, merged into cpg repo) | +| 70 | 2.3 | [CartographAI/mcp-server-codegraph](https://github.com/CartographAI/mcp-server-codegraph) | 17 | JavaScript | MIT | Lightweight MCP code graph (3 tools only, Python/JS/Rust) | +| 71 | 2.3 | [YounesBensafia/DevLens](https://github.com/YounesBensafia/DevLens) | 21 | Python | None | Repo scanner with AI summaries, dead code detection (dep graph not yet implemented) | +| 72 | 2.3 | [0xd219b/codegraph](https://github.com/0xd219b/codegraph) | 0 | Rust | None | Pure Rust, HTTP server mode, Java + Go support | +| 73 | 2.3 | [aryx/codegraph](https://github.com/aryx/codegraph) | 6 | OCaml | Other | Multi-language source code dependency 
visualizer (the original "codegraph" name) | +| 74 | 2.2 | [jmarkowski/codeviz](https://github.com/jmarkowski/codeviz) | 144 | Python | MIT | C/C++ `#include` header dependency graph visualization | +| 75 | 2.2 | [juanallo/vscode-dependency-cruiser](https://github.com/juanallo/vscode-dependency-cruiser) | 76 | JavaScript | MIT | VS Code wrapper for dependency-cruiser (JS/TS) | +| 76 | 2.2 | [hidva/as2cfg](https://github.com/hidva/as2cfg) | 63 | Rust | GPL-3.0 | Intel assembly → control flow graph | +| 77 | 2.2 | [microsoft/cmd-call-graph](https://github.com/microsoft/cmd-call-graph) | 55 | Python | MIT | Call graphs for Windows CMD batch files | +| 78 | 2.2 | [siggy/gographs](https://github.com/siggy/gographs) | 52 | Go | MIT | Go package dependency graph generator | +| 79 | 2.2 | [henryhale/depgraph](https://github.com/henryhale/depgraph) | 33 | Go | MIT | Go-focused codebase dependency analysis | +| 80 | 2.2 | [2015xli/clangd-graph-rag](https://github.com/2015xli/clangd-graph-rag) | 28 | Python | Apache-2.0 | C/C++ Neo4j GraphRAG via clangd (scales to Linux kernel) | +| 81 | 2.1 | [floydw1234/badger-graph](https://github.com/floydw1234/badger-graph) | 0 | Python | None | Dgraph backend (Docker), C struct field access tracking | +| 82 | 2.0 | [crubier/code-to-graph](https://github.com/crubier/code-to-graph) | 382 | JavaScript | None | JS code → Mermaid flowchart (single-function, web demo) | +| 83 | 2.0 | [khushil/code-graph-rag](https://github.com/khushil/code-graph-rag) | 0 | Python | MIT | Fork of vitali87/code-graph-rag with no modifications | +| 84 | 2.0 | [FalkorDB/code-graph-backend](https://github.com/FalkorDB/code-graph-backend) | 26 | Python | MIT | FalkorDB (Redis-based graph) code analysis demo | +| 85 | 2.0 | [jillesvangurp/spring-depend](https://github.com/jillesvangurp/spring-depend) | 46 | Java | MIT | Spring bean dependency graph extraction | +| 86 | 2.0 | [ivan-m/SourceGraph](https://github.com/ivan-m/SourceGraph) | 27 | Haskell | GPL-3.0 | 
Haskell graph-theoretic code analysis (last updated 2022) | +| 87 | 2.0 | [brutski/go-code-graph](https://github.com/brutski/go-code-graph) | 13 | Go | MIT | Go codebase analyzer with MCP integration | ### Tier 3: Minimal or Inactive (score < 2.0) @@ -142,32 +143,33 @@ Ranked by weighted score across 6 dimensions (each 1–5): | 8 | arbor | 4 | 4 | 5 | 4 | 5 | 3 | | 9 | CKB | 5 | 5 | 4 | 3 | 4 | 3 | | 10 | axon | 5 | 5 | 4 | 2 | 4 | 3 | -| 11 | glimpse | 4 | 4 | 5 | 3 | 5 | 2 | -| 12 | codepropertygraph | 4 | 5 | 2 | 4 | 5 | 3 | -| 13 | codegraph-rust | 5 | 5 | 2 | 4 | 4 | 3 | -| 14 | mcp-code-graph | 4 | 3 | 4 | 4 | 3 | 4 | -| 15 | code-graph-mcp | 4 | 4 | 4 | 5 | 3 | 2 | -| 16 | jelly | 4 | 5 | 4 | 1 | 5 | 3 | -| 17 | colbymchenry/codegraph | 4 | 3 | 5 | 3 | 3 | 4 | -| 18 | code-graph-rag-mcp | 5 | 4 | 3 | 4 | 3 | 2 | -| 19 | mcp-codebase-index | 4 | 3 | 5 | 3 | 4 | 2 | -| 20 | CodeGraphMCPServer | 4 | 4 | 4 | 5 | 3 | 1 | -| 21 | stratify | 4 | 4 | 2 | 5 | 4 | 2 | -| 22 | cie | 5 | 4 | 4 | 3 | 4 | 1 | -| 23 | codexray | 5 | 4 | 4 | 4 | 3 | 1 | -| 24 | autodev-codebase | 5 | 3 | 3 | 5 | 3 | 1 | -| 25 | CodeVisualizer | 4 | 3 | 5 | 3 | 3 | 2 | -| 26 | code-health-meter | 3 | 5 | 5 | 1 | 4 | 2 | -| 27 | code-graph-analysis-pipeline | 5 | 5 | 1 | 2 | 5 | 2 | -| 28 | codebadger | 4 | 4 | 3 | 5 | 3 | 1 | -| 29 | loregrep | 3 | 3 | 4 | 3 | 5 | 2 | -| 30 | Claude-code-memory | 4 | 3 | 3 | 3 | 4 | 3 | -| 31 | claude-context-local | 4 | 3 | 3 | 4 | 4 | 1 | -| 32 | codegraph-cli | 5 | 3 | 3 | 2 | 3 | 2 | -| 33 | xnuinside/codegraph | 3 | 2 | 5 | 1 | 3 | 4 | -| 34 | java-all-call-graph | 4 | 4 | 3 | 1 | 3 | 3 | -| 35 | pyan | 3 | 3 | 5 | 1 | 4 | 2 | -| 36 | cloud-property-graph | 4 | 4 | 2 | 2 | 4 | 2 | +| 11 | CodeGraphContext | 4 | 3 | 4 | 4 | 3 | 5 | +| 12 | glimpse | 4 | 4 | 5 | 3 | 5 | 2 | +| 13 | codepropertygraph | 4 | 5 | 2 | 4 | 5 | 3 | +| 14 | codegraph-rust | 5 | 5 | 2 | 4 | 4 | 3 | +| 15 | mcp-code-graph | 4 | 3 | 4 | 4 | 3 | 4 | +| 16 | code-graph-mcp | 4 | 4 | 4 
| 5 | 3 | 2 | +| 17 | jelly | 4 | 5 | 4 | 1 | 5 | 3 | +| 18 | colbymchenry/codegraph | 4 | 3 | 5 | 3 | 3 | 4 | +| 19 | code-graph-rag-mcp | 5 | 4 | 3 | 4 | 3 | 2 | +| 20 | mcp-codebase-index | 4 | 3 | 5 | 3 | 4 | 2 | +| 21 | CodeGraphMCPServer | 4 | 4 | 4 | 5 | 3 | 1 | +| 22 | stratify | 4 | 4 | 2 | 5 | 4 | 2 | +| 23 | cie | 5 | 4 | 4 | 3 | 4 | 1 | +| 24 | codexray | 5 | 4 | 4 | 4 | 3 | 1 | +| 25 | autodev-codebase | 5 | 3 | 3 | 5 | 3 | 1 | +| 26 | CodeVisualizer | 4 | 3 | 5 | 3 | 3 | 2 | +| 27 | code-health-meter | 3 | 5 | 5 | 1 | 4 | 2 | +| 28 | code-graph-analysis-pipeline | 5 | 5 | 1 | 2 | 5 | 2 | +| 29 | codebadger | 4 | 4 | 3 | 5 | 3 | 1 | +| 30 | loregrep | 3 | 3 | 4 | 3 | 5 | 2 | +| 31 | Claude-code-memory | 4 | 3 | 3 | 3 | 4 | 3 | +| 32 | claude-context-local | 4 | 3 | 3 | 4 | 4 | 1 | +| 33 | codegraph-cli | 5 | 3 | 3 | 2 | 3 | 2 | +| 34 | xnuinside/codegraph | 3 | 2 | 5 | 1 | 3 | 4 | +| 35 | java-all-call-graph | 4 | 4 | 3 | 1 | 3 | 3 | +| 36 | pyan | 3 | 3 | 5 | 1 | 4 | 2 | +| 37 | cloud-property-graph | 4 | 4 | 2 | 2 | 4 | 2 | **Scoring criteria:** - **Features** (1-5): breadth of tools, MCP integration, search, visualization, export @@ -264,23 +266,32 @@ Ranked by weighted score across 6 dimensions (each 1–5): - **Git change coupling**: Co-change analysis — *(Gap closed: we now have `co-change` command)* - **Branch structural diff**: *(Gap closed: we now have `branch-compare`)* -### vs glimpse (#11, 349 stars — stagnant) + +### vs CodeGraphContext (#11, 2,664 stars) +- **Community traction**: 2,664 stars, 497 forks, 30 contributors — much higher visibility than us (likely Hacktoberfest/social coding event-boosted, but real community) +- **Multiple graph DB backends**: KuzuDB (embedded), FalkorDB Lite, FalkorDB Remote, Neo4j — native graph traversal and raw Cypher queries. 
Our SQLite is simpler but less expressive for graph queries +- **14 languages**: Adds C/C++, Swift, Kotlin, Dart, Perl over our 11 +- **Pre-indexed bundle registry**: Download `.cgc` bundles for popular open-source repos — instant context without indexing. Unique in this space +- **IDE setup wizard**: Auto-configures MCP for 10+ IDEs (VS Code, Cursor, Windsurf, Claude, Gemini CLI, ChatGPT Codex, Cline, RooCode, Amazon Q, Kiro). We only support Claude Code MCP config +- **Where we win**: Significantly deeper analysis — qualified call resolution, dataflow, CFG, stored ASTs, architecture boundaries, community detection, diff-impact, role classification, semantic search, sequence diagrams, complexity metrics (cognitive, Halstead, MI), CI gates. Dual engine (native Rust + WASM) is much faster. CGC is v0.3.1 (early) vs our mature release. CGC has no semantic search, no incremental file-hash rebuilds, no confidence-scored edges, and no export formats. Its Python-only ecosystem limits its reach in Node.js/TypeScript shops + +### vs glimpse (#12, 349 stars — stagnant) - **LLM workflow optimization**: clipboard-first output + token counting + XML output mode — purpose-built for "code → LLM context" - **LSP-based call resolution**: compiler-grade accuracy vs our tree-sitter heuristic approach - **Web content processing**: can fetch URLs and convert HTML to markdown for context -### vs codegraph-rust (#13, 142 stars) +### vs codegraph-rust (#14, 142 stars) - **LSP-powered analysis**: compiler-grade cross-file references via rust-analyzer, pyright, gopls vs our tree-sitter heuristics - **Dataflow edges**: defines/uses/flows_to/returns/mutates relationships — *(Gap closed: we now have `flows_to`/`returns`/`mutates` across all 11 languages)* - **Architecture boundary enforcement**: *(Gap closed: we now have `boundaries` command with onion/hexagonal/layered/clean presets)* - **Tiered indexing**: fast/balanced/full modes for different use cases — we have one mode -### vs jelly (#16, 423 stars) +### vs
jelly (#17, 423 stars) - **Points-to analysis**: flow-insensitive analysis with access paths for JS/TS — fundamentally more precise than our tree-sitter-based call resolution - **Academic rigor**: 5 published papers backing the methodology (Aarhus University) - **Vulnerability exposure analysis**: library usage pattern matching specific to the JS/TS ecosystem -### vs aider (#38, 42,198 stars — now Aider-AI/aider) +### vs aider (#39, 42,198 stars — now Aider-AI/aider) - **Different product category**: Aider is an AI pair programming CLI, not a code graph tool — but its tree-sitter repo map with PageRank-style graph ranking is a lightweight alternative to our full graph for LLM context selection - **Massive community**: 42,198 stars, 4,054 forks — orders of magnitude more traction than any tool in this space. Aider *is* the category leader for AI-assisted coding in the terminal. Moved to Aider-AI org - **100+ languages**: tree-sitter parsing covers far more languages than our 11, though only for identifier extraction (not full symbol/call resolution) @@ -288,7 +299,7 @@ Ranked by weighted score across 6 dimensions (each 1–5): - **Built-in code editing**: Aider's core loop is "understand code → edit code → commit." We provide the understanding layer but don't edit - **Where we win**: Aider's repo map is shallow — file-level dependency graph with identifier ranking, no function-level call resolution, no impact analysis, no dead code detection, no complexity metrics, no MCP server, no standalone queryable graph. It answers "what's relevant?" but not "what breaks if I change this?" Our graph is deeper and persistent; Aider rebuilds its map per-request -### vs colbymchenry/codegraph (#17, 308 stars — nearly doubled) +### vs colbymchenry/codegraph (#18, 308 stars — nearly doubled) - **Fastest-growing naming competitor**: 165 → 308 stars since Feb. 
Same name, same tech stack (tree-sitter + SQLite + MCP + Node.js) — marketplace confusion is increasing - **Published benchmarks**: 67% fewer tool calls and measurable Claude Code token reduction — compelling marketing. *(Gap closed: our `context`, `audit`, and `batch` compound commands provide equivalent or better token savings)* - **One-liner setup**: `npx @colbymchenry/codegraph` with interactive installer auto-configures Claude Code