Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 158 additions & 1 deletion crates/codegraph-core/src/native_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
//! Any changes there MUST be reflected here (and vice-versa).

use napi_derive::napi;
use rusqlite::{params, Connection, OpenFlags};
use rusqlite::{params, types::ValueRef, Connection, OpenFlags};
use send_wrapper::SendWrapper;

use crate::ast_db::{self, FileAstBatch};
Expand Down Expand Up @@ -549,6 +549,104 @@ impl NativeDatabase {
Ok(())
}

// ── Phase 6.16: Generic query execution & version validation ────────

/// Execute a parameterized query and return all rows as JSON objects.
/// Each row is a `{ column_name: value, ... }` object.
/// Params are positional (`?1, ?2, ...`) and accept string, number, or null.
///
/// **Note**: Designed for SELECT statements. Passing DML/DDL will not error
/// at the Rust layer but is not an intended use — all current callers pass
/// SELECT-only SQL generated by `NodeQuery.build()`.
#[napi]
pub fn query_all(
&self,
sql: String,
params: Vec<serde_json::Value>,
) -> napi::Result<Vec<serde_json::Value>> {
let conn = self.conn()?;
let rusqlite_params = json_to_rusqlite_params(&params)?;
let param_refs: Vec<&dyn rusqlite::types::ToSql> =
rusqlite_params.iter().map(|v| v as &dyn rusqlite::types::ToSql).collect();

let mut stmt = conn
.prepare(&sql)
.map_err(|e| napi::Error::from_reason(format!("queryAll prepare failed: {e}")))?;

let col_count = stmt.column_count();
let col_names: Vec<String> = (0..col_count)
.map(|i| stmt.column_name(i).unwrap_or("?").to_owned())
.collect();

let rows = stmt
.query_map(param_refs.as_slice(), |row| {
Ok(row_to_json(row, col_count, &col_names))
})
.map_err(|e| napi::Error::from_reason(format!("queryAll query failed: {e}")))?;

let mut result = Vec::new();
for row in rows {
let val =
row.map_err(|e| napi::Error::from_reason(format!("queryAll row failed: {e}")))?;
result.push(val);
}
Ok(result)
}

/// Execute a parameterized query and return the first row, or null.
/// See `query_all` for parameter and contract details.
#[napi]
pub fn query_get(
&self,
sql: String,
params: Vec<serde_json::Value>,
) -> napi::Result<Option<serde_json::Value>> {
let conn = self.conn()?;
let rusqlite_params = json_to_rusqlite_params(&params)?;
let param_refs: Vec<&dyn rusqlite::types::ToSql> =
rusqlite_params.iter().map(|v| v as &dyn rusqlite::types::ToSql).collect();

let mut stmt = conn
.prepare(&sql)
.map_err(|e| napi::Error::from_reason(format!("queryGet prepare failed: {e}")))?;

let col_count = stmt.column_count();
let col_names: Vec<String> = (0..col_count)
.map(|i| stmt.column_name(i).unwrap_or("?").to_owned())
.collect();

let mut query_rows = stmt
.query(param_refs.as_slice())
.map_err(|e| napi::Error::from_reason(format!("queryGet query failed: {e}")))?;

match query_rows.next() {
Ok(Some(row)) => Ok(Some(row_to_json(row, col_count, &col_names))),
Ok(None) => Ok(None),
Err(e) => Err(napi::Error::from_reason(format!(
"queryGet row failed: {e}"
))),
}
}

/// Check the `codegraph_version` stored in the DB's build metadata against
/// `expected_version`.
///
/// Returns `true` when the stored version equals `expected_version` or when no
/// version is stored at all. On a mismatch, writes a warning to stderr and
/// returns `false`.
///
/// # Errors
///
/// Propagates any error from reading the build metadata.
#[napi]
pub fn validate_schema_version(&self, expected_version: String) -> napi::Result<bool> {
    let stored = self.get_build_meta("codegraph_version".to_string())?;

    if let Some(v) = stored {
        if v != expected_version {
            eprintln!(
                "[codegraph] DB was built with v{v}, running v{expected_version}. \
                 Consider: codegraph build --no-incremental"
            );
            return Ok(false);
        }
    }

    // Either nothing is stored or the versions agree.
    Ok(true)
}

// ── Phase 6.15: Build pipeline write operations ─────────────────────

/// Bulk-insert nodes, children, containment edges, exports, and file hashes.
Expand Down Expand Up @@ -698,3 +796,62 @@ fn has_column(conn: &Connection, table: &str, column: &str) -> bool {
Err(_) => false,
}
}

/// Convert a JSON param array to rusqlite-compatible values.
fn json_to_rusqlite_params(
params: &[serde_json::Value],
) -> napi::Result<Vec<rusqlite::types::Value>> {
params
.iter()
.enumerate()
.map(|(i, v)| match v {
serde_json::Value::Null => Ok(rusqlite::types::Value::Null),
serde_json::Value::Number(n) => {
if let Some(int) = n.as_i64() {
Ok(rusqlite::types::Value::Integer(int))
} else if let Some(float) = n.as_f64() {
Ok(rusqlite::types::Value::Real(float))
} else {
Err(napi::Error::from_reason(format!(
"param[{i}]: unsupported number {n}"
)))
}
}
serde_json::Value::String(s) => Ok(rusqlite::types::Value::Text(s.clone())),
other => Err(napi::Error::from_reason(format!(
"param[{i}]: unsupported type {}",
other
))),
})
.collect()
}

/// Build a JSON object from one rusqlite row, keyed by column name.
///
/// **Contract**: Integer, Real, Text, and Null cells are the only supported
/// column types. BLOB cells become `null` because the current codegraph schema
/// has no BLOB columns and the generic query path is not designed for binary
/// data. A cell that fails to read also becomes `null`, so one bad cell never
/// fails the whole row. Text that is not valid UTF-8 is converted lossily.
///
/// `col_names` must hold at least `col_count` entries; column `i` of the row is
/// stored under `col_names[i]`.
fn row_to_json(
    row: &rusqlite::Row<'_>,
    col_count: usize,
    col_names: &[String],
) -> serde_json::Value {
    let mut object = serde_json::Map::with_capacity(col_count);

    for idx in 0..col_count {
        let key = col_names[idx].clone();
        let cell = match row.get_ref(idx) {
            Ok(ValueRef::Integer(n)) => serde_json::json!(n),
            Ok(ValueRef::Real(f)) => serde_json::json!(f),
            Ok(ValueRef::Text(bytes)) => {
                serde_json::Value::String(String::from_utf8_lossy(bytes).into_owned())
            }
            // NULL, BLOB (unsupported, see contract above), and cell read
            // errors all collapse to JSON null.
            Ok(ValueRef::Null) | Ok(ValueRef::Blob(_)) | Err(_) => serde_json::Value::Null,
        };
        object.insert(key, cell);
    }

    serde_json::Value::Object(object)
}
127 changes: 127 additions & 0 deletions docs/migration/db-prepare-audit.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# `db.prepare()` Migration Audit

> **Phase 6.16** — Audit of all direct `better-sqlite3` `.prepare()` calls.
> Goal: every call routes through either `Repository` or `NativeDatabase` methods.

## Summary

| Tier | Layer | Files | Calls | Status |
|------|-------|-------|-------|--------|
| 0 | DB infrastructure | 3 | 7 | Done (repository + migrations) |
| 0 | Starter migrations | 2 | 3 | Done (6.16 PR) |
| 1 | Build pipeline | 10 | 53 | Next — ctx.nativeDb available |
| 2 | Domain analysis | 11 | 35 | Requires NativeDatabase in read path |
| 3 | Features | 16 | 87 | Requires NativeDatabase in read path |
| 3 | Shared utilities | 3 | 9 | Requires NativeDatabase in read path |
| — | **Total** | **43** | **194** | — |

Per-tier counts match the detail tables below. `detect-changes.ts` and `build-structure.ts` appear in both the Starter migrations and Tier 1 rows, so the total counts 43 unique files.

## Tier 0 — Already Abstracted

These are either inside the Repository pattern or in schema migration code.

| File | Calls | Notes |
|------|-------|-------|
| `db/repository/build-stmts.ts` | 3 | Repository layer |
| `db/repository/cfg.ts` | 1 | Repository layer |
| `db/migrations.ts` | 3 | Schema DDL — keep as-is |

## Tier 0 — Starter Migrations (6.16 PR)

Converted to `nativeDb` dispatch in the 6.16 PR:

| File | Calls | What |
|------|-------|------|
| `domain/graph/builder/stages/detect-changes.ts` | 2 | file_hashes probe + full read |
| `domain/graph/builder/stages/build-structure.ts` | 1 | file node count |

## Tier 1 — Build Pipeline (ctx.nativeDb available)

These run during the build pipeline where `ctx.nativeDb` is already open.
Migrate using the same `ctx.nativeDb ? ctx.nativeDb.queryAll/queryGet(...) : db.prepare(...)` pattern.

| File | Calls | What |
|------|-------|------|
| `domain/graph/builder/stages/build-structure.ts` | 10 | dir metrics, role UPDATEs, line counts |
| `domain/graph/builder/stages/detect-changes.ts` | 7 | journal queries, mtime checks, CFG count |
| `domain/graph/builder/incremental.ts` | 6 | incremental rebuild queries |
| `domain/graph/builder/stages/build-edges.ts` | 5 | edge dedup, containment edges |
| `domain/graph/builder/stages/finalize.ts` | 5 | build metadata, embedding count |
| `domain/graph/builder/stages/resolve-imports.ts` | 4 | import resolution lookups |
| `domain/graph/builder/stages/insert-nodes.ts` | 3 | node insertion (JS fallback path) |
| `domain/graph/builder/stages/collect-files.ts` | 2 | file collection queries |
| `domain/graph/builder/helpers.ts` | 2 | utility queries |
| `domain/graph/watcher.ts` | 9 | watch mode incremental |

## Tier 2 — Domain Analysis (query-time, read-only)

These run in the query pipeline which currently uses `openReadonlyOrFail()` (better-sqlite3 only).
Migrating these requires adding NativeDatabase to the read path.

| File | Calls | What |
|------|-------|------|
| `domain/analysis/module-map.ts` | 20 | Module map queries (heaviest file) |
| `domain/analysis/symbol-lookup.ts` | 2 | Symbol search |
| `domain/analysis/dependencies.ts` | 2 | Dependency queries |
| `domain/analysis/diff-impact.ts` | 1 | Diff impact analysis |
| `domain/analysis/exports.ts` | 1 | Export analysis |
| `domain/analysis/fn-impact.ts` | 1 | Function impact |
| `domain/analysis/roles.ts` | 1 | Role queries |
| `domain/search/generator.ts` | 4 | Embedding generation |
| `domain/search/stores/fts5.ts` | 1 | FTS5 search |
| `domain/search/search/keyword.ts` | 1 | Keyword search |
| `domain/search/search/prepare.ts` | 1 | Search preparation |

## Tier 3 — Features Layer (query-time, read-only)

Same dependency as Tier 2 — requires NativeDatabase in the read path.

| File | Calls | What |
|------|-------|------|
| `features/structure.ts` | 21 | Structure analysis (heaviest) |
| `features/export.ts` | 13 | Graph export |
| `features/dataflow.ts` | 10 | Dataflow analysis |
| `features/structure-query.ts` | 9 | Structure queries |
| `features/audit.ts` | 7 | Audit command |
| `features/cochange.ts` | 6 | Co-change analysis |
| `features/branch-compare.ts` | 4 | Branch comparison |
| `features/check.ts` | 3 | CI check predicates |
| `features/owners.ts` | 3 | CODEOWNERS integration |
| `features/cfg.ts` | 2 | Control flow graph |
| `features/ast.ts` | 2 | AST queries |
| `features/manifesto.ts` | 2 | Rule engine |
| `features/sequence.ts` | 2 | Sequence diagrams |
| `features/complexity.ts` | 1 | Complexity metrics |
| `features/boundaries.ts` | 1 | Architecture boundaries |
| `features/shared/find-nodes.ts` | 1 | Shared node finder |

## Tier 3 — Shared Utilities

| File | Calls | What |
|------|-------|------|
| `shared/generators.ts` | 4 | Generator utilities |
| `shared/hierarchy.ts` | 4 | Hierarchy traversal |
| `shared/normalize.ts` | 1 | Normalization helpers |

## Migration Recipe

### For Tier 1 (build pipeline):
```typescript
// Before:
const row = db.prepare('SELECT ...').get(...args);

// After:
const sql = 'SELECT ...';
const row = ctx.nativeDb
? ctx.nativeDb.queryGet(sql, [...args])
: db.prepare(sql).get(...args);
```

### For Tiers 2-3 (query pipeline):
Requires adding a `nativeDb` parameter to query-path functions, or opening
a NativeDatabase in `openReadonlyOrFail()`. This is phase 6.17+ work.

## Decision Log

- **`iterate()` stays on better-sqlite3**: rusqlite can't stream across FFI. Only used by `iterateFunctionNodes` — bounded row counts.
- **Migrations stay as-is**: Schema DDL runs once, no performance concern.
- **Features/analysis layers blocked on read-path NativeDatabase**: These only have a better-sqlite3 handle via `openReadonlyOrFail()`. Adding NativeDatabase to the read path is a phase 6.17 prerequisite.
17 changes: 9 additions & 8 deletions docs/roadmap/ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -1322,16 +1322,17 @@ Structure building is unchanged — at 22ms it's already fast.

### 6.16 -- Dynamic SQL & Edge Cases

**Not started.** Handle the remaining non-trivial DB patterns that don't map cleanly to fixed Repository methods.
**Done.** Generic parameterized query execution on NativeDatabase, connection lifecycle helpers, version validation, and `db.prepare()` audit.

**Plan:**
- **`NodeQuery` builder edge cases:** Ensure the Rust-side replica handles all filter combinations, JOIN paths, ORDER BY variations, and LIMIT/OFFSET correctly — fuzz-test with random filter combinations against the JS builder
- **`openReadonlyOrFail` version-check logic:** Port the schema-version validation that runs on read-only DB opens
- **Advisory lock mechanism:** Keep in JS (filesystem-based, not SQLite) — ensure `NativeDatabase.close()` integrates with the existing lock lifecycle
- **`closeDbDeferred` / WAL checkpoint deferral:** Keep deferred-close logic in JS, call `NativeDatabase.close()` when ready
- **Raw `db.prepare()` stragglers:** Audit all 383 callers of `.prepare()` and ensure every one routes through either `Repository` or `NativeDatabase` methods — no direct better-sqlite3 usage on the native path
**Delivered:**
- **`NativeDatabase.queryAll` / `queryGet`:** Generic parameterized SELECT execution via rusqlite, returning rows as JSON objects. Uses `serde_json::Value` for dynamic column support
- **`NodeQuery` native dispatch:** `all()` and `get()` accept optional `nativeDb` parameter for rusqlite execution. Combinatorial parity test suite covers all filter/JOIN/ORDER BY combinations
- **`NativeDatabase.validateSchemaVersion`:** Schema version check for future read-path callers
- **`closeDbPair` / `closeDbPairDeferred`:** Unified connection lifecycle helpers — close NativeDatabase first (fast), then better-sqlite3 (WAL checkpoint). Replaces manual close sequences in `finalize.ts` and `pipeline.ts`
- **Starter straggler migrations:** 3 build-pipeline reads in `detect-changes.ts` and `build-structure.ts` dispatch through `nativeDb` when available
- **`db.prepare()` audit:** 194 calls across 43 files documented in `docs/migration/db-prepare-audit.md` with tiered migration path (Tier 0 done, Tier 1 build pipeline next, Tiers 2-3 blocked on read-path NativeDatabase)

**Affected files:** `crates/codegraph-core/src/native_db.rs`, `src/db/connection.ts`, `src/db/query-builder.ts`, `src/db/repository/sqlite-repository.ts`
**Affected files:** `crates/codegraph-core/src/native_db.rs`, `src/db/connection.ts`, `src/db/query-builder.ts`, `src/db/repository/nodes.ts`, `src/types.ts`, `src/domain/graph/builder/stages/finalize.ts`, `src/domain/graph/builder/pipeline.ts`, `src/domain/graph/builder/stages/detect-changes.ts`, `src/domain/graph/builder/stages/build-structure.ts`

### 6.17 -- Cleanup & better-sqlite3 Isolation

Expand Down
14 changes: 14 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,20 @@
"description": "Local code graph CLI — parse codebases with tree-sitter, build dependency graphs, query them",
"type": "module",
"main": "dist/index.js",
"imports": {
"#shared/*": { "@codegraph/source": "./src/shared/*", "default": "./dist/shared/*" },
"#infrastructure/*": { "@codegraph/source": "./src/infrastructure/*", "default": "./dist/infrastructure/*" },
"#db/*": { "@codegraph/source": "./src/db/*", "default": "./dist/db/*" },
"#domain/*": { "@codegraph/source": "./src/domain/*", "default": "./dist/domain/*" },
"#features/*": { "@codegraph/source": "./src/features/*", "default": "./dist/features/*" },
"#presentation/*": { "@codegraph/source": "./src/presentation/*", "default": "./dist/presentation/*" },
"#graph/*": { "@codegraph/source": "./src/graph/*", "default": "./dist/graph/*" },
"#mcp/*": { "@codegraph/source": "./src/mcp/*", "default": "./dist/mcp/*" },
"#ast-analysis/*": { "@codegraph/source": "./src/ast-analysis/*", "default": "./dist/ast-analysis/*" },
"#extractors/*": { "@codegraph/source": "./src/extractors/*", "default": "./dist/extractors/*" },
"#cli/*": { "@codegraph/source": "./src/cli/*", "default": "./dist/cli/*" },
"#types": { "@codegraph/source": "./src/types.ts", "default": "./dist/types.js" }
},
"exports": {
".": {
"import": "./dist/index.js",
Expand Down
Loading
Loading