-
Notifications
You must be signed in to change notification settings - Fork 4
perf(ast): bulk-insert AST nodes via native Rust/rusqlite #651
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
42fafca
perf(ast): bulk-insert AST nodes via native Rust/rusqlite
carlos-alm 89a1ddb
fix(ast): add busy_timeout pragma to Rust SQLite connection (#651)
carlos-alm 4d91d0b
fix(ast): fall back to JS when native bulk insert count mismatches (#…
carlos-alm d4cbd32
docs(cargo): document rusqlite bundled feature rationale (#651)
carlos-alm 5be93ca
fix(ast): match JS findParentDef semantics for null end_line (#651)
carlos-alm 51bf006
Merge branch 'main' into perf/ast-node-bulk-insert
carlos-alm 6dc0a80
fix(ast): treat row-level execute errors as fatal for transaction (#651)
carlos-alm b406301
Merge branch 'perf/ast-node-bulk-insert' of https://github.com/optave…
carlos-alm File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,165 @@ | ||
| //! Bulk AST node insertion via rusqlite. | ||
| //! | ||
| //! Bypasses the JS iteration loop by opening the SQLite database directly | ||
| //! from Rust and inserting all AST nodes in a single transaction. | ||
| //! Parent node IDs are resolved by querying the `nodes` table. | ||
|
|
||
| use std::collections::HashMap; | ||
|
|
||
| use napi_derive::napi; | ||
| use rusqlite::{params, Connection, OpenFlags}; | ||
| use serde::{Deserialize, Serialize}; | ||
|
|
||
| /// A single AST node to insert (received from JS). | ||
| #[napi(object)] | ||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||
| pub struct AstInsertNode { | ||
| pub line: u32, | ||
| pub kind: String, | ||
| pub name: String, | ||
| pub text: Option<String>, | ||
| pub receiver: Option<String>, | ||
| } | ||
|
|
||
| /// A batch of AST nodes for a single file. | ||
| #[napi(object)] | ||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||
| pub struct FileAstBatch { | ||
| pub file: String, | ||
| pub nodes: Vec<AstInsertNode>, | ||
| } | ||
|
|
||
| /// A definition row from the `nodes` table used for parent resolution. | ||
| struct NodeDef { | ||
| id: i64, | ||
| line: u32, | ||
| end_line: Option<u32>, | ||
| } | ||
|
|
||
| /// Find the narrowest enclosing definition for a given source line. | ||
| /// Returns the node ID of the best match, or None if no definition encloses this line. | ||
| /// | ||
| /// Mirrors the JS `findParentDef` semantics: a definition with `end_line = NULL` | ||
| /// is treated as always enclosing, with a negative sentinel span so it is preferred | ||
| /// over definitions that have an explicit (wider) `end_line`. | ||
| fn find_parent_id(defs: &[NodeDef], line: u32) -> Option<i64> { | ||
| let mut best_id: Option<i64> = None; | ||
| let mut best_span: i64 = i64::MAX; | ||
| for d in defs { | ||
| if d.line <= line { | ||
| let span: i64 = match d.end_line { | ||
| Some(el) if el >= line => (el - d.line) as i64, | ||
| Some(_) => continue, | ||
| // JS: (def.endLine ?? 0) - def.line → negative, always preferred | ||
| None => -(d.line as i64), | ||
| }; | ||
| if span < best_span { | ||
| best_id = Some(d.id); | ||
| best_span = span; | ||
| } | ||
| } | ||
| } | ||
| best_id | ||
| } | ||
|
|
||
| /// Bulk-insert AST nodes into the database, resolving `parent_node_id` | ||
| /// from the `nodes` table. Runs all inserts in a single SQLite transaction. | ||
| /// | ||
| /// Returns the number of rows inserted. Returns 0 on any error (DB open | ||
| /// failure, missing table, transaction failure). | ||
| #[napi] | ||
| pub fn bulk_insert_ast_nodes(db_path: String, batches: Vec<FileAstBatch>) -> u32 { | ||
| if batches.is_empty() { | ||
| return 0; | ||
| } | ||
|
|
||
| let flags = OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX; | ||
| let mut conn = match Connection::open_with_flags(&db_path, flags) { | ||
| Ok(c) => c, | ||
| Err(_) => return 0, | ||
| }; | ||
|
|
||
| // Match the JS-side performance pragmas (including busy_timeout for WAL contention) | ||
| let _ = conn.execute_batch( | ||
| "PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000", | ||
| ); | ||
|
|
||
| // Bail out if the ast_nodes table doesn't exist (schema too old) | ||
| let has_table: bool = conn | ||
| .prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name='ast_nodes'") | ||
| .and_then(|mut s| s.query_row([], |_| Ok(true))) | ||
| .unwrap_or(false); | ||
| if !has_table { | ||
| return 0; | ||
| } | ||
|
|
||
| // ── Phase 1: Pre-fetch node definitions for parent resolution ──────── | ||
| let mut file_defs: HashMap<String, Vec<NodeDef>> = HashMap::new(); | ||
| { | ||
| let Ok(mut stmt) = | ||
| conn.prepare("SELECT id, line, end_line FROM nodes WHERE file = ?1") | ||
| else { | ||
| return 0; | ||
| }; | ||
|
|
||
| for batch in &batches { | ||
| if batch.nodes.is_empty() || file_defs.contains_key(&batch.file) { | ||
| continue; | ||
| } | ||
| let defs: Vec<NodeDef> = stmt | ||
| .query_map(params![&batch.file], |row| { | ||
| Ok(NodeDef { | ||
| id: row.get(0)?, | ||
| line: row.get(1)?, | ||
| end_line: row.get(2)?, | ||
| }) | ||
| }) | ||
| .map(|rows| rows.filter_map(|r| r.ok()).collect()) | ||
| .unwrap_or_default(); | ||
| file_defs.insert(batch.file.clone(), defs); | ||
| } | ||
| } // `stmt` dropped — releases the immutable borrow on `conn` | ||
|
|
||
| // ── Phase 2: Bulk insert in a single transaction ───────────────────── | ||
| let Ok(tx) = conn.transaction() else { | ||
| return 0; | ||
| }; | ||
|
|
||
| let mut total = 0u32; | ||
| { | ||
| let Ok(mut insert_stmt) = tx.prepare( | ||
| "INSERT INTO ast_nodes (file, line, kind, name, text, receiver, parent_node_id) \ | ||
| VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", | ||
| ) else { | ||
| return 0; | ||
| }; | ||
|
|
||
| for batch in &batches { | ||
| let empty = Vec::new(); | ||
| let defs = file_defs.get(&batch.file).unwrap_or(&empty); | ||
|
|
||
| for node in &batch.nodes { | ||
| let parent_id = find_parent_id(defs, node.line); | ||
|
|
||
| match insert_stmt.execute(params![ | ||
| &batch.file, | ||
| node.line, | ||
| &node.kind, | ||
| &node.name, | ||
| &node.text, | ||
| &node.receiver, | ||
| parent_id, | ||
| ]) { | ||
| Ok(_) => total += 1, | ||
| Err(_) => return 0, // abort; tx rolls back on drop | ||
| } | ||
| } | ||
| } | ||
| } // `insert_stmt` dropped | ||
|
|
||
| if tx.commit().is_err() { | ||
| return 0; | ||
| } | ||
|
|
||
| total | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,4 @@ | ||
| pub mod ast_db; | ||
| pub mod cfg; | ||
| pub mod complexity; | ||
| pub mod constants; | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
bundledfeature embeds a second independent copy of SQLite into the processWith
features = ["bundled"], rusqlite statically compiles its own SQLite C library. better-sqlite3 already bundles its own copy. The process will therefore contain two separate SQLite runtime instances that each open the same.dbfile via OS-level file locking.This is functionally correct — WAL-mode coordination is handled by the OS — but it has a few downsides:
If the target platform is guaranteed to have a system SQLite (Linux/macOS), removing
bundledand relying onlibsqlite3-syswith dynamic linking would keep a single SQLite instance. If portability is the priority (e.g., Windows CI without system SQLite),bundledis the safer choice — just worth a conscious decision here.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a comment documenting the rationale:
bundledis intentional because Windows CI lacks a system SQLite, and WAL coordination between the two instances is handled safely at the OS level. Removing it would break Windows builds.