diff --git a/src/planner.rs b/src/planner.rs index b01e0e0..2af009b 100644 --- a/src/planner.rs +++ b/src/planner.rs @@ -340,12 +340,20 @@ async fn usearch_execute( if params.physical_filters.is_empty() { // ── Unfiltered path ─────────────────────────────────────────────── - let matches = usearch_search( - &registered.index, - &params.query_vec, - params.k, - params.scalar_kind, - )?; + let matches = { + let _span = tracing::info_span!( + "usearch_hnsw_search", + usearch.k = params.k, + usearch.dims = params.query_vec.len(), + ) + .entered(); + usearch_search( + &registered.index, + &params.query_vec, + params.k, + params.scalar_kind, + )? + }; if matches.keys.is_empty() { return Ok(vec![]); @@ -358,12 +366,21 @@ async fn usearch_execute( .map(|(&k, &d)| (k, d)) .collect(); - let data_batches = registered - .lookup_provider - .fetch_by_keys(&matches.keys, &params.key_col, None) - .await?; + let fetch_keys_count = matches.keys.len(); + let data_batches = async { + registered + .lookup_provider + .fetch_by_keys(&matches.keys, &params.key_col, None) + .await + } + .instrument(tracing::info_span!( + "usearch_sqlite_fetch", + usearch.fetch_keys = fetch_keys_count, + )) + .await?; let key_col_idx = provider_key_col_idx(&registered)?; + let _span = tracing::info_span!("usearch_attach_distances").entered(); attach_distances(data_batches, key_col_idx, &key_to_dist, &params.schema) } else { // ── Adaptive filtered path ──────────────────────────────────────── @@ -490,17 +507,28 @@ async fn adaptive_filtered_execute( .map(|(&k, &d)| (k, d)) .collect(); - let data_batches = registered - .lookup_provider - .fetch_by_keys(&matches.keys, &params.key_col, None) - .await?; - - let result_batches = attach_distances( - data_batches, - lookup_key_col_idx, - &key_to_dist, - &params.schema, - )?; + let fetch_keys_count = matches.keys.len(); + let data_batches = async { + registered + .lookup_provider + .fetch_by_keys(&matches.keys, &params.key_col, None) + .await + } + .instrument(tracing::info_span!( 
"usearch_sqlite_fetch", + usearch.fetch_keys = fetch_keys_count, + )) + .await?; + + let result_batches = { + let _span = tracing::info_span!("usearch_attach_distances").entered(); + attach_distances( + data_batches, + lookup_key_col_idx, + &key_to_dist, + &params.schema, + )? + }; tracing::Span::current().record( "usearch.result_count", diff --git a/src/sqlite_provider.rs b/src/sqlite_provider.rs index 19572df..6fa723f 100644 --- a/src/sqlite_provider.rs +++ b/src/sqlite_provider.rs @@ -104,6 +104,88 @@ fn open_conn(db_path: &str) -> DFResult<Connection> { Ok(conn) } +/// Ensure the key column has an index. If the table was created with +/// `INTEGER PRIMARY KEY` the rowid alias already serves as the index and +/// this is a no-op. For tables created without a PK (pre-fix builds) we +/// create a secondary index so point lookups use the B-tree instead of a +/// full table scan. +fn ensure_key_index(conn: &Connection, table_name: &str, key_col: &str) -> DFResult<()> { + // Check if the key column is the INTEGER PRIMARY KEY (rowid alias). + // In that case SQLite already uses the rowid B-tree — no extra index needed. + let is_pk: bool = conn + .query_row( + &format!( + "SELECT pk FROM pragma_table_info({tn}) WHERE name = ?1", + tn = quote_ident(table_name) + ), + rusqlite::params![key_col], + |row| row.get::<_, i64>(0), + ) + .map(|pk| pk > 0) + .unwrap_or(false); + + if is_pk { + return Ok(()); + } + + // Check if any existing index covers the key column using pragmas + // (avoids brittle SQL text matching against sqlite_master). + let has_index: bool = { + let mut found = false; + let mut idx_stmt = conn + .prepare(&format!( + "SELECT name FROM pragma_index_list({tn})", + tn = quote_ident(table_name) + )) + .map_err(|e| DataFusionError::Execution(e.to_string()))?; + let idx_names: Vec<String> = idx_stmt + .query_map([], |row| row.get::<_, String>(0)) + .map_err(|e| DataFusionError::Execution(e.to_string()))? 
+ .filter_map(|r| r.ok()) + .collect(); + for idx_name in idx_names { + let col_name: Option<String> = conn + .query_row( + &format!( + "SELECT name FROM pragma_index_info({idx})", + idx = quote_ident(&idx_name) + ), + [], + |row| row.get::<_, String>(0), + ) + .ok(); + if col_name.as_deref() == Some(key_col) { + found = true; + break; + } + } + found + }; + + if has_index { + return Ok(()); + } + + tracing::warn!( + "SQLite table '{}': key column '{}' has no index — creating one (one-time migration).", + table_name, + key_col, + ); + conn.execute( + &format!( + "CREATE INDEX IF NOT EXISTS {idx} ON {tn}({col})", + idx = quote_ident(&format!("idx_{table_name}_{key_col}")), + tn = quote_ident(table_name), + col = quote_ident(key_col), + ), + [], + ) + .map_err(|e| DataFusionError::Execution(format!("failed to create key index: {e}")))?; + + tracing::info!("Created index on '{}'.'{}'", table_name, key_col,); + Ok(()) +} + impl SqliteLookupProvider { /// Open the existing SQLite database at `db_path`, or build it from /// parquet files on first run. Opens a pool of `pool_size` read @@ -152,6 +234,10 @@ impl SqliteLookupProvider { table_name, n ); + // Ensure the key column is indexed. Tables built before the + // INTEGER PRIMARY KEY fix may lack any index on the key column, + // turning every point lookup into a full table scan. + ensure_key_index(&conn, table_name, &key_col)?; } else { tracing::info!( "First run: building SQLite table '{}' (one-time).",