From af1ec9fb1572025f92a2f3e06eb1577b6aeb46e5 Mon Sep 17 00:00:00 2001 From: James Date: Sat, 28 Mar 2026 09:54:08 -0400 Subject: [PATCH 01/24] chore: update gitignore and remove dead return-borrowed docs Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitignore | 3 ++- src/tx/iter/mod.rs | 11 ++--------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 298bb47..90c2997 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ .DS_Store Cargo.lock .idea/ -mdbx-sys/target/ \ No newline at end of file +mdbx-sys/target/ +docs/ \ No newline at end of file diff --git a/src/tx/iter/mod.rs b/src/tx/iter/mod.rs index 57b3859..9eff7ee 100644 --- a/src/tx/iter/mod.rs +++ b/src/tx/iter/mod.rs @@ -30,15 +30,8 @@ //! # Dirty Page Handling //! //! In read-write transactions, database pages may be "dirty" (modified but -//! not yet committed). The behavior of `Cow<[u8]>` depends on the -//! `return-borrowed` feature: -//! -//! - **With `return-borrowed`**: Always returns `Cow::Borrowed`, even for -//! dirty pages. This is faster but the data may change if the transaction -//! modifies it later. -//! -//! - **Without `return-borrowed`** (default): Dirty pages are copied to -//! `Cow::Owned`. This is safer but allocates more. +//! not yet committed). When using `Cow<[u8]>`, dirty pages are copied to +//! `Cow::Owned` while clean pages are borrowed as `Cow::Borrowed`. //! //! # Example //! From 24312c9fd96b1e20018f195ef558cff7fb27554f Mon Sep 17 00:00:00 2001 From: James Date: Sat, 28 Mar 2026 09:58:47 -0400 Subject: [PATCH 02/24] bench: add cursor write operation benchmarks Adds benches/cursor_write.rs with cursor put, del, append, and append_dup benchmarks in both sync and unsync (single-thread) variants. 
Co-Authored-By: Claude Sonnet 4.6 --- Cargo.toml | 4 + benches/cursor_write.rs | 225 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 229 insertions(+) create mode 100644 benches/cursor_write.rs diff --git a/Cargo.toml b/Cargo.toml index d9f8d5f..6b8285d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,3 +68,7 @@ harness = false [[bench]] name = "deletion" harness = false + +[[bench]] +name = "cursor_write" +harness = false diff --git a/benches/cursor_write.rs b/benches/cursor_write.rs new file mode 100644 index 0000000..a666b42 --- /dev/null +++ b/benches/cursor_write.rs @@ -0,0 +1,225 @@ +#![allow(missing_docs, dead_code)] +mod utils; + +use criterion::{BatchSize, Criterion, criterion_group, criterion_main}; +use signet_libmdbx::{DatabaseFlags, WriteFlags}; +use utils::*; + +const N: u32 = 100; +const DUPSORT_DB: &str = "dupsort_bench"; + +/// Set up a plain (no named sub-databases) environment with N key-value pairs. +fn setup_plain_env(n: u32) -> (tempfile::TempDir, signet_libmdbx::Environment) { + let dir = tempfile::tempdir().unwrap(); + let env = signet_libmdbx::Environment::builder().open(dir.path()).unwrap(); + { + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + for i in 0..n { + txn.put(db, get_key(i), get_data(i), WriteFlags::empty()).unwrap(); + } + txn.commit().unwrap(); + } + (dir, env) +} + +// PUT + +fn bench_cursor_put_sync(c: &mut Criterion) { + let items: Vec<(String, String)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); + let (_dir, env) = setup_plain_env(0); + + c.bench_function("cursor_write::put::sync", |b| { + b.iter_batched( + || { + let txn = create_rw_sync(&env); + let db = txn.open_db(None).unwrap(); + (txn, db) + }, + |(txn, db)| { + let mut cursor = txn.cursor(db).unwrap(); + for (key, data) in &items { + cursor.put(key.as_bytes(), data.as_bytes(), WriteFlags::empty()).unwrap(); + } + }, + BatchSize::PerIteration, + ) + }); +} + +fn bench_cursor_put_unsync(c: &mut Criterion) { + let 
items: Vec<(String, String)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); + let (_dir, env) = setup_plain_env(0); + + c.bench_function("cursor_write::put::single_thread", |b| { + b.iter_batched( + || { + let txn = create_rw_unsync(&env); + let db = txn.open_db(None).unwrap(); + (txn, db) + }, + |(txn, db)| { + let mut cursor = txn.cursor(db).unwrap(); + for (key, data) in &items { + cursor.put(key.as_bytes(), data.as_bytes(), WriteFlags::empty()).unwrap(); + } + }, + BatchSize::PerIteration, + ) + }); +} + +// DEL + +fn bench_cursor_del_sync(c: &mut Criterion) { + c.bench_function("cursor_write::del::sync", |b| { + b.iter_batched( + || setup_plain_env(N), + |(_dir, env)| { + let txn = create_rw_sync(&env); + let db = txn.open_db(None).unwrap(); + let mut cursor = txn.cursor(db).unwrap(); + cursor.first::<(), ()>().unwrap(); + while cursor.get_current::<(), ()>().unwrap().is_some() { + cursor.del().unwrap(); + } + }, + BatchSize::PerIteration, + ) + }); +} + +fn bench_cursor_del_unsync(c: &mut Criterion) { + c.bench_function("cursor_write::del::single_thread", |b| { + b.iter_batched( + || setup_plain_env(N), + |(_dir, env)| { + let txn = create_rw_unsync(&env); + let db = txn.open_db(None).unwrap(); + let mut cursor = txn.cursor(db).unwrap(); + cursor.first::<(), ()>().unwrap(); + while cursor.get_current::<(), ()>().unwrap().is_some() { + cursor.del().unwrap(); + } + }, + BatchSize::PerIteration, + ) + }); +} + +// APPEND + +fn bench_cursor_append_sync(c: &mut Criterion) { + // Keys must be lexicographically sorted for append; zero-pad to ensure order. 
+ let items: Vec<(String, String)> = + (0..N).map(|i| (format!("key{i:05}"), get_data(i))).collect(); + let (_dir, env) = setup_plain_env(0); + + c.bench_function("cursor_write::append::sync", |b| { + b.iter_batched( + || { + let txn = create_rw_sync(&env); + let db = txn.open_db(None).unwrap(); + (txn, db) + }, + |(txn, db)| { + let mut cursor = txn.cursor(db).unwrap(); + for (key, data) in &items { + cursor.append(key.as_bytes(), data.as_bytes()).unwrap(); + } + }, + BatchSize::PerIteration, + ) + }); +} + +fn bench_cursor_append_unsync(c: &mut Criterion) { + let items: Vec<(String, String)> = + (0..N).map(|i| (format!("key{i:05}"), get_data(i))).collect(); + let (_dir, env) = setup_plain_env(0); + + c.bench_function("cursor_write::append::single_thread", |b| { + b.iter_batched( + || { + let txn = create_rw_unsync(&env); + let db = txn.open_db(None).unwrap(); + (txn, db) + }, + |(txn, db)| { + let mut cursor = txn.cursor(db).unwrap(); + for (key, data) in &items { + cursor.append(key.as_bytes(), data.as_bytes()).unwrap(); + } + }, + BatchSize::PerIteration, + ) + }); +} + +// APPEND_DUP + +/// Set up a fresh environment with a DUPSORT database (no pre-existing data). +fn setup_dupsort_env() -> (tempfile::TempDir, signet_libmdbx::Environment) { + let dir = tempfile::tempdir().unwrap(); + let env = signet_libmdbx::Environment::builder().set_max_dbs(1).open(dir.path()).unwrap(); + // Create the named DUPSORT database so it exists for subsequent transactions. + { + let txn = env.begin_rw_unsync().unwrap(); + txn.create_db(Some(DUPSORT_DB), DatabaseFlags::DUP_SORT).unwrap(); + txn.commit().unwrap(); + } + (dir, env) +} + +fn bench_cursor_append_dup_sync(c: &mut Criterion) { + // One key, N duplicate values in sorted order. 
+ let key = b"benchkey"; + let dups: Vec = (0..N).map(|i| format!("dup{i:05}")).collect(); + let (_dir, env) = setup_dupsort_env(); + + c.bench_function("cursor_write::append_dup::sync", |b| { + b.iter_batched( + || create_rw_sync(&env), + |txn| { + let db = txn.open_db(Some(DUPSORT_DB)).unwrap(); + let mut cursor = txn.cursor(db).unwrap(); + for dup in &dups { + cursor.append_dup(key, dup.as_bytes()).unwrap(); + } + }, + BatchSize::PerIteration, + ) + }); +} + +fn bench_cursor_append_dup_unsync(c: &mut Criterion) { + let key = b"benchkey"; + let dups: Vec = (0..N).map(|i| format!("dup{i:05}")).collect(); + let (_dir, env) = setup_dupsort_env(); + + c.bench_function("cursor_write::append_dup::single_thread", |b| { + b.iter_batched( + || create_rw_unsync(&env), + |txn| { + let db = txn.open_db(Some(DUPSORT_DB)).unwrap(); + let mut cursor = txn.cursor(db).unwrap(); + for dup in &dups { + cursor.append_dup(key, dup.as_bytes()).unwrap(); + } + }, + BatchSize::PerIteration, + ) + }); +} + +criterion_group! { + name = benches; + config = Criterion::default(); + targets = + bench_cursor_put_sync, bench_cursor_put_unsync, + bench_cursor_del_sync, bench_cursor_del_unsync, + bench_cursor_append_sync, bench_cursor_append_unsync, + bench_cursor_append_dup_sync, bench_cursor_append_dup_unsync, +} + +criterion_main!(benches); From 44608bbfdc9541b1c41e364c253711b3a2517be0 Mon Sep 17 00:00:00 2001 From: James Date: Sat, 28 Mar 2026 10:01:51 -0400 Subject: [PATCH 03/24] bench: add reserve, nested_txn, concurrent, and scaling benchmarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds four new criterion benchmark suites: reserve (put vs with_reservation for 64–4096 byte values), nested_txn (flat baseline + nested commit depths 1–3 + write-then-read in child), concurrent (N-reader no-writer, N-reader one-writer, sync vs unsync single-thread), and scaling (sequential get, random get, full iteration, append-ordered put at 100–100k entries). 
Co-Authored-By: Claude Sonnet 4.6 --- Cargo.toml | 16 ++++ benches/concurrent.rs | 182 ++++++++++++++++++++++++++++++++++++++++++ benches/nested_txn.rs | 96 ++++++++++++++++++++++ benches/reserve.rs | 66 +++++++++++++++ benches/scaling.rs | 144 +++++++++++++++++++++++++++++++++ 5 files changed, 504 insertions(+) create mode 100644 benches/concurrent.rs create mode 100644 benches/nested_txn.rs create mode 100644 benches/reserve.rs create mode 100644 benches/scaling.rs diff --git a/Cargo.toml b/Cargo.toml index 6b8285d..92682f6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -72,3 +72,19 @@ harness = false [[bench]] name = "cursor_write" harness = false + +[[bench]] +name = "reserve" +harness = false + +[[bench]] +name = "nested_txn" +harness = false + +[[bench]] +name = "concurrent" +harness = false + +[[bench]] +name = "scaling" +harness = false diff --git a/benches/concurrent.rs b/benches/concurrent.rs new file mode 100644 index 0000000..2aabf1a --- /dev/null +++ b/benches/concurrent.rs @@ -0,0 +1,182 @@ +#![allow(missing_docs, dead_code)] +mod utils; + +use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main}; +use signet_libmdbx::{Environment, ObjectLength, WriteFlags}; +use std::{ + sync::{Arc, Barrier}, + thread, +}; +use tempfile::tempdir; +use utils::*; + +const N_ROWS: u32 = 1_000; +const READER_COUNTS: &[usize] = &[1, 4, 8]; + +fn setup_arc_env(n: u32) -> (tempfile::TempDir, Arc) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + { + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + for i in 0..n { + txn.put(db, get_key(i), get_data(i), WriteFlags::empty()).unwrap(); + } + txn.commit().unwrap(); + } + (dir, Arc::new(env)) +} + +/// N readers, no writer — read throughput baseline. 
+fn bench_n_readers_no_writer(c: &mut Criterion) { + let mut group = c.benchmark_group("concurrent::readers_no_writer"); + + for &n_readers in READER_COUNTS { + let (_dir, env) = setup_arc_env(N_ROWS); + let keys: Arc> = Arc::new((0..N_ROWS).map(get_key).collect()); + + group.bench_with_input( + BenchmarkId::from_parameter(n_readers), + &n_readers, + |b, &n_readers| { + b.iter_batched( + || Arc::new(Barrier::new(n_readers + 1)), + |barrier| { + let handles: Vec<_> = (0..n_readers) + .map(|_| { + let env = Arc::clone(&env); + let keys = Arc::clone(&keys); + let barrier = Arc::clone(&barrier); + thread::spawn(move || { + let txn = env.begin_ro_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + barrier.wait(); + let mut total = 0usize; + for key in keys.iter() { + total += *txn + .get::(db.dbi(), key.as_bytes()) + .unwrap() + .unwrap(); + } + total + }) + }) + .collect(); + barrier.wait(); + handles.into_iter().for_each(|h| { + h.join().unwrap(); + }); + }, + BatchSize::PerIteration, + ) + }, + ); + } + group.finish(); +} + +/// N readers + 1 writer — read throughput under write contention. +fn bench_n_readers_one_writer(c: &mut Criterion) { + let mut group = c.benchmark_group("concurrent::readers_one_writer"); + + for &n_readers in READER_COUNTS { + let (_dir, env) = setup_arc_env(N_ROWS); + let keys: Arc> = Arc::new((0..N_ROWS).map(get_key).collect()); + + group.bench_with_input( + BenchmarkId::from_parameter(n_readers), + &n_readers, + |b, &n_readers| { + b.iter_batched( + || Arc::new(Barrier::new(n_readers + 2)), + |barrier| { + // Spawn readers. 
+ let reader_handles: Vec<_> = (0..n_readers) + .map(|_| { + let env = Arc::clone(&env); + let keys = Arc::clone(&keys); + let barrier = Arc::clone(&barrier); + thread::spawn(move || { + let txn = env.begin_ro_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + barrier.wait(); + let mut total = 0usize; + for key in keys.iter() { + total += *txn + .get::(db.dbi(), key.as_bytes()) + .unwrap() + .unwrap(); + } + total + }) + }) + .collect(); + + // Spawn one writer. + let writer = { + let env = Arc::clone(&env); + let barrier = Arc::clone(&barrier); + thread::spawn(move || { + barrier.wait(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + txn.put(db, b"writer_key", b"writer_val", WriteFlags::empty()) + .unwrap(); + txn.commit().unwrap(); + }) + }; + + barrier.wait(); + writer.join().unwrap(); + reader_handles.into_iter().for_each(|h| { + h.join().unwrap(); + }); + }, + BatchSize::PerIteration, + ) + }, + ); + } + group.finish(); +} + +/// Single-thread comparison: sync vs unsync transaction creation. +fn bench_single_thread_sync_vs_unsync(c: &mut Criterion) { + let (_dir, env) = setup_arc_env(N_ROWS); + let keys: Vec = (0..N_ROWS).map(get_key).collect(); + + c.bench_function("concurrent::single_thread::sync", |b| { + b.iter(|| { + let txn = env.begin_ro_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + let mut total = 0usize; + for key in &keys { + total += *txn.get::(db.dbi(), key.as_bytes()).unwrap().unwrap(); + } + total + }) + }); + + c.bench_function("concurrent::single_thread::unsync", |b| { + b.iter(|| { + let txn = env.begin_ro_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + let mut total = 0usize; + for key in &keys { + total += *txn.get::(db.dbi(), key.as_bytes()).unwrap().unwrap(); + } + total + }) + }); +} + +criterion_group! 
{ + name = benches; + config = Criterion::default(); + targets = + bench_n_readers_no_writer, + bench_n_readers_one_writer, + bench_single_thread_sync_vs_unsync, +} + +criterion_main!(benches); diff --git a/benches/nested_txn.rs b/benches/nested_txn.rs new file mode 100644 index 0000000..3c804ee --- /dev/null +++ b/benches/nested_txn.rs @@ -0,0 +1,96 @@ +#![allow(missing_docs, dead_code)] + +use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main}; +use signet_libmdbx::{Environment, WriteFlags}; +use tempfile::tempdir; + +fn setup_env() -> (tempfile::TempDir, Environment) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + (dir, env) +} + +/// Benchmark: create + commit a flat (non-nested) transaction as baseline. +fn bench_flat_baseline(c: &mut Criterion) { + let (_dir, env) = setup_env(); + + c.bench_function("nested_txn::flat_baseline", |b| { + b.iter_batched( + || (), + |()| { + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + txn.put(db, b"key", b"value", WriteFlags::empty()).unwrap(); + txn.commit().unwrap(); + }, + BatchSize::PerIteration, + ) + }); +} + +/// Benchmark: create + commit a nested transaction at depth N. +fn bench_nested_commit(c: &mut Criterion) { + let mut group = c.benchmark_group("nested_txn::commit"); + + for depth in [1usize, 2, 3] { + let (_dir, env) = setup_env(); + + group.bench_with_input(BenchmarkId::from_parameter(depth), &depth, |b, &depth| { + b.iter_batched( + || (), + |()| { + let root = env.begin_rw_sync().unwrap(); + // Build the nesting chain. Each nested txn is committed + // before the parent commits. + let mut parents = Vec::with_capacity(depth); + parents.push(root); + for _ in 1..depth { + let child = parents.last().unwrap().begin_nested_txn().unwrap(); + parents.push(child); + } + // Commit innermost to outermost. 
+ for txn in parents.into_iter().rev() { + txn.commit().unwrap(); + } + }, + BatchSize::PerIteration, + ) + }); + } + group.finish(); +} + +/// Benchmark: write in a nested txn, commit child, verify visible in parent. +fn bench_nested_write_and_read(c: &mut Criterion) { + let (_dir, env) = setup_env(); + + c.bench_function("nested_txn::write_in_child_read_in_parent", |b| { + b.iter_batched( + || (), + |()| { + let parent = env.begin_rw_sync().unwrap(); + let child = parent.begin_nested_txn().unwrap(); + + let db = child.open_db(None).unwrap(); + child.put(db, b"nested_key", b"nested_val", WriteFlags::empty()).unwrap(); + child.commit().unwrap(); + + // Value should be visible to parent after child commit. + let db = parent.open_db(None).unwrap(); + let val: Option> = parent.get(db.dbi(), b"nested_key").unwrap(); + assert_eq!(val.as_deref(), Some(b"nested_val".as_slice())); + + parent.commit().unwrap(); + }, + BatchSize::PerIteration, + ) + }); +} + +criterion_group! { + name = benches; + config = Criterion::default(); + targets = bench_flat_baseline, bench_nested_commit, bench_nested_write_and_read, +} + +criterion_main!(benches); diff --git a/benches/reserve.rs b/benches/reserve.rs new file mode 100644 index 0000000..0c44685 --- /dev/null +++ b/benches/reserve.rs @@ -0,0 +1,66 @@ +#![allow(missing_docs, dead_code)] +mod utils; + +use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main}; +use signet_libmdbx::WriteFlags; +use utils::*; + +const VALUE_SIZES: &[usize] = &[64, 256, 1024, 4096]; +const KEY: &[u8] = b"benchkey"; + +fn bench_put(c: &mut Criterion) { + let mut group = c.benchmark_group("reserve::put"); + for &size in VALUE_SIZES { + let data = vec![0u8; size]; + let (_dir, env) = setup_bench_db(0); + + group.bench_with_input(BenchmarkId::from_parameter(size), &size, |b, _| { + b.iter_batched( + || { + let txn = create_rw_unsync(&env); + let db = txn.open_db(None).unwrap(); + (txn, db) + }, + |(txn, db)| { + txn.put(db, KEY, 
data.as_slice(), WriteFlags::empty()).unwrap(); + }, + BatchSize::PerIteration, + ) + }); + } + group.finish(); +} + +fn bench_with_reservation(c: &mut Criterion) { + let mut group = c.benchmark_group("reserve::with_reservation"); + for &size in VALUE_SIZES { + let data = vec![0u8; size]; + let (_dir, env) = setup_bench_db(0); + + group.bench_with_input(BenchmarkId::from_parameter(size), &size, |b, _| { + b.iter_batched( + || { + let txn = create_rw_unsync(&env); + let db = txn.open_db(None).unwrap(); + (txn, db) + }, + |(txn, db)| { + txn.with_reservation(db, KEY, size, WriteFlags::empty(), |buf| { + buf.copy_from_slice(&data); + }) + .unwrap(); + }, + BatchSize::PerIteration, + ) + }); + } + group.finish(); +} + +criterion_group! { + name = benches; + config = Criterion::default(); + targets = bench_put, bench_with_reservation, +} + +criterion_main!(benches); diff --git a/benches/scaling.rs b/benches/scaling.rs new file mode 100644 index 0000000..bcba0ae --- /dev/null +++ b/benches/scaling.rs @@ -0,0 +1,144 @@ +#![allow(missing_docs, dead_code)] + +use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main}; +use rand::{SeedableRng, prelude::SliceRandom, rngs::StdRng}; +use signet_libmdbx::{Environment, ObjectLength, WriteFlags}; +use tempfile::tempdir; + +const ENTRY_COUNTS: &[u32] = &[100, 1_000, 10_000, 100_000]; + +fn format_key(i: u32) -> String { + format!("key{i:010}") +} + +fn format_data(i: u32) -> String { + format!("data{i:010}") +} + +/// Set up a plain environment (default db only) with N entries pre-populated. 
+fn setup_scaling_env(n: u32) -> (tempfile::TempDir, Environment) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + { + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + for i in 0..n { + txn.put(db, format_key(i), format_data(i), WriteFlags::empty()).unwrap(); + } + txn.commit().unwrap(); + } + (dir, env) +} + +/// Sequential get: read every entry in insertion order. +fn bench_sequential_get(c: &mut Criterion) { + let mut group = c.benchmark_group("scaling::sequential_get"); + + for &n in ENTRY_COUNTS { + let (_dir, env) = setup_scaling_env(n); + let keys: Vec = (0..n).map(format_key).collect(); + + group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| { + b.iter(|| { + let txn = env.begin_ro_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + let mut total = 0usize; + for key in &keys { + total += *txn.get::(db.dbi(), key.as_bytes()).unwrap().unwrap(); + } + total + }) + }); + } + group.finish(); +} + +/// Random get: read every entry in shuffled order. +fn bench_random_get(c: &mut Criterion) { + let mut group = c.benchmark_group("scaling::random_get"); + + for &n in ENTRY_COUNTS { + let (_dir, env) = setup_scaling_env(n); + let mut keys: Vec = (0..n).map(format_key).collect(); + keys.shuffle(&mut StdRng::from_seed(Default::default())); + + group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| { + b.iter(|| { + let txn = env.begin_ro_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + let mut total = 0usize; + for key in &keys { + total += *txn.get::(db.dbi(), key.as_bytes()).unwrap().unwrap(); + } + total + }) + }); + } + group.finish(); +} + +/// Full iteration: walk every entry via a cursor. 
+fn bench_full_iteration(c: &mut Criterion) { + let mut group = c.benchmark_group("scaling::full_iteration"); + + for &n in ENTRY_COUNTS { + let (_dir, env) = setup_scaling_env(n); + + group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| { + b.iter(|| { + let txn = env.begin_ro_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + let mut cursor = txn.cursor(db).unwrap(); + let mut count = 0usize; + while cursor.next::, Vec>().unwrap().is_some() { + count += 1; + } + count + }) + }); + } + group.finish(); +} + +/// Append-ordered put: insert N entries in key order into a fresh environment. +fn bench_append_ordered_put(c: &mut Criterion) { + let mut group = c.benchmark_group("scaling::append_ordered_put"); + + for &n in ENTRY_COUNTS { + // Keys use zero-padded format to ensure lexicographic ordering. + let items: Vec<(String, String)> = + (0..n).map(|i| (format_key(i), format_data(i))).collect(); + + group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| { + b.iter_batched( + || { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + (dir, env) + }, + |(_dir, env)| { + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + for (key, data) in &items { + txn.append(db, key.as_bytes(), data.as_bytes()).unwrap(); + } + txn.commit().unwrap(); + }, + BatchSize::PerIteration, + ) + }); + } + group.finish(); +} + +criterion_group! 
{ + name = benches; + config = Criterion::default(); + targets = + bench_sequential_get, + bench_random_get, + bench_full_iteration, + bench_append_ordered_put, +} + +criterion_main!(benches); From dc9c795871423971779bdd262ee4d23a909504c3 Mon Sep 17 00:00:00 2001 From: James Date: Sat, 28 Mar 2026 10:07:08 -0400 Subject: [PATCH 04/24] test: add proptest_kv, proptest_cursor, proptest_dupsort with migrated and new tests Co-Authored-By: Claude Sonnet 4.6 --- tests/proptest_cursor.rs | 367 ++++++++++++++++++++++++ tests/proptest_dupsort.rs | 515 ++++++++++++++++++++++++++++++++++ tests/proptest_kv.rs | 571 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 1453 insertions(+) create mode 100644 tests/proptest_cursor.rs create mode 100644 tests/proptest_dupsort.rs create mode 100644 tests/proptest_kv.rs diff --git a/tests/proptest_cursor.rs b/tests/proptest_cursor.rs new file mode 100644 index 0000000..cc4e47b --- /dev/null +++ b/tests/proptest_cursor.rs @@ -0,0 +1,367 @@ +//! Property-based tests for cursor operations. +//! +//! Tests focus on both "does not panic" and correctness properties. Errors are +//! acceptable (e.g., `BadValSize`), panics are not. +#![allow(missing_docs)] + +use proptest::prelude::*; +use signet_libmdbx::{Environment, WriteFlags}; +use tempfile::tempdir; + +/// Strategy for generating byte vectors of various sizes (0 to 1KB). +fn arb_bytes() -> impl Strategy> { + prop::collection::vec(any::(), 0..1024) +} + +/// Strategy for keys that won't trigger MDBX assertion failures. +/// MDBX max key size is ~2022 bytes for 4KB pages. +fn arb_safe_key() -> impl Strategy> { + prop::collection::vec(any::(), 0..512) +} + +// ============================================================================= +// Cursor Operations - TxSync (V1) +// ============================================================================= + +proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(256))] + + /// Test that cursor.set() with arbitrary key does not panic (V1). + #[test] + fn cursor_set_arbitrary_key_v1(key in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + // Add some data so cursor is positioned + txn.put(db, b"test_key", b"test_val", WriteFlags::empty()).unwrap(); + + let mut cursor = txn.cursor(db).unwrap(); + + // set() with arbitrary key should return None or value, never panic + let result: signet_libmdbx::ReadResult>> = cursor.set(&key); + prop_assert!(result.is_ok()); + } + + /// Test that cursor.set_range() with arbitrary key does not panic (V1). + #[test] + fn cursor_set_range_arbitrary_key_v1(key in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + // Add some data + txn.put(db, b"aaa", b"val_a", WriteFlags::empty()).unwrap(); + txn.put(db, b"zzz", b"val_z", WriteFlags::empty()).unwrap(); + + let mut cursor = txn.cursor(db).unwrap(); + + // set_range() with arbitrary key should not panic + let result: signet_libmdbx::ReadResult, Vec)>> = + cursor.set_range(&key); + prop_assert!(result.is_ok()); + } + + /// Test that cursor.set_key() with arbitrary key does not panic (V1). 
+ #[test] + fn cursor_set_key_arbitrary_v1(key in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + txn.put(db, b"test", b"value", WriteFlags::empty()).unwrap(); + + let mut cursor = txn.cursor(db).unwrap(); + + // set_key() should not panic + let result: signet_libmdbx::ReadResult, Vec)>> = + cursor.set_key(&key); + prop_assert!(result.is_ok()); + } +} + +// ============================================================================= +// Cursor Operations - TxUnsync (V2) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + /// Test that cursor.set() with arbitrary key does not panic (V2). + #[test] + fn cursor_set_arbitrary_key_v2(key in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + txn.put(db, b"test_key", b"test_val", WriteFlags::empty()).unwrap(); + + let mut cursor = txn.cursor(db).unwrap(); + + let result: signet_libmdbx::ReadResult>> = cursor.set(&key); + prop_assert!(result.is_ok()); + } + + /// Test that cursor.set_range() with arbitrary key does not panic (V2). + #[test] + fn cursor_set_range_arbitrary_key_v2(key in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + txn.put(db, b"aaa", b"val_a", WriteFlags::empty()).unwrap(); + txn.put(db, b"zzz", b"val_z", WriteFlags::empty()).unwrap(); + + let mut cursor = txn.cursor(db).unwrap(); + + let result: signet_libmdbx::ReadResult, Vec)>> = + cursor.set_range(&key); + prop_assert!(result.is_ok()); + } + + /// Test that cursor.set_key() with arbitrary key does not panic (V2). 
+ #[test] + fn cursor_set_key_arbitrary_v2(key in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + txn.put(db, b"test", b"value", WriteFlags::empty()).unwrap(); + + let mut cursor = txn.cursor(db).unwrap(); + + let result: signet_libmdbx::ReadResult, Vec)>> = + cursor.set_key(&key); + prop_assert!(result.is_ok()); + } +} + +// ============================================================================= +// Cursor Put Operations +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Test cursor.put with arbitrary key/value does not panic (V1). + /// + /// Note: Uses constrained key sizes because MDBX aborts on very large keys + /// via cursor.put (assertion failure in cursor_put_checklen). + #[test] + fn cursor_put_arbitrary_v1(key in arb_safe_key(), value in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + let mut cursor = txn.cursor(db).unwrap(); + + // cursor.put should not panic + let result = cursor.put(&key, &value, WriteFlags::empty()); + // Errors are fine (e.g., BadValSize), panics are not + let _ = result; + } + + /// Test cursor.put with arbitrary key/value does not panic (V2). 
+ #[test] + fn cursor_put_arbitrary_v2(key in arb_safe_key(), value in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + let mut cursor = txn.cursor(db).unwrap(); + + let result = cursor.put(&key, &value, WriteFlags::empty()); + let _ = result; + } +} + +// ============================================================================= +// Correctness: Cursor Set - TxSync (V1) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + /// Test that cursor.set returns the correct value when key exists (V1). + #[test] + fn cursor_set_correctness_v1(key in arb_safe_key(), value in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + let put_result = txn.put(db, &key, &value, WriteFlags::empty()); + if put_result.is_ok() { + let mut cursor = txn.cursor(db).unwrap(); + let retrieved: Option> = cursor.set(&key).unwrap(); + prop_assert_eq!(retrieved, Some(value)); + } + } +} + +// ============================================================================= +// Correctness: Cursor Set - TxUnsync (V2) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + /// Test that cursor.set returns the correct value when key exists (V2). 
+ #[test] + fn cursor_set_correctness_v2(key in arb_safe_key(), value in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + let put_result = txn.put(db, &key, &value, WriteFlags::empty()); + if put_result.is_ok() { + let mut cursor = txn.cursor(db).unwrap(); + let retrieved: Option> = cursor.set(&key).unwrap(); + prop_assert_eq!(retrieved, Some(value)); + } + } +} + +// ============================================================================= +// New: Cursor set_lowerbound +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Test that set_lowerbound returns a key >= the search key when Some (V1). + #[test] + fn cursor_set_lowerbound_v1( + entries in prop::collection::vec((arb_safe_key(), arb_bytes()), 1..10), + search_key in arb_safe_key(), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + for (key, value) in &entries { + // Ignore errors (e.g. empty key issues) + let _ = txn.put(db, key, value, WriteFlags::empty()); + } + + let mut cursor = txn.cursor(db).unwrap(); + let result = cursor.set_lowerbound::, Vec>(&search_key); + prop_assert!(result.is_ok()); + + if let Some((_exact, returned_key, _val)) = result.unwrap() { + // The returned key must be >= the search key + prop_assert!(returned_key >= search_key); + } + } + + /// Test that set_lowerbound returns a key >= the search key when Some (V2). 
+ #[test] + fn cursor_set_lowerbound_v2( + entries in prop::collection::vec((arb_safe_key(), arb_bytes()), 1..10), + search_key in arb_safe_key(), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + for (key, value) in &entries { + let _ = txn.put(db, key, value, WriteFlags::empty()); + } + + let mut cursor = txn.cursor(db).unwrap(); + let result = cursor.set_lowerbound::, Vec>(&search_key); + prop_assert!(result.is_ok()); + + if let Some((_exact, returned_key, _val)) = result.unwrap() { + prop_assert!(returned_key >= search_key); + } + } +} + +// ============================================================================= +// New: Cursor append sorted +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Test that appending sorted keys via cursor then iterating retrieves all in order (V1). 
+ #[test] + fn cursor_append_sorted_v1( + raw_keys in prop::collection::vec(arb_safe_key(), 1..20), + ) { + // Filter out empty keys (MDBX allows empty keys but let's keep it simple) + let mut keys: Vec> = raw_keys.into_iter().filter(|k| !k.is_empty()).collect(); + prop_assume!(!keys.is_empty()); + + keys.sort(); + keys.dedup(); + + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + let mut cursor = txn.cursor(db).unwrap(); + + // Append all keys in sorted order + for key in &keys { + cursor.append(key, b"v").unwrap(); + } + + // Iterate and verify all keys are present in order + let mut read_cursor = txn.cursor(db).unwrap(); + let retrieved: Vec> = read_cursor + .iter_start::, Vec>() + .unwrap() + .filter_map(Result::ok) + .map(|(k, _)| k) + .collect(); + + prop_assert_eq!(retrieved, keys); + } + + /// Test that appending sorted keys via cursor then iterating retrieves all in order (V2). + #[test] + fn cursor_append_sorted_v2( + raw_keys in prop::collection::vec(arb_safe_key(), 1..20), + ) { + let mut keys: Vec> = raw_keys.into_iter().filter(|k| !k.is_empty()).collect(); + prop_assume!(!keys.is_empty()); + + keys.sort(); + keys.dedup(); + + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + let mut cursor = txn.cursor(db).unwrap(); + + for key in &keys { + cursor.append(key, b"v").unwrap(); + } + + let mut read_cursor = txn.cursor(db).unwrap(); + let retrieved: Vec> = read_cursor + .iter_start::, Vec>() + .unwrap() + .filter_map(Result::ok) + .map(|(k, _)| k) + .collect(); + + prop_assert_eq!(retrieved, keys); + } +} diff --git a/tests/proptest_dupsort.rs b/tests/proptest_dupsort.rs new file mode 100644 index 0000000..239a003 --- /dev/null +++ b/tests/proptest_dupsort.rs @@ -0,0 +1,515 @@ +//! 
Property-based tests for DUP_SORT operations and database name handling. +//! +//! Tests focus on both "does not panic" and correctness properties. Errors are +//! acceptable (e.g., `BadValSize`), panics are not. +#![allow(missing_docs)] + +use proptest::prelude::*; +use signet_libmdbx::{DatabaseFlags, Environment, WriteFlags}; +use tempfile::tempdir; + +/// Strategy for generating small byte vectors (0 to 64 bytes). +fn arb_small_bytes() -> impl Strategy> { + prop::collection::vec(any::(), 0..64) +} + +/// Strategy for valid database names (alphanumeric + underscore, 1-64 chars). +fn arb_db_name() -> impl Strategy { + "[a-zA-Z][a-zA-Z0-9_]{0,63}" +} + +// ============================================================================= +// Database Names +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Test that create_db with arbitrary valid names does not panic (V1). + #[test] + fn create_db_arbitrary_name_v1(name in arb_db_name()) { + let dir = tempdir().unwrap(); + let env = Environment::builder() + .set_max_dbs(16) + .open(dir.path()) + .unwrap(); + let txn = env.begin_rw_sync().unwrap(); + + // create_db should not panic, may return error for invalid names + let result = txn.create_db(Some(&name), DatabaseFlags::empty()); + // We accept both success and error, just no panic + let _ = result; + } + + /// Test that create_db with arbitrary valid names does not panic (V2). 
+ #[test] + fn create_db_arbitrary_name_v2(name in arb_db_name()) { + let dir = tempdir().unwrap(); + let env = Environment::builder() + .set_max_dbs(16) + .open(dir.path()) + .unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + + let result = txn.create_db(Some(&name), DatabaseFlags::empty()); + let _ = result; + } +} + +// ============================================================================= +// DUP_SORT Operations +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Test that DUP_SORT put with multiple values does not panic (V1). + #[test] + fn dupsort_put_multiple_values_v1( + key in arb_small_bytes(), + values in prop::collection::vec(arb_small_bytes(), 1..10), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); + + for value in &values { + // Should not panic + let result = txn.put(db, &key, value, WriteFlags::empty()); + // Errors are acceptable, panics are not + let _ = result; + } + } + + /// Test that DUP_SORT put with multiple values does not panic (V2). + #[test] + fn dupsort_put_multiple_values_v2( + key in arb_small_bytes(), + values in prop::collection::vec(arb_small_bytes(), 1..10), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); + + for value in &values { + let result = txn.put(db, &key, value, WriteFlags::empty()); + let _ = result; + } + } + + /// Test cursor get_both with arbitrary key/value does not panic (V1). 
+ #[test] + fn cursor_get_both_arbitrary_v1( + search_key in arb_small_bytes(), + search_value in arb_small_bytes(), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); + + // Add some data + txn.put(db, b"key1", b"val1", WriteFlags::empty()).unwrap(); + txn.put(db, b"key1", b"val2", WriteFlags::empty()).unwrap(); + + let mut cursor = txn.cursor(db).unwrap(); + + // get_both should not panic + let result: signet_libmdbx::ReadResult>> = + cursor.get_both(&search_key, &search_value); + prop_assert!(result.is_ok()); + } + + /// Test cursor get_both_range with arbitrary key/value does not panic (V1). + #[test] + fn cursor_get_both_range_arbitrary_v1( + search_key in arb_small_bytes(), + search_value in arb_small_bytes(), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); + + txn.put(db, b"key1", b"val1", WriteFlags::empty()).unwrap(); + txn.put(db, b"key1", b"val2", WriteFlags::empty()).unwrap(); + + let mut cursor = txn.cursor(db).unwrap(); + + // get_both_range should not panic + let result: signet_libmdbx::ReadResult>> = + cursor.get_both_range(&search_key, &search_value); + prop_assert!(result.is_ok()); + } + + /// Test cursor get_both with arbitrary key/value does not panic (V2). 
+ #[test] + fn cursor_get_both_arbitrary_v2( + search_key in arb_small_bytes(), + search_value in arb_small_bytes(), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); + + txn.put(db, b"key1", b"val1", WriteFlags::empty()).unwrap(); + txn.put(db, b"key1", b"val2", WriteFlags::empty()).unwrap(); + + let mut cursor = txn.cursor(db).unwrap(); + + let result: signet_libmdbx::ReadResult>> = + cursor.get_both(&search_key, &search_value); + prop_assert!(result.is_ok()); + } + + /// Test cursor get_both_range with arbitrary key/value does not panic (V2). + #[test] + fn cursor_get_both_range_arbitrary_v2( + search_key in arb_small_bytes(), + search_value in arb_small_bytes(), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); + + txn.put(db, b"key1", b"val1", WriteFlags::empty()).unwrap(); + txn.put(db, b"key1", b"val2", WriteFlags::empty()).unwrap(); + + let mut cursor = txn.cursor(db).unwrap(); + + let result: signet_libmdbx::ReadResult>> = + cursor.get_both_range(&search_key, &search_value); + prop_assert!(result.is_ok()); + } +} + +// ============================================================================= +// Correctness: DUP_SORT Values - TxSync (V1) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Test that all unique DUP_SORT values are retrievable via iter_dup_of (V1). 
+ #[test] + fn dupsort_values_correctness_v1( + key in arb_small_bytes(), + values in prop::collection::vec(arb_small_bytes(), 1..10), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); + + // Insert all values + let mut inserted: Vec> = Vec::new(); + for value in &values { + if txn.put(db, &key, value, WriteFlags::empty()).is_ok() + && !inserted.contains(value) + { + inserted.push(value.clone()); + } + } + + // Skip if nothing was inserted + prop_assume!(!inserted.is_empty()); + + // Retrieve all values via iter_dup_of (yields just values, not (key, value)) + let mut cursor = txn.cursor(db).unwrap(); + let retrieved: Vec> = + cursor.iter_dup_of::>(&key).unwrap().filter_map(Result::ok).collect(); + + // All inserted values should be retrieved (order is sorted by MDBX) + inserted.sort(); + let mut retrieved_sorted = retrieved.clone(); + retrieved_sorted.sort(); + prop_assert_eq!(inserted, retrieved_sorted); + } +} + +// ============================================================================= +// Correctness: DUP_SORT Values - TxUnsync (V2) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Test that all unique DUP_SORT values are retrievable via iter_dup_of (V2). 
+ #[test] + fn dupsort_values_correctness_v2( + key in arb_small_bytes(), + values in prop::collection::vec(arb_small_bytes(), 1..10), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); + + let mut inserted: Vec> = Vec::new(); + for value in &values { + if txn.put(db, &key, value, WriteFlags::empty()).is_ok() + && !inserted.contains(value) + { + inserted.push(value.clone()); + } + } + + prop_assume!(!inserted.is_empty()); + + // iter_dup_of yields just values, not (key, value) + let mut cursor = txn.cursor(db).unwrap(); + let retrieved: Vec> = + cursor.iter_dup_of::>(&key).unwrap().filter_map(Result::ok).collect(); + + inserted.sort(); + let mut retrieved_sorted = retrieved.clone(); + retrieved_sorted.sort(); + prop_assert_eq!(inserted, retrieved_sorted); + } +} + +// ============================================================================= +// New: Delete specific dup +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Test that del with a specific value removes only that dup (V1). 
+ #[test] + fn del_specific_dup_v1( + key in arb_small_bytes(), + values in prop::collection::vec(arb_small_bytes(), 2..8), + ) { + // Need at least 2 distinct non-empty values and a non-empty key + prop_assume!(!key.is_empty()); + let mut unique: Vec> = values + .into_iter() + .filter(|v| !v.is_empty()) + .collect::>() + .into_iter() + .collect(); + prop_assume!(unique.len() >= 2); + + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); + + // Insert all unique values + for value in &unique { + txn.put(db, &key, value, WriteFlags::empty()).unwrap(); + } + + // Delete the first value specifically + let to_delete = unique.remove(0); + let deleted = txn.del(db, &key, Some(to_delete.as_slice())).unwrap(); + prop_assert!(deleted); + + // Retrieve remaining values + let mut cursor = txn.cursor(db).unwrap(); + let retrieved: Vec> = + cursor.iter_dup_of::>(&key).unwrap().filter_map(Result::ok).collect(); + + // The deleted value should not be present; the rest should be + prop_assert!(!retrieved.contains(&to_delete)); + unique.sort(); + let mut retrieved_sorted = retrieved; + retrieved_sorted.sort(); + prop_assert_eq!(retrieved_sorted, unique); + } + + /// Test that del with a specific value removes only that dup (V2). 
+ #[test] + fn del_specific_dup_v2( + key in arb_small_bytes(), + values in prop::collection::vec(arb_small_bytes(), 2..8), + ) { + prop_assume!(!key.is_empty()); + let mut unique: Vec> = values + .into_iter() + .filter(|v| !v.is_empty()) + .collect::>() + .into_iter() + .collect(); + prop_assume!(unique.len() >= 2); + + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); + + for value in &unique { + txn.put(db, &key, value, WriteFlags::empty()).unwrap(); + } + + let to_delete = unique.remove(0); + let deleted = txn.del(db, &key, Some(to_delete.as_slice())).unwrap(); + prop_assert!(deleted); + + let mut cursor = txn.cursor(db).unwrap(); + let retrieved: Vec> = + cursor.iter_dup_of::>(&key).unwrap().filter_map(Result::ok).collect(); + + prop_assert!(!retrieved.contains(&to_delete)); + unique.sort(); + let mut retrieved_sorted = retrieved; + retrieved_sorted.sort(); + prop_assert_eq!(retrieved_sorted, unique); + } +} + +// ============================================================================= +// New: Delete all dups +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Test that del with None removes all dup values for the key (V1). 
+ #[test] + fn del_all_dups_v1( + key in arb_small_bytes(), + values in prop::collection::vec(arb_small_bytes(), 1..8), + ) { + prop_assume!(!key.is_empty()); + let unique: Vec> = values + .into_iter() + .filter(|v| !v.is_empty()) + .collect::>() + .into_iter() + .collect(); + prop_assume!(!unique.is_empty()); + + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); + + for value in &unique { + txn.put(db, &key, value, WriteFlags::empty()).unwrap(); + } + + // del with None deletes ALL dups for this key + let deleted = txn.del(db, &key, None).unwrap(); + prop_assert!(deleted); + + // After deletion, get should return None + let result: Option> = txn.get(db.dbi(), &key).unwrap(); + prop_assert!(result.is_none()); + } + + /// Test that del with None removes all dup values for the key (V2). + #[test] + fn del_all_dups_v2( + key in arb_small_bytes(), + values in prop::collection::vec(arb_small_bytes(), 1..8), + ) { + prop_assume!(!key.is_empty()); + let unique: Vec> = values + .into_iter() + .filter(|v| !v.is_empty()) + .collect::>() + .into_iter() + .collect(); + prop_assume!(!unique.is_empty()); + + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); + + for value in &unique { + txn.put(db, &key, value, WriteFlags::empty()).unwrap(); + } + + let deleted = txn.del(db, &key, None).unwrap(); + prop_assert!(deleted); + + let result: Option> = txn.get(db.dbi(), &key).unwrap(); + prop_assert!(result.is_none()); + } +} + +// ============================================================================= +// New: iter_dup completeness +// ============================================================================= + +proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(64))] + + /// Test that iter_dup_of retrieves all inserted values for each key (V1). + #[test] + fn iter_dup_completeness_v1( + n_keys in 1usize..5, + m_values in 1usize..6, + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); + + // Insert N keys with M values each (using deterministic byte sequences) + for k in 0..n_keys { + let key = vec![k as u8]; + for v in 0..m_values { + let value = vec![v as u8]; + txn.put(db, &key, &value, WriteFlags::empty()).unwrap(); + } + } + + // Verify each key has exactly M values via iter_dup_of + let mut cursor = txn.cursor(db).unwrap(); + for k in 0..n_keys { + let key = vec![k as u8]; + let retrieved: Vec> = cursor + .iter_dup_of::>(&key) + .unwrap() + .filter_map(Result::ok) + .collect(); + prop_assert_eq!(retrieved.len(), m_values); + + // Values should be in order 0..m_values + let expected: Vec> = (0..m_values).map(|v| vec![v as u8]).collect(); + prop_assert_eq!(retrieved, expected); + } + } + + /// Test that iter_dup_of retrieves all inserted values for each key (V2). 
+ #[test] + fn iter_dup_completeness_v2( + n_keys in 1usize..5, + m_values in 1usize..6, + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); + + for k in 0..n_keys { + let key = vec![k as u8]; + for v in 0..m_values { + let value = vec![v as u8]; + txn.put(db, &key, &value, WriteFlags::empty()).unwrap(); + } + } + + let mut cursor = txn.cursor(db).unwrap(); + for k in 0..n_keys { + let key = vec![k as u8]; + let retrieved: Vec> = cursor + .iter_dup_of::>(&key) + .unwrap() + .filter_map(Result::ok) + .collect(); + prop_assert_eq!(retrieved.len(), m_values); + + let expected: Vec> = (0..m_values).map(|v| vec![v as u8]).collect(); + prop_assert_eq!(retrieved, expected); + } + } +} diff --git a/tests/proptest_kv.rs b/tests/proptest_kv.rs new file mode 100644 index 0000000..a675dfc --- /dev/null +++ b/tests/proptest_kv.rs @@ -0,0 +1,571 @@ +//! Property-based tests for key/value operations. +//! +//! Tests focus on both "does not panic" and correctness properties. Errors are +//! acceptable (e.g., `BadValSize`), panics are not. +#![allow(missing_docs)] + +use proptest::prelude::*; +use signet_libmdbx::{Environment, Geometry, WriteFlags}; +use tempfile::tempdir; + +/// Strategy for generating byte vectors of various sizes (0 to 1KB). +fn arb_bytes() -> impl Strategy> { + prop::collection::vec(any::(), 0..1024) +} + +/// Strategy for generating small byte vectors (0 to 64 bytes). +fn arb_small_bytes() -> impl Strategy> { + prop::collection::vec(any::(), 0..64) +} + +/// Strategy for keys that won't trigger MDBX assertion failures. +/// MDBX max key size is ~2022 bytes for 4KB pages. 
+fn arb_safe_key() -> impl Strategy> { + prop::collection::vec(any::(), 0..512) +} + +// ============================================================================= +// Key/Value Operations - TxSync (V1) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + /// Test that put/get with arbitrary key/value does not panic (V1). + #[test] + fn put_get_arbitrary_kv_v1(key in arb_bytes(), value in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + // Should not panic - may return error for invalid sizes + let put_result = txn.put(db, &key, &value, WriteFlags::empty()); + + // If put succeeded, get should not panic + if put_result.is_ok() { + let _: Option> = txn.get(db.dbi(), &key).unwrap(); + } + } + + /// Test that del with nonexistent arbitrary key does not panic (V1). + #[test] + fn del_nonexistent_key_v1(key in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + // Delete on nonexistent key should return Ok(false), not panic + let result = txn.del(db, &key, None); + prop_assert!(result.is_ok()); + prop_assert!(!result.unwrap()); + } + + /// Test that get with arbitrary key on empty db does not panic (V1). 
+ #[test] + fn get_arbitrary_key_empty_db_v1(key in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_ro_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + // Get on nonexistent key should return Ok(None), not panic + let result: signet_libmdbx::ReadResult>> = txn.get(db.dbi(), &key); + prop_assert!(result.is_ok()); + prop_assert!(result.unwrap().is_none()); + } + + /// Test empty key handling does not panic (V1). + #[test] + fn empty_key_operations_v1(value in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + // Empty key should be valid + let put_result = txn.put(db, b"", &value, WriteFlags::empty()); + prop_assert!(put_result.is_ok()); + + let get_result: signet_libmdbx::ReadResult>> = + txn.get(db.dbi(), b""); + prop_assert!(get_result.is_ok()); + + let del_result = txn.del(db, b"", None); + prop_assert!(del_result.is_ok()); + } + + /// Test empty value handling does not panic (V1). + #[test] + fn empty_value_operations_v1(key in arb_small_bytes()) { + // Skip empty keys for this test + prop_assume!(!key.is_empty()); + + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + // Empty value should be valid + let put_result = txn.put(db, &key, b"", WriteFlags::empty()); + prop_assert!(put_result.is_ok()); + + let get_result: signet_libmdbx::ReadResult>> = + txn.get(db.dbi(), &key); + prop_assert!(get_result.is_ok()); + prop_assert!(get_result.unwrap().is_some()); + } +} + +// ============================================================================= +// Key/Value Operations - TxUnsync (V2) +// ============================================================================= + +proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(256))] + + /// Test that put/get with arbitrary key/value does not panic (V2). + #[test] + fn put_get_arbitrary_kv_v2(key in arb_bytes(), value in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + // Should not panic - may return error for invalid sizes + let put_result = txn.put(db, &key, &value, WriteFlags::empty()); + + // If put succeeded, get should not panic + if put_result.is_ok() { + let _: Option> = txn.get(db.dbi(), &key).unwrap(); + } + } + + /// Test that del with nonexistent arbitrary key does not panic (V2). + #[test] + fn del_nonexistent_key_v2(key in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + // Delete on nonexistent key should return Ok(false), not panic + let result = txn.del(db, &key, None); + prop_assert!(result.is_ok()); + prop_assert!(!result.unwrap()); + } + + /// Test that get with arbitrary key on empty db does not panic (V2). + #[test] + fn get_arbitrary_key_empty_db_v2(key in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_ro_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + // Get on nonexistent key should return Ok(None), not panic + let result: signet_libmdbx::ReadResult>> = txn.get(db.dbi(), &key); + prop_assert!(result.is_ok()); + prop_assert!(result.unwrap().is_none()); + } + + /// Test empty key handling does not panic (V2). 
+ #[test] + fn empty_key_operations_v2(value in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + let put_result = txn.put(db, b"", &value, WriteFlags::empty()); + prop_assert!(put_result.is_ok()); + + let get_result: signet_libmdbx::ReadResult>> = + txn.get(db.dbi(), b""); + prop_assert!(get_result.is_ok()); + + let del_result = txn.del(db, b"", None); + prop_assert!(del_result.is_ok()); + } + + /// Test empty value handling does not panic (V2). + #[test] + fn empty_value_operations_v2(key in arb_small_bytes()) { + prop_assume!(!key.is_empty()); + + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + let put_result = txn.put(db, &key, b"", WriteFlags::empty()); + prop_assert!(put_result.is_ok()); + + let get_result: signet_libmdbx::ReadResult>> = + txn.get(db.dbi(), &key); + prop_assert!(get_result.is_ok()); + prop_assert!(get_result.unwrap().is_some()); + } +} + +// ============================================================================= +// Correctness: Round-trip - TxSync (V1) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + /// Test that put followed by get returns the same value (V1). 
+ #[test] + fn roundtrip_correctness_v1(key in arb_safe_key(), value in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + let put_result = txn.put(db, &key, &value, WriteFlags::empty()); + if put_result.is_ok() { + let retrieved: Option> = txn.get(db.dbi(), &key).unwrap(); + prop_assert_eq!(retrieved, Some(value)); + } + } +} + +// ============================================================================= +// Correctness: Round-trip - TxUnsync (V2) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + /// Test that put followed by get returns the same value (V2). + #[test] + fn roundtrip_correctness_v2(key in arb_safe_key(), value in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + let put_result = txn.put(db, &key, &value, WriteFlags::empty()); + if put_result.is_ok() { + let retrieved: Option> = txn.get(db.dbi(), &key).unwrap(); + prop_assert_eq!(retrieved, Some(value)); + } + } +} + +// ============================================================================= +// Correctness: Overwrite - TxSync (V1) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + /// Test that overwriting a key returns the new value (V1). 
+ #[test] + fn overwrite_correctness_v1( + key in arb_safe_key(), + value1 in arb_bytes(), + value2 in arb_bytes(), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + let put1 = txn.put(db, &key, &value1, WriteFlags::empty()); + let put2 = txn.put(db, &key, &value2, WriteFlags::empty()); + + if put1.is_ok() && put2.is_ok() { + let retrieved: Option> = txn.get(db.dbi(), &key).unwrap(); + prop_assert_eq!(retrieved, Some(value2)); + } + } +} + +// ============================================================================= +// Correctness: Overwrite - TxUnsync (V2) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + /// Test that overwriting a key returns the new value (V2). + #[test] + fn overwrite_correctness_v2( + key in arb_safe_key(), + value1 in arb_bytes(), + value2 in arb_bytes(), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + let put1 = txn.put(db, &key, &value1, WriteFlags::empty()); + let put2 = txn.put(db, &key, &value2, WriteFlags::empty()); + + if put1.is_ok() && put2.is_ok() { + let retrieved: Option> = txn.get(db.dbi(), &key).unwrap(); + prop_assert_eq!(retrieved, Some(value2)); + } + } +} + +// ============================================================================= +// Correctness: Delete - TxSync (V1) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + /// Test that delete removes the key and get returns None (V1). 
+ #[test] + fn delete_correctness_v1(key in arb_safe_key(), value in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + let put_result = txn.put(db, &key, &value, WriteFlags::empty()); + if put_result.is_ok() { + let deleted = txn.del(db, &key, None).unwrap(); + prop_assert!(deleted); + + let retrieved: Option> = txn.get(db.dbi(), &key).unwrap(); + prop_assert_eq!(retrieved, None); + } + } +} + +// ============================================================================= +// Correctness: Delete - TxUnsync (V2) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + /// Test that delete removes the key and get returns None (V2). + #[test] + fn delete_correctness_v2(key in arb_safe_key(), value in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + let put_result = txn.put(db, &key, &value, WriteFlags::empty()); + if put_result.is_ok() { + let deleted = txn.del(db, &key, None).unwrap(); + prop_assert!(deleted); + + let retrieved: Option> = txn.get(db.dbi(), &key).unwrap(); + prop_assert_eq!(retrieved, None); + } + } +} + +// ============================================================================= +// Correctness: Iteration Order - TxSync (V1) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Test that keys are returned in lexicographically sorted order (V1). 
+ #[test] + fn iteration_order_correctness_v1( + entries in prop::collection::vec((arb_safe_key(), arb_bytes()), 1..20), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + // Insert all entries + let mut inserted_keys: Vec> = Vec::new(); + for (key, value) in &entries { + if txn.put(db, key, value, WriteFlags::empty()).is_ok() + && !inserted_keys.contains(key) + { + inserted_keys.push(key.clone()); + } + } + + prop_assume!(!inserted_keys.is_empty()); + + // Iterate and collect keys + let mut cursor = txn.cursor(db).unwrap(); + let retrieved_keys: Vec> = cursor + .iter::, Vec>() + .filter_map(Result::ok) + .map(|(k, _)| k) + .collect(); + + // Keys should be in sorted order + let mut expected = inserted_keys; + expected.sort(); + expected.dedup(); + prop_assert_eq!(retrieved_keys, expected); + } +} + +// ============================================================================= +// Correctness: Iteration Order - TxUnsync (V2) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Test that keys are returned in lexicographically sorted order (V2). 
+    #[test]
+    fn iteration_order_correctness_v2(
+        entries in prop::collection::vec((arb_safe_key(), arb_bytes()), 1..20),
+    ) {
+        let dir = tempdir().unwrap();
+        let env = Environment::builder().open(dir.path()).unwrap();
+        let txn = env.begin_rw_unsync().unwrap();
+        let db = txn.open_db(None).unwrap();
+
+        let mut inserted_keys: Vec<Vec<u8>> = Vec::new();
+        for (key, value) in &entries {
+            if txn.put(db, key, value, WriteFlags::empty()).is_ok()
+                && !inserted_keys.contains(key)
+            {
+                inserted_keys.push(key.clone());
+            }
+        }
+
+        prop_assume!(!inserted_keys.is_empty());
+
+        let mut cursor = txn.cursor(db).unwrap();
+        let retrieved_keys: Vec<Vec<u8>> = cursor
+            .iter::<Vec<u8>, Vec<u8>>()
+            .filter_map(Result::ok)
+            .map(|(k, _)| k)
+            .collect();
+
+        let mut expected = inserted_keys;
+        expected.sort();
+        expected.dedup();
+        prop_assert_eq!(retrieved_keys, expected);
+    }
+}
+
+// =============================================================================
+// New: Large Value Roundtrip
+// =============================================================================
+
+proptest! {
+    #![proptest_config(ProptestConfig::with_cases(32))]
+
+    /// Test roundtrip with large values (up to 64KB) using a larger environment (V1).
+    #[test]
+    fn large_value_roundtrip_v1(
+        key in arb_safe_key(),
+        value in prop::collection::vec(any::<u8>(), 0..65536),
+    ) {
+        let dir = tempdir().unwrap();
+        let env = Environment::builder()
+            .set_geometry(Geometry { size: Some(0..(256 * 1024 * 1024)), ..Default::default() })
+            .open(dir.path())
+            .unwrap();
+        let txn = env.begin_rw_sync().unwrap();
+        let db = txn.open_db(None).unwrap();
+
+        let put_result = txn.put(db, &key, &value, WriteFlags::empty());
+        if put_result.is_ok() {
+            let retrieved: Option<Vec<u8>> = txn.get(db.dbi(), &key).unwrap();
+            prop_assert_eq!(retrieved, Some(value));
+        }
+    }
+
+    /// Test roundtrip with large values (up to 64KB) using a larger environment (V2).
+ #[test] + fn large_value_roundtrip_v2( + key in arb_safe_key(), + value in prop::collection::vec(any::(), 0..65536), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder() + .set_geometry(Geometry { size: Some(0..(256 * 1024 * 1024)), ..Default::default() }) + .open(dir.path()) + .unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + let put_result = txn.put(db, &key, &value, WriteFlags::empty()); + if put_result.is_ok() { + let retrieved: Option> = txn.get(db.dbi(), &key).unwrap(); + prop_assert_eq!(retrieved, Some(value)); + } + } +} + +// ============================================================================= +// New: Multi-Database Isolation +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Test that two named databases are isolated from each other (V1). + #[test] + fn multi_database_isolation_v1( + key in arb_safe_key(), + value_a in arb_bytes(), + value_b in arb_bytes(), + ) { + // Values must differ for isolation check to be meaningful + prop_assume!(value_a != value_b); + + let dir = tempdir().unwrap(); + let env = Environment::builder() + .set_max_dbs(4) + .open(dir.path()) + .unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db_a = txn.create_db(Some("db_a"), signet_libmdbx::DatabaseFlags::empty()).unwrap(); + let db_b = txn.create_db(Some("db_b"), signet_libmdbx::DatabaseFlags::empty()).unwrap(); + + let put_a = txn.put(db_a, &key, &value_a, WriteFlags::empty()); + let put_b = txn.put(db_b, &key, &value_b, WriteFlags::empty()); + + if put_a.is_ok() && put_b.is_ok() { + let retrieved_a: Option> = txn.get(db_a.dbi(), &key).unwrap(); + let retrieved_b: Option> = txn.get(db_b.dbi(), &key).unwrap(); + // Each db should return its own value, not the other's + prop_assert_eq!(retrieved_a, Some(value_a)); + prop_assert_eq!(retrieved_b, Some(value_b)); + } + } + + /// Test that 
two named databases are isolated from each other (V2). + #[test] + fn multi_database_isolation_v2( + key in arb_safe_key(), + value_a in arb_bytes(), + value_b in arb_bytes(), + ) { + prop_assume!(value_a != value_b); + + let dir = tempdir().unwrap(); + let env = Environment::builder() + .set_max_dbs(4) + .open(dir.path()) + .unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db_a = txn.create_db(Some("db_a"), signet_libmdbx::DatabaseFlags::empty()).unwrap(); + let db_b = txn.create_db(Some("db_b"), signet_libmdbx::DatabaseFlags::empty()).unwrap(); + + let put_a = txn.put(db_a, &key, &value_a, WriteFlags::empty()); + let put_b = txn.put(db_b, &key, &value_b, WriteFlags::empty()); + + if put_a.is_ok() && put_b.is_ok() { + let retrieved_a: Option> = txn.get(db_a.dbi(), &key).unwrap(); + let retrieved_b: Option> = txn.get(db_b.dbi(), &key).unwrap(); + prop_assert_eq!(retrieved_a, Some(value_a)); + prop_assert_eq!(retrieved_b, Some(value_b)); + } + } +} From 0d1eafaaa104db624b04b96e48d5fbd749694505 Mon Sep 17 00:00:00 2001 From: James Date: Sat, 28 Mar 2026 10:11:04 -0400 Subject: [PATCH 05/24] test: add proptest_dupfixed, proptest_iter, proptest_nested Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/proptest_dupfixed.rs | 202 ++++++++++++++++++++++++ tests/proptest_iter.rs | 308 +++++++++++++++++++++++++++++++++++++ tests/proptest_nested.rs | 132 ++++++++++++++++ 3 files changed, 642 insertions(+) create mode 100644 tests/proptest_dupfixed.rs create mode 100644 tests/proptest_iter.rs create mode 100644 tests/proptest_nested.rs diff --git a/tests/proptest_dupfixed.rs b/tests/proptest_dupfixed.rs new file mode 100644 index 0000000..9b0919a --- /dev/null +++ b/tests/proptest_dupfixed.rs @@ -0,0 +1,202 @@ +//! Property-based tests for DUP_FIXED operations. +//! +//! Tests focus on both "does not panic" and correctness properties. Errors are +//! acceptable (e.g., `BadValSize`), panics are not. 
+#![allow(missing_docs)] + +use proptest::prelude::*; +use signet_libmdbx::{DatabaseFlags, Environment, Geometry, WriteFlags}; +use tempfile::tempdir; + +// ============================================================================= +// Roundtrip: 8-byte values +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(64))] + + /// Test that put/iter_dupfixed_of roundtrips 8-byte values correctly (V2). + #[test] + fn dupfixed_roundtrip_8( + n_values in 1usize..20, + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT | DatabaseFlags::DUP_FIXED).unwrap(); + + // Insert n_values distinct 8-byte values under a single key. + let mut expected: Vec<[u8; 8]> = (0..n_values) + .map(|i| { + let mut v = [0u8; 8]; + v[0] = i as u8; + v[1] = (i >> 8) as u8; + v + }) + .collect(); + + for value in &expected { + txn.put(db, b"key", value.as_slice(), WriteFlags::empty()).unwrap(); + } + + let mut cursor = txn.cursor(db).unwrap(); + let retrieved: Vec<[u8; 8]> = cursor + .iter_dupfixed_of::<[u8; 8]>(b"key") + .unwrap() + .filter_map(Result::ok) + .collect(); + + expected.sort(); + let mut retrieved_sorted = retrieved; + retrieved_sorted.sort(); + prop_assert_eq!(retrieved_sorted, expected); + } + + /// Test that put/iter_dupfixed_of roundtrips 32-byte values correctly (V2). 
+ #[test] + fn dupfixed_roundtrip_32( + n_values in 1usize..20, + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT | DatabaseFlags::DUP_FIXED).unwrap(); + + let mut expected: Vec<[u8; 32]> = (0..n_values) + .map(|i| { + let mut v = [0u8; 32]; + v[0] = i as u8; + v[1] = (i >> 8) as u8; + v + }) + .collect(); + + for value in &expected { + txn.put(db, b"key", value.as_slice(), WriteFlags::empty()).unwrap(); + } + + let mut cursor = txn.cursor(db).unwrap(); + let retrieved: Vec<[u8; 32]> = cursor + .iter_dupfixed_of::<[u8; 32]>(b"key") + .unwrap() + .filter_map(Result::ok) + .collect(); + + expected.sort(); + let mut retrieved_sorted = retrieved; + retrieved_sorted.sort(); + prop_assert_eq!(retrieved_sorted, expected); + } + + /// Test that put/iter_dupfixed_of roundtrips 100-byte values correctly (V2). + #[test] + fn dupfixed_roundtrip_100( + n_values in 1usize..20, + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT | DatabaseFlags::DUP_FIXED).unwrap(); + + let mut expected: Vec<[u8; 100]> = (0..n_values) + .map(|i| { + let mut v = [0u8; 100]; + v[0] = i as u8; + v[1] = (i >> 8) as u8; + v + }) + .collect(); + + for value in &expected { + txn.put(db, b"key", value.as_slice(), WriteFlags::empty()).unwrap(); + } + + let mut cursor = txn.cursor(db).unwrap(); + let retrieved: Vec<[u8; 100]> = cursor + .iter_dupfixed_of::<[u8; 100]>(b"key") + .unwrap() + .filter_map(Result::ok) + .collect(); + + expected.sort(); + let mut retrieved_sorted = retrieved; + retrieved_sorted.sort(); + prop_assert_eq!(retrieved_sorted, expected); + } +} + +// ============================================================================= +// Completeness: iter_dupfixed_start +// 
============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(64))] + + /// Test that iter_dupfixed_start yields exactly N items inserted under one key (V2). + #[test] + fn iter_dupfixed_start_completeness( + n_values in 1usize..100, + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT | DatabaseFlags::DUP_FIXED).unwrap(); + + // Insert n_values distinct 8-byte values under b"key" (3 bytes). + for i in 0..n_values { + let mut v = [0u8; 8]; + v[0] = i as u8; + v[1] = (i >> 8) as u8; + txn.put(db, b"key", v.as_slice(), WriteFlags::empty()).unwrap(); + } + + let mut cursor = txn.cursor(db).unwrap(); + // Key type is [u8; 3] since "key" is 3 bytes, value type is [u8; 8]. + let count = cursor + .iter_dupfixed_start::<[u8; 3], [u8; 8]>() + .unwrap() + .filter_map(Result::ok) + .count(); + + prop_assert_eq!(count, n_values); + } +} + +// ============================================================================= +// Page spanning: large numbers of fixed-size values +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(16))] + + /// Test that all 64-byte values survive across page boundaries (V2). + #[test] + fn dupfixed_page_spanning( + n_values in 100usize..500, + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder() + .set_geometry(Geometry { size: Some(0..(64 * 1024 * 1024)), ..Default::default() }) + .open(dir.path()) + .unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT | DatabaseFlags::DUP_FIXED).unwrap(); + + // Insert n_values distinct 64-byte values under a single key. 
+ for i in 0..n_values { + let mut v = [0u8; 64]; + v[0] = i as u8; + v[1] = (i >> 8) as u8; + txn.put(db, b"key", v.as_slice(), WriteFlags::empty()).unwrap(); + } + + let mut cursor = txn.cursor(db).unwrap(); + let retrieved: Vec<[u8; 64]> = cursor + .iter_dupfixed_of::<[u8; 64]>(b"key") + .unwrap() + .filter_map(Result::ok) + .collect(); + + prop_assert_eq!(retrieved.len(), n_values); + } +} diff --git a/tests/proptest_iter.rs b/tests/proptest_iter.rs new file mode 100644 index 0000000..358f05d --- /dev/null +++ b/tests/proptest_iter.rs @@ -0,0 +1,308 @@ +//! Property-based tests for iterator operations. +//! +//! Tests focus on both "does not panic" and correctness properties. Errors are +//! acceptable (e.g., `BadValSize`), panics are not. +#![allow(missing_docs)] + +use proptest::prelude::*; +use signet_libmdbx::{DatabaseFlags, Environment, WriteFlags}; +use tempfile::tempdir; + +/// Strategy for generating byte vectors of various sizes (0 to 1KB). +fn arb_bytes() -> impl Strategy> { + prop::collection::vec(any::(), 0..1024) +} + +/// Strategy for keys that won't trigger MDBX assertion failures. +/// MDBX max key size is ~2022 bytes for 4KB pages. +fn arb_safe_key() -> impl Strategy> { + prop::collection::vec(any::(), 1..512) +} + +// ============================================================================= +// Migrated: iter_from — TxSync (V1) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Test iter_from with arbitrary key does not panic (V1). 
+ #[test] + fn iter_from_arbitrary_key_v1(key in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + for i in 0u8..10 { + txn.put(db, [i], [i], WriteFlags::empty()).unwrap(); + } + + let mut cursor = txn.cursor(db).unwrap(); + + let result = cursor.iter_from::, Vec>(&key); + prop_assert!(result.is_ok()); + + let count = result.unwrap().count(); + prop_assert!(count <= 10); + } + + /// Test iter_dup_of with arbitrary key does not panic (V1). + #[test] + fn iter_dup_of_arbitrary_key_v1(key in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); + + for i in 0u8..5 { + txn.put(db, b"known_key", [i], WriteFlags::empty()).unwrap(); + } + + let mut cursor = txn.cursor(db).unwrap(); + + let result = cursor.iter_dup_of::>(&key); + prop_assert!(result.is_ok()); + + let _ = result.unwrap().count(); + } + + /// Test iter_dup_from with arbitrary key does not panic (V1). + #[test] + fn iter_dup_from_arbitrary_key_v1(key in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); + + for i in 0u8..5 { + txn.put(db, b"key_a", [i], WriteFlags::empty()).unwrap(); + txn.put(db, b"key_z", [i], WriteFlags::empty()).unwrap(); + } + + let mut cursor = txn.cursor(db).unwrap(); + + let result = cursor.iter_dup_from::, Vec>(&key); + prop_assert!(result.is_ok()); + + let _ = result.unwrap().count(); + } +} + +// ============================================================================= +// Migrated: iter_from — TxUnsync (V2) +// ============================================================================= + +proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Test iter_from with arbitrary key does not panic (V2). + #[test] + fn iter_from_arbitrary_key_v2(key in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + for i in 0u8..10 { + txn.put(db, [i], [i], WriteFlags::empty()).unwrap(); + } + + let mut cursor = txn.cursor(db).unwrap(); + + let result = cursor.iter_from::, Vec>(&key); + prop_assert!(result.is_ok()); + + let count = result.unwrap().count(); + prop_assert!(count <= 10); + } + + /// Test iter_dup_of with arbitrary key does not panic (V2). + #[test] + fn iter_dup_of_arbitrary_key_v2(key in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); + + for i in 0u8..5 { + txn.put(db, b"known_key", [i], WriteFlags::empty()).unwrap(); + } + + let mut cursor = txn.cursor(db).unwrap(); + + let result = cursor.iter_dup_of::>(&key); + prop_assert!(result.is_ok()); + + let _ = result.unwrap().count(); + } + + /// Test iter_dup_from with arbitrary key does not panic (V2). 
+ #[test] + fn iter_dup_from_arbitrary_key_v2(key in arb_bytes()) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); + + for i in 0u8..5 { + txn.put(db, b"key_a", [i], WriteFlags::empty()).unwrap(); + txn.put(db, b"key_z", [i], WriteFlags::empty()).unwrap(); + } + + let mut cursor = txn.cursor(db).unwrap(); + + let result = cursor.iter_dup_from::, Vec>(&key); + prop_assert!(result.is_ok()); + + let _ = result.unwrap().count(); + } +} + +// ============================================================================= +// New: iter_start yields all — TxSync (V1) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Test that iter_start yields exactly the number of distinct keys inserted (V1). + #[test] + fn iter_start_yields_all_v1( + keys in prop::collection::vec(arb_safe_key(), 1..20), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + // Deduplicate and sort keys, then insert all. + let mut unique_keys = keys; + unique_keys.sort(); + unique_keys.dedup(); + + for key in &unique_keys { + txn.put(db, key, b"v", WriteFlags::empty()).unwrap(); + } + + let mut cursor = txn.cursor(db).unwrap(); + let count = cursor + .iter_start::, Vec>() + .unwrap() + .filter_map(Result::ok) + .count(); + + prop_assert_eq!(count, unique_keys.len()); + } +} + +// ============================================================================= +// New: iter_start yields all — TxUnsync (V2) +// ============================================================================= + +proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Test that iter_start yields exactly the number of distinct keys inserted (V2). + #[test] + fn iter_start_yields_all_v2( + keys in prop::collection::vec(arb_safe_key(), 1..20), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + let mut unique_keys = keys; + unique_keys.sort(); + unique_keys.dedup(); + + for key in &unique_keys { + txn.put(db, key, b"v", WriteFlags::empty()).unwrap(); + } + + let mut cursor = txn.cursor(db).unwrap(); + let count = cursor + .iter_start::, Vec>() + .unwrap() + .filter_map(Result::ok) + .count(); + + prop_assert_eq!(count, unique_keys.len()); + } +} + +// ============================================================================= +// New: iter_from bounds — TxSync (V1) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Test that iter_from returns only keys >= search key (V1). + #[test] + fn iter_from_bounds_v1(search_idx in 0usize..20) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + // Insert 20 single-byte keys [0]...[19]. + for i in 0u8..20 { + txn.put(db, [i], b"v", WriteFlags::empty()).unwrap(); + } + + let search_key = [search_idx as u8]; + let mut cursor = txn.cursor(db).unwrap(); + let retrieved_keys: Vec> = cursor + .iter_from::, Vec>(&search_key) + .unwrap() + .filter_map(Result::ok) + .map(|(k, _)| k) + .collect(); + + // All returned keys must be >= search_key. + for k in &retrieved_keys { + prop_assert!(k.as_slice() >= search_key.as_slice()); + } + + // The number of returned keys should be 20 - search_idx. 
+ prop_assert_eq!(retrieved_keys.len(), 20 - search_idx); + } +} + +// ============================================================================= +// New: iter_from bounds — TxUnsync (V2) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Test that iter_from returns only keys >= search key (V2). + #[test] + fn iter_from_bounds_v2(search_idx in 0usize..20) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + + for i in 0u8..20 { + txn.put(db, [i], b"v", WriteFlags::empty()).unwrap(); + } + + let search_key = [search_idx as u8]; + let mut cursor = txn.cursor(db).unwrap(); + let retrieved_keys: Vec> = cursor + .iter_from::, Vec>(&search_key) + .unwrap() + .filter_map(Result::ok) + .map(|(k, _)| k) + .collect(); + + for k in &retrieved_keys { + prop_assert!(k.as_slice() >= search_key.as_slice()); + } + + prop_assert_eq!(retrieved_keys.len(), 20 - search_idx); + } +} diff --git a/tests/proptest_nested.rs b/tests/proptest_nested.rs new file mode 100644 index 0000000..423eed7 --- /dev/null +++ b/tests/proptest_nested.rs @@ -0,0 +1,132 @@ +//! Property-based tests for nested transaction behavior (TxSync / V1 only). +//! +//! Tests focus on correctness of commit/abort semantics. Errors are +//! acceptable (e.g., `BadValSize`), panics are not. +#![allow(missing_docs)] + +use proptest::prelude::*; +use signet_libmdbx::{Environment, WriteFlags}; +use tempfile::tempdir; + +/// Strategy for keys that won't trigger MDBX assertion failures (non-empty). +fn arb_safe_key() -> impl Strategy> { + prop::collection::vec(any::(), 1..512) +} + +// ============================================================================= +// Nested commit preserves writes +// ============================================================================= + +proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(64))] + + /// Test that a write in a committed child transaction is visible in the parent (V1). + #[test] + fn nested_commit_preserves_writes( + key in arb_safe_key(), + value in prop::collection::vec(any::(), 1..64), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + { + let nested = txn.begin_nested_txn().unwrap(); + let nested_db = nested.open_db(None).unwrap(); + let put_result = nested.put(nested_db, &key, &value, WriteFlags::empty()); + if put_result.is_ok() { + nested.commit().unwrap(); + + // After commit, parent should see the value. + let retrieved: Option> = txn.get(db.dbi(), &key).unwrap(); + prop_assert_eq!(retrieved, Some(value)); + } + } + } +} + +// ============================================================================= +// Nested abort discards writes +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(64))] + + /// Test that a write in a dropped (aborted) child transaction is NOT visible in the parent (V1). + #[test] + fn nested_abort_discards_writes( + key in arb_safe_key(), + value in prop::collection::vec(any::(), 1..64), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + // Confirm the key is not yet present. + let before: Option> = txn.get(db.dbi(), &key).unwrap(); + prop_assume!(before.is_none()); + + { + let nested = txn.begin_nested_txn().unwrap(); + let nested_db = nested.open_db(None).unwrap(); + let put_result = nested.put(nested_db, &key, &value, WriteFlags::empty()); + if put_result.is_ok() { + // Drop without committing — this aborts the nested transaction. + drop(nested); + + // Parent should NOT see the value. 
+ let retrieved: Option> = txn.get(db.dbi(), &key).unwrap(); + prop_assert!(retrieved.is_none()); + } + } + } +} + +// ============================================================================= +// Parent writes survive child abort +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(64))] + + /// Test that parent writes survive a child abort, and that the child's write is discarded (V1). + #[test] + fn parent_writes_survive_child_abort( + parent_key in arb_safe_key(), + parent_value in prop::collection::vec(any::(), 1..64), + child_key in arb_safe_key(), + child_value in prop::collection::vec(any::(), 1..64), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + + // Write in parent. + let put_result = txn.put(db, &parent_key, &parent_value, WriteFlags::empty()); + if put_result.is_err() { + return Ok(()); + } + + { + let nested = txn.begin_nested_txn().unwrap(); + let nested_db = nested.open_db(None).unwrap(); + // Write something in the child (ignore errors). + let _ = nested.put(nested_db, &child_key, &child_value, WriteFlags::empty()); + // Abort by dropping. + drop(nested); + } + + // Parent write must still be visible. + let retrieved: Option> = txn.get(db.dbi(), &parent_key).unwrap(); + prop_assert_eq!(retrieved, Some(parent_value)); + + // If the keys differ, child write must NOT be visible. + if parent_key != child_key { + let child_retrieved: Option> = txn.get(db.dbi(), &child_key).unwrap(); + prop_assert!(child_retrieved.is_none()); + } + } +} From 287cfc19e364b634971cf81d48fe9c0d553480d2 Mon Sep 17 00:00:00 2001 From: James Date: Sat, 28 Mar 2026 10:13:31 -0400 Subject: [PATCH 06/24] test: complete proptest migration, delete proptest_inputs.rs Add missing cursor_set_range_correctness tests to proptest_cursor. 
Remove proptest_inputs.rs now that all tests are migrated. Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/proptest_cursor.rs | 56 + tests/proptest_inputs.proptest-regressions | 7 - tests/proptest_inputs.rs | 1152 -------------------- 3 files changed, 56 insertions(+), 1159 deletions(-) delete mode 100644 tests/proptest_inputs.proptest-regressions delete mode 100644 tests/proptest_inputs.rs diff --git a/tests/proptest_cursor.rs b/tests/proptest_cursor.rs index cc4e47b..ca335e8 100644 --- a/tests/proptest_cursor.rs +++ b/tests/proptest_cursor.rs @@ -365,3 +365,59 @@ proptest! { prop_assert_eq!(retrieved, keys); } } + +// ============================================================================= +// Cursor set_range correctness (migrated) +// ============================================================================= + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + #[test] + fn cursor_set_range_correctness_v1( + entries in prop::collection::vec((arb_safe_key(), arb_bytes()), 2..10), + search_key in arb_safe_key(), + ) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_sync().unwrap(); + let db = txn.open_db(None).unwrap(); + let mut inserted: Vec<(Vec, Vec)> = Vec::new(); + for (key, value) in &entries { + if txn.put(db, key, value, WriteFlags::empty()).is_ok() { + inserted.push((key.clone(), value.clone())); + } + } + prop_assume!(!inserted.is_empty()); + inserted.sort_by(|a, b| a.0.cmp(&b.0)); + inserted.dedup_by(|a, b| a.0 == b.0); + let expected = inserted.iter().find(|(k, _)| k >= &search_key).cloned(); + let mut cursor = txn.cursor(db).unwrap(); + let result: Option<(Vec, Vec)> = cursor.set_range(&search_key).unwrap(); + prop_assert_eq!(result, expected); + } + + #[test] + fn cursor_set_range_correctness_v2( + entries in prop::collection::vec((arb_safe_key(), arb_bytes()), 2..10), + search_key in arb_safe_key(), + ) { + let dir = tempdir().unwrap(); + let 
env = Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + let mut inserted: Vec<(Vec, Vec)> = Vec::new(); + for (key, value) in &entries { + if txn.put(db, key, value, WriteFlags::empty()).is_ok() { + inserted.push((key.clone(), value.clone())); + } + } + prop_assume!(!inserted.is_empty()); + inserted.sort_by(|a, b| a.0.cmp(&b.0)); + inserted.dedup_by(|a, b| a.0 == b.0); + let expected = inserted.iter().find(|(k, _)| k >= &search_key).cloned(); + let mut cursor = txn.cursor(db).unwrap(); + let result: Option<(Vec, Vec)> = cursor.set_range(&search_key).unwrap(); + prop_assert_eq!(result, expected); + } +} diff --git a/tests/proptest_inputs.proptest-regressions b/tests/proptest_inputs.proptest-regressions deleted file mode 100644 index d148876..0000000 --- a/tests/proptest_inputs.proptest-regressions +++ /dev/null @@ -1,7 +0,0 @@ -# Seeds for failure cases proptest has generated in the past. It is -# automatically read and these particular cases re-run before any -# novel cases are generated. -# -# It is recommended to check this file in to source control so that -# everyone who runs the test benefits from these saved cases. -cc dc36b6543362c37ff0fe3b0fd4cda292813822e1d326c4b7a5b06419c4ee1369 # shrinks to value = [] diff --git a/tests/proptest_inputs.rs b/tests/proptest_inputs.rs deleted file mode 100644 index 0644b93..0000000 --- a/tests/proptest_inputs.rs +++ /dev/null @@ -1,1152 +0,0 @@ -//! Property-based tests to ensure arbitrary inputs do not cause panics. -//! -//! These tests focus on "does not panic" rather than correctness. Errors are -//! acceptable (e.g., `BadValSize`), panics are not. -#![allow(missing_docs)] - -use proptest::prelude::*; -use signet_libmdbx::{DatabaseFlags, Environment, WriteFlags}; -use tempfile::tempdir; - -/// Strategy for generating byte vectors of various sizes (0 to 1KB). 
-fn arb_bytes() -> impl Strategy> { - prop::collection::vec(any::(), 0..1024) -} - -/// Strategy for generating small byte vectors (0 to 64 bytes). -fn arb_small_bytes() -> impl Strategy> { - prop::collection::vec(any::(), 0..64) -} - -/// Strategy for valid database names (alphanumeric + underscore, 1-64 chars). -fn arb_db_name() -> impl Strategy { - "[a-zA-Z][a-zA-Z0-9_]{0,63}" -} - -// ============================================================================= -// Key/Value Operations - TxSync (V1) -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(256))] - - /// Test that put/get with arbitrary key/value does not panic (V1). - #[test] - fn put_get_arbitrary_kv_v1(key in arb_bytes(), value in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.open_db(None).unwrap(); - - // Should not panic - may return error for invalid sizes - let put_result = txn.put(db, &key, &value, WriteFlags::empty()); - - // If put succeeded, get should not panic - if put_result.is_ok() { - let _: Option> = txn.get(db.dbi(), &key).unwrap(); - } - } - - /// Test that del with nonexistent arbitrary key does not panic (V1). - #[test] - fn del_nonexistent_key_v1(key in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.open_db(None).unwrap(); - - // Delete on nonexistent key should return Ok(false), not panic - let result = txn.del(db, &key, None); - prop_assert!(result.is_ok()); - prop_assert!(!result.unwrap()); - } - - /// Test that get with arbitrary key on empty db does not panic (V1). 
- #[test] - fn get_arbitrary_key_empty_db_v1(key in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_ro_sync().unwrap(); - let db = txn.open_db(None).unwrap(); - - // Get on nonexistent key should return Ok(None), not panic - let result: signet_libmdbx::ReadResult>> = txn.get(db.dbi(), &key); - prop_assert!(result.is_ok()); - prop_assert!(result.unwrap().is_none()); - } -} - -// ============================================================================= -// Key/Value Operations - TxUnsync (V2) -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(256))] - - /// Test that put/get with arbitrary key/value does not panic (V2). - #[test] - fn put_get_arbitrary_kv_v2(key in arb_bytes(), value in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - - // Should not panic - may return error for invalid sizes - let put_result = txn.put(db, &key, &value, WriteFlags::empty()); - - // If put succeeded, get should not panic - if put_result.is_ok() { - let _: Option> = txn.get(db.dbi(), &key).unwrap(); - } - } - - /// Test that del with nonexistent arbitrary key does not panic (V2). - #[test] - fn del_nonexistent_key_v2(key in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - - // Delete on nonexistent key should return Ok(false), not panic - let result = txn.del(db, &key, None); - prop_assert!(result.is_ok()); - prop_assert!(!result.unwrap()); - } - - /// Test that get with arbitrary key on empty db does not panic (V2). 
- #[test] - fn get_arbitrary_key_empty_db_v2(key in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_ro_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - - // Get on nonexistent key should return Ok(None), not panic - let result: signet_libmdbx::ReadResult>> = txn.get(db.dbi(), &key); - prop_assert!(result.is_ok()); - prop_assert!(result.unwrap().is_none()); - } -} - -// ============================================================================= -// Cursor Operations -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(256))] - - /// Test that cursor.set() with arbitrary key does not panic (V1). - #[test] - fn cursor_set_arbitrary_key_v1(key in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.open_db(None).unwrap(); - - // Add some data so cursor is positioned - txn.put(db, b"test_key", b"test_val", WriteFlags::empty()).unwrap(); - - let mut cursor = txn.cursor(db).unwrap(); - - // set() with arbitrary key should return None or value, never panic - let result: signet_libmdbx::ReadResult>> = cursor.set(&key); - prop_assert!(result.is_ok()); - } - - /// Test that cursor.set_range() with arbitrary key does not panic (V1). 
- #[test] - fn cursor_set_range_arbitrary_key_v1(key in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.open_db(None).unwrap(); - - // Add some data - txn.put(db, b"aaa", b"val_a", WriteFlags::empty()).unwrap(); - txn.put(db, b"zzz", b"val_z", WriteFlags::empty()).unwrap(); - - let mut cursor = txn.cursor(db).unwrap(); - - // set_range() with arbitrary key should not panic - let result: signet_libmdbx::ReadResult, Vec)>> = - cursor.set_range(&key); - prop_assert!(result.is_ok()); - } - - /// Test that cursor.set_key() with arbitrary key does not panic (V1). - #[test] - fn cursor_set_key_arbitrary_v1(key in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.open_db(None).unwrap(); - - txn.put(db, b"test", b"value", WriteFlags::empty()).unwrap(); - - let mut cursor = txn.cursor(db).unwrap(); - - // set_key() should not panic - let result: signet_libmdbx::ReadResult, Vec)>> = - cursor.set_key(&key); - prop_assert!(result.is_ok()); - } - - /// Test that cursor.set() with arbitrary key does not panic (V2). - #[test] - fn cursor_set_arbitrary_key_v2(key in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - - txn.put(db, b"test_key", b"test_val", WriteFlags::empty()).unwrap(); - - let mut cursor = txn.cursor(db).unwrap(); - - let result: signet_libmdbx::ReadResult>> = cursor.set(&key); - prop_assert!(result.is_ok()); - } - - /// Test that cursor.set_range() with arbitrary key does not panic (V2). 
- #[test] - fn cursor_set_range_arbitrary_key_v2(key in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - - txn.put(db, b"aaa", b"val_a", WriteFlags::empty()).unwrap(); - txn.put(db, b"zzz", b"val_z", WriteFlags::empty()).unwrap(); - - let mut cursor = txn.cursor(db).unwrap(); - - let result: signet_libmdbx::ReadResult, Vec)>> = - cursor.set_range(&key); - prop_assert!(result.is_ok()); - } - - /// Test that cursor.set_key() with arbitrary key does not panic (V2). - #[test] - fn cursor_set_key_arbitrary_v2(key in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - - txn.put(db, b"test", b"value", WriteFlags::empty()).unwrap(); - - let mut cursor = txn.cursor(db).unwrap(); - - let result: signet_libmdbx::ReadResult, Vec)>> = - cursor.set_key(&key); - prop_assert!(result.is_ok()); - } -} - -// ============================================================================= -// Database Names -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(128))] - - /// Test that create_db with arbitrary valid names does not panic (V1). - #[test] - fn create_db_arbitrary_name_v1(name in arb_db_name()) { - let dir = tempdir().unwrap(); - let env = Environment::builder() - .set_max_dbs(16) - .open(dir.path()) - .unwrap(); - let txn = env.begin_rw_sync().unwrap(); - - // create_db should not panic, may return error for invalid names - let result = txn.create_db(Some(&name), DatabaseFlags::empty()); - // We accept both success and error, just no panic - let _ = result; - } - - /// Test that create_db with arbitrary valid names does not panic (V2). 
- #[test] - fn create_db_arbitrary_name_v2(name in arb_db_name()) { - let dir = tempdir().unwrap(); - let env = Environment::builder() - .set_max_dbs(16) - .open(dir.path()) - .unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - - let result = txn.create_db(Some(&name), DatabaseFlags::empty()); - let _ = result; - } -} - -// ============================================================================= -// DUP_SORT Operations -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(128))] - - /// Test that DUP_SORT put with multiple values does not panic (V1). - #[test] - fn dupsort_put_multiple_values_v1( - key in arb_small_bytes(), - values in prop::collection::vec(arb_small_bytes(), 1..10), - ) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); - - for value in &values { - // Should not panic - let result = txn.put(db, &key, value, WriteFlags::empty()); - // Errors are acceptable, panics are not - let _ = result; - } - } - - /// Test that DUP_SORT put with multiple values does not panic (V2). - #[test] - fn dupsort_put_multiple_values_v2( - key in arb_small_bytes(), - values in prop::collection::vec(arb_small_bytes(), 1..10), - ) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); - - for value in &values { - let result = txn.put(db, &key, value, WriteFlags::empty()); - let _ = result; - } - } - - /// Test cursor get_both with arbitrary key/value does not panic (V1). 
- #[test] - fn cursor_get_both_arbitrary_v1( - search_key in arb_small_bytes(), - search_value in arb_small_bytes(), - ) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); - - // Add some data - txn.put(db, b"key1", b"val1", WriteFlags::empty()).unwrap(); - txn.put(db, b"key1", b"val2", WriteFlags::empty()).unwrap(); - - let mut cursor = txn.cursor(db).unwrap(); - - // get_both should not panic - let result: signet_libmdbx::ReadResult>> = - cursor.get_both(&search_key, &search_value); - prop_assert!(result.is_ok()); - } - - /// Test cursor get_both_range with arbitrary key/value does not panic (V1). - #[test] - fn cursor_get_both_range_arbitrary_v1( - search_key in arb_small_bytes(), - search_value in arb_small_bytes(), - ) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); - - txn.put(db, b"key1", b"val1", WriteFlags::empty()).unwrap(); - txn.put(db, b"key1", b"val2", WriteFlags::empty()).unwrap(); - - let mut cursor = txn.cursor(db).unwrap(); - - // get_both_range should not panic - let result: signet_libmdbx::ReadResult>> = - cursor.get_both_range(&search_key, &search_value); - prop_assert!(result.is_ok()); - } - - /// Test cursor get_both with arbitrary key/value does not panic (V2). 
- #[test] - fn cursor_get_both_arbitrary_v2( - search_key in arb_small_bytes(), - search_value in arb_small_bytes(), - ) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); - - txn.put(db, b"key1", b"val1", WriteFlags::empty()).unwrap(); - txn.put(db, b"key1", b"val2", WriteFlags::empty()).unwrap(); - - let mut cursor = txn.cursor(db).unwrap(); - - let result: signet_libmdbx::ReadResult>> = - cursor.get_both(&search_key, &search_value); - prop_assert!(result.is_ok()); - } - - /// Test cursor get_both_range with arbitrary key/value does not panic (V2). - #[test] - fn cursor_get_both_range_arbitrary_v2( - search_key in arb_small_bytes(), - search_value in arb_small_bytes(), - ) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); - - txn.put(db, b"key1", b"val1", WriteFlags::empty()).unwrap(); - txn.put(db, b"key1", b"val2", WriteFlags::empty()).unwrap(); - - let mut cursor = txn.cursor(db).unwrap(); - - let result: signet_libmdbx::ReadResult>> = - cursor.get_both_range(&search_key, &search_value); - prop_assert!(result.is_ok()); - } -} - -// ============================================================================= -// Iterator Operations -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(128))] - - /// Test iter_from with arbitrary key does not panic (V1). 
- #[test] - fn iter_from_arbitrary_key_v1(key in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.open_db(None).unwrap(); - - // Add some data - for i in 0u8..10 { - txn.put(db, [i], [i], WriteFlags::empty()).unwrap(); - } - - let mut cursor = txn.cursor(db).unwrap(); - - // iter_from should not panic - let result = cursor.iter_from::, Vec>(&key); - prop_assert!(result.is_ok()); - - // Consuming the iterator should not panic - let count = result.unwrap().count(); - prop_assert!(count <= 10); - } - - /// Test iter_from with arbitrary key does not panic (V2). - #[test] - fn iter_from_arbitrary_key_v2(key in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - - for i in 0u8..10 { - txn.put(db, [i], [i], WriteFlags::empty()).unwrap(); - } - - let mut cursor = txn.cursor(db).unwrap(); - - let result = cursor.iter_from::, Vec>(&key); - prop_assert!(result.is_ok()); - - let count = result.unwrap().count(); - prop_assert!(count <= 10); - } - - /// Test iter_dup_of with arbitrary key does not panic (V1). 
- #[test] - fn iter_dup_of_arbitrary_key_v1(key in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); - - // Add some dup data - for i in 0u8..5 { - txn.put(db, b"known_key", [i], WriteFlags::empty()).unwrap(); - } - - let mut cursor = txn.cursor(db).unwrap(); - - // iter_dup_of should not panic (yields just values, not (key, value)) - let result = cursor.iter_dup_of::>(&key); - prop_assert!(result.is_ok()); - - // Consuming the iterator should not panic - let _ = result.unwrap().count(); - } - - /// Test iter_dup_from with arbitrary key does not panic (V1). - #[test] - fn iter_dup_from_arbitrary_key_v1(key in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); - - for i in 0u8..5 { - txn.put(db, b"key_a", [i], WriteFlags::empty()).unwrap(); - txn.put(db, b"key_z", [i], WriteFlags::empty()).unwrap(); - } - - let mut cursor = txn.cursor(db).unwrap(); - - // iter_dup_from should not panic (now yields flat (key, value) pairs) - let result = cursor.iter_dup_from::, Vec>(&key); - prop_assert!(result.is_ok()); - - // Consuming iterator should not panic (no nested iteration anymore) - let _ = result.unwrap().count(); - } - - /// Test iter_dup_of with arbitrary key does not panic (V2). 
- #[test] - fn iter_dup_of_arbitrary_key_v2(key in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); - - for i in 0u8..5 { - txn.put(db, b"known_key", [i], WriteFlags::empty()).unwrap(); - } - - let mut cursor = txn.cursor(db).unwrap(); - - // iter_dup_of yields just values, not (key, value) - let result = cursor.iter_dup_of::>(&key); - prop_assert!(result.is_ok()); - - let _ = result.unwrap().count(); - } - - /// Test iter_dup_from with arbitrary key does not panic (V2). - #[test] - fn iter_dup_from_arbitrary_key_v2(key in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); - - for i in 0u8..5 { - txn.put(db, b"key_a", [i], WriteFlags::empty()).unwrap(); - txn.put(db, b"key_z", [i], WriteFlags::empty()).unwrap(); - } - - let mut cursor = txn.cursor(db).unwrap(); - - // iter_dup_from now yields flat (key, value) pairs - let result = cursor.iter_dup_from::, Vec>(&key); - prop_assert!(result.is_ok()); - - // No nested iteration anymore - just count the items - let _ = result.unwrap().count(); - } -} - -// ============================================================================= -// Cursor Put Operations -// ============================================================================= - -/// Strategy for keys that won't trigger MDBX assertion failures. -/// MDBX max key size is ~2022 bytes for 4KB pages. -fn arb_safe_key() -> impl Strategy> { - prop::collection::vec(any::(), 0..512) -} - -proptest! { - #![proptest_config(ProptestConfig::with_cases(128))] - - /// Test cursor.put with arbitrary key/value does not panic (V1). 
- /// - /// Note: Uses constrained key sizes because MDBX aborts on very large keys - /// via cursor.put (assertion failure in cursor_put_checklen). - #[test] - fn cursor_put_arbitrary_v1(key in arb_safe_key(), value in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.open_db(None).unwrap(); - - let mut cursor = txn.cursor(db).unwrap(); - - // cursor.put should not panic - let result = cursor.put(&key, &value, WriteFlags::empty()); - // Errors are fine (e.g., BadValSize), panics are not - let _ = result; - } - - /// Test cursor.put with arbitrary key/value does not panic (V2). - #[test] - fn cursor_put_arbitrary_v2(key in arb_safe_key(), value in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - - let mut cursor = txn.cursor(db).unwrap(); - - let result = cursor.put(&key, &value, WriteFlags::empty()); - let _ = result; - } -} - -// ============================================================================= -// Edge Cases -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(64))] - - /// Test empty key handling does not panic (V1). 
- #[test] - fn empty_key_operations_v1(value in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.open_db(None).unwrap(); - - // Empty key should be valid - let put_result = txn.put(db, b"", &value, WriteFlags::empty()); - prop_assert!(put_result.is_ok()); - - let get_result: signet_libmdbx::ReadResult>> = - txn.get(db.dbi(), b""); - prop_assert!(get_result.is_ok()); - - let del_result = txn.del(db, b"", None); - prop_assert!(del_result.is_ok()); - } - - /// Test empty value handling does not panic (V1). - #[test] - fn empty_value_operations_v1(key in arb_small_bytes()) { - // Skip empty keys for this test - prop_assume!(!key.is_empty()); - - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.open_db(None).unwrap(); - - // Empty value should be valid - let put_result = txn.put(db, &key, b"", WriteFlags::empty()); - prop_assert!(put_result.is_ok()); - - let get_result: signet_libmdbx::ReadResult>> = - txn.get(db.dbi(), &key); - prop_assert!(get_result.is_ok()); - prop_assert!(get_result.unwrap().is_some()); - } - - /// Test empty key handling does not panic (V2). - #[test] - fn empty_key_operations_v2(value in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - - let put_result = txn.put(db, b"", &value, WriteFlags::empty()); - prop_assert!(put_result.is_ok()); - - let get_result: signet_libmdbx::ReadResult>> = - txn.get(db.dbi(), b""); - prop_assert!(get_result.is_ok()); - - let del_result = txn.del(db, b"", None); - prop_assert!(del_result.is_ok()); - } - - /// Test empty value handling does not panic (V2). 
- #[test] - fn empty_value_operations_v2(key in arb_small_bytes()) { - prop_assume!(!key.is_empty()); - - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - - let put_result = txn.put(db, &key, b"", WriteFlags::empty()); - prop_assert!(put_result.is_ok()); - - let get_result: signet_libmdbx::ReadResult>> = - txn.get(db.dbi(), &key); - prop_assert!(get_result.is_ok()); - prop_assert!(get_result.unwrap().is_some()); - } -} - -// ============================================================================= -// Correctness: Round-trip - TxSync (V1) -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(256))] - - /// Test that put followed by get returns the same value (V1). - #[test] - fn roundtrip_correctness_v1(key in arb_safe_key(), value in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.open_db(None).unwrap(); - - let put_result = txn.put(db, &key, &value, WriteFlags::empty()); - if put_result.is_ok() { - let retrieved: Option> = txn.get(db.dbi(), &key).unwrap(); - prop_assert_eq!(retrieved, Some(value)); - } - } -} - -// ============================================================================= -// Correctness: Round-trip - TxUnsync (V2) -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(256))] - - /// Test that put followed by get returns the same value (V2). 
- #[test] - fn roundtrip_correctness_v2(key in arb_safe_key(), value in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - - let put_result = txn.put(db, &key, &value, WriteFlags::empty()); - if put_result.is_ok() { - let retrieved: Option> = txn.get(db.dbi(), &key).unwrap(); - prop_assert_eq!(retrieved, Some(value)); - } - } -} - -// ============================================================================= -// Correctness: Overwrite - TxSync (V1) -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(256))] - - /// Test that overwriting a key returns the new value (V1). - #[test] - fn overwrite_correctness_v1( - key in arb_safe_key(), - value1 in arb_bytes(), - value2 in arb_bytes(), - ) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.open_db(None).unwrap(); - - let put1 = txn.put(db, &key, &value1, WriteFlags::empty()); - let put2 = txn.put(db, &key, &value2, WriteFlags::empty()); - - if put1.is_ok() && put2.is_ok() { - let retrieved: Option> = txn.get(db.dbi(), &key).unwrap(); - prop_assert_eq!(retrieved, Some(value2)); - } - } -} - -// ============================================================================= -// Correctness: Overwrite - TxUnsync (V2) -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(256))] - - /// Test that overwriting a key returns the new value (V2). 
- #[test] - fn overwrite_correctness_v2( - key in arb_safe_key(), - value1 in arb_bytes(), - value2 in arb_bytes(), - ) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - - let put1 = txn.put(db, &key, &value1, WriteFlags::empty()); - let put2 = txn.put(db, &key, &value2, WriteFlags::empty()); - - if put1.is_ok() && put2.is_ok() { - let retrieved: Option> = txn.get(db.dbi(), &key).unwrap(); - prop_assert_eq!(retrieved, Some(value2)); - } - } -} - -// ============================================================================= -// Correctness: Delete - TxSync (V1) -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(256))] - - /// Test that delete removes the key and get returns None (V1). - #[test] - fn delete_correctness_v1(key in arb_safe_key(), value in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.open_db(None).unwrap(); - - let put_result = txn.put(db, &key, &value, WriteFlags::empty()); - if put_result.is_ok() { - let deleted = txn.del(db, &key, None).unwrap(); - prop_assert!(deleted); - - let retrieved: Option> = txn.get(db.dbi(), &key).unwrap(); - prop_assert_eq!(retrieved, None); - } - } -} - -// ============================================================================= -// Correctness: Delete - TxUnsync (V2) -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(256))] - - /// Test that delete removes the key and get returns None (V2). 
- #[test] - fn delete_correctness_v2(key in arb_safe_key(), value in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - - let put_result = txn.put(db, &key, &value, WriteFlags::empty()); - if put_result.is_ok() { - let deleted = txn.del(db, &key, None).unwrap(); - prop_assert!(deleted); - - let retrieved: Option> = txn.get(db.dbi(), &key).unwrap(); - prop_assert_eq!(retrieved, None); - } - } -} - -// ============================================================================= -// Correctness: DUP_SORT Values - TxSync (V1) -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(128))] - - /// Test that all unique DUP_SORT values are retrievable via iter_dup_of (V1). - #[test] - fn dupsort_values_correctness_v1( - key in arb_small_bytes(), - values in prop::collection::vec(arb_small_bytes(), 1..10), - ) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); - - // Insert all values - let mut inserted: Vec> = Vec::new(); - for value in &values { - if txn.put(db, &key, value, WriteFlags::empty()).is_ok() - && !inserted.contains(value) - { - inserted.push(value.clone()); - } - } - - // Skip if nothing was inserted - prop_assume!(!inserted.is_empty()); - - // Retrieve all values via iter_dup_of (yields just values, not (key, value)) - let mut cursor = txn.cursor(db).unwrap(); - let retrieved: Vec> = - cursor.iter_dup_of::>(&key).unwrap().filter_map(Result::ok).collect(); - - // All inserted values should be retrieved (order is sorted by MDBX) - inserted.sort(); - let mut retrieved_sorted = retrieved.clone(); - retrieved_sorted.sort(); - prop_assert_eq!(inserted, retrieved_sorted); - } -} - -// 
============================================================================= -// Correctness: DUP_SORT Values - TxUnsync (V2) -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(128))] - - /// Test that all unique DUP_SORT values are retrievable via iter_dup_of (V2). - #[test] - fn dupsort_values_correctness_v2( - key in arb_small_bytes(), - values in prop::collection::vec(arb_small_bytes(), 1..10), - ) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.create_db(None, DatabaseFlags::DUP_SORT).unwrap(); - - let mut inserted: Vec> = Vec::new(); - for value in &values { - if txn.put(db, &key, value, WriteFlags::empty()).is_ok() - && !inserted.contains(value) - { - inserted.push(value.clone()); - } - } - - prop_assume!(!inserted.is_empty()); - - // iter_dup_of yields just values, not (key, value) - let mut cursor = txn.cursor(db).unwrap(); - let retrieved: Vec> = - cursor.iter_dup_of::>(&key).unwrap().filter_map(Result::ok).collect(); - - inserted.sort(); - let mut retrieved_sorted = retrieved.clone(); - retrieved_sorted.sort(); - prop_assert_eq!(inserted, retrieved_sorted); - } -} - -// ============================================================================= -// Correctness: Iteration Order - TxSync (V1) -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(128))] - - /// Test that keys are returned in lexicographically sorted order (V1). 
- #[test] - fn iteration_order_correctness_v1( - entries in prop::collection::vec((arb_safe_key(), arb_bytes()), 1..20), - ) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.open_db(None).unwrap(); - - // Insert all entries - let mut inserted_keys: Vec> = Vec::new(); - for (key, value) in &entries { - if txn.put(db, key, value, WriteFlags::empty()).is_ok() - && !inserted_keys.contains(key) - { - inserted_keys.push(key.clone()); - } - } - - prop_assume!(!inserted_keys.is_empty()); - - // Iterate and collect keys - let mut cursor = txn.cursor(db).unwrap(); - let retrieved_keys: Vec> = cursor - .iter::, Vec>() - .filter_map(Result::ok) - .map(|(k, _)| k) - .collect(); - - // Keys should be in sorted order - let mut expected = inserted_keys; - expected.sort(); - expected.dedup(); - prop_assert_eq!(retrieved_keys, expected); - } -} - -// ============================================================================= -// Correctness: Iteration Order - TxUnsync (V2) -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(128))] - - /// Test that keys are returned in lexicographically sorted order (V2). 
- #[test] - fn iteration_order_correctness_v2( - entries in prop::collection::vec((arb_safe_key(), arb_bytes()), 1..20), - ) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - - let mut inserted_keys: Vec> = Vec::new(); - for (key, value) in &entries { - if txn.put(db, key, value, WriteFlags::empty()).is_ok() - && !inserted_keys.contains(key) - { - inserted_keys.push(key.clone()); - } - } - - prop_assume!(!inserted_keys.is_empty()); - - let mut cursor = txn.cursor(db).unwrap(); - let retrieved_keys: Vec> = cursor - .iter::, Vec>() - .filter_map(Result::ok) - .map(|(k, _)| k) - .collect(); - - let mut expected = inserted_keys; - expected.sort(); - expected.dedup(); - prop_assert_eq!(retrieved_keys, expected); - } -} - -// ============================================================================= -// Correctness: Cursor Set - TxSync (V1) -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(256))] - - /// Test that cursor.set returns the correct value when key exists (V1). - #[test] - fn cursor_set_correctness_v1(key in arb_safe_key(), value in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.open_db(None).unwrap(); - - let put_result = txn.put(db, &key, &value, WriteFlags::empty()); - if put_result.is_ok() { - let mut cursor = txn.cursor(db).unwrap(); - let retrieved: Option> = cursor.set(&key).unwrap(); - prop_assert_eq!(retrieved, Some(value)); - } - } -} - -// ============================================================================= -// Correctness: Cursor Set - TxUnsync (V2) -// ============================================================================= - -proptest! 
{ - #![proptest_config(ProptestConfig::with_cases(256))] - - /// Test that cursor.set returns the correct value when key exists (V2). - #[test] - fn cursor_set_correctness_v2(key in arb_safe_key(), value in arb_bytes()) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - - let put_result = txn.put(db, &key, &value, WriteFlags::empty()); - if put_result.is_ok() { - let mut cursor = txn.cursor(db).unwrap(); - let retrieved: Option> = cursor.set(&key).unwrap(); - prop_assert_eq!(retrieved, Some(value)); - } - } -} - -// ============================================================================= -// Correctness: Cursor Set Range - TxSync (V1) -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(128))] - - /// Test that cursor.set_range returns the first key >= search key (V1). 
- #[test] - fn cursor_set_range_correctness_v1( - entries in prop::collection::vec((arb_safe_key(), arb_bytes()), 2..10), - search_key in arb_safe_key(), - ) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_sync().unwrap(); - let db = txn.open_db(None).unwrap(); - - let mut inserted: Vec<(Vec, Vec)> = Vec::new(); - for (key, value) in &entries { - if txn.put(db, key, value, WriteFlags::empty()).is_ok() { - inserted.push((key.clone(), value.clone())); - } - } - - prop_assume!(!inserted.is_empty()); - - // Sort by key to find expected result - inserted.sort_by(|a, b| a.0.cmp(&b.0)); - inserted.dedup_by(|a, b| a.0 == b.0); - - let expected = inserted - .iter() - .find(|(k, _)| k >= &search_key) - .cloned(); - - let mut cursor = txn.cursor(db).unwrap(); - let result: Option<(Vec, Vec)> = cursor.set_range(&search_key).unwrap(); - - prop_assert_eq!(result, expected); - } -} - -// ============================================================================= -// Correctness: Cursor Set Range - TxUnsync (V2) -// ============================================================================= - -proptest! { - #![proptest_config(ProptestConfig::with_cases(128))] - - /// Test that cursor.set_range returns the first key >= search key (V2). 
- #[test] - fn cursor_set_range_correctness_v2( - entries in prop::collection::vec((arb_safe_key(), arb_bytes()), 2..10), - search_key in arb_safe_key(), - ) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - - let mut inserted: Vec<(Vec, Vec)> = Vec::new(); - for (key, value) in &entries { - if txn.put(db, key, value, WriteFlags::empty()).is_ok() { - inserted.push((key.clone(), value.clone())); - } - } - - prop_assume!(!inserted.is_empty()); - - inserted.sort_by(|a, b| a.0.cmp(&b.0)); - inserted.dedup_by(|a, b| a.0 == b.0); - - let expected = inserted - .iter() - .find(|(k, _)| k >= &search_key) - .cloned(); - - let mut cursor = txn.cursor(db).unwrap(); - let result: Option<(Vec, Vec)> = cursor.set_range(&search_key).unwrap(); - - prop_assert_eq!(result, expected); - } -} From 5c3711866db40852102b9c8243a0ffb498f04648 Mon Sep 17 00:00:00 2001 From: James Date: Sat, 28 Mar 2026 10:17:29 -0400 Subject: [PATCH 07/24] fuzz: add cargo-fuzz setup with 6 fuzz targets Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitignore | 5 +- fuzz/Cargo.toml | 46 +++++++++++ fuzz/fuzz_targets/decode_array.rs | 55 +++++++++++++ fuzz/fuzz_targets/decode_cow.rs | 44 ++++++++++ fuzz/fuzz_targets/decode_object_length.rs | 35 ++++++++ fuzz/fuzz_targets/dirty_page_roundtrip.rs | 66 +++++++++++++++ fuzz/fuzz_targets/dupfixed_page_decode.rs | 99 +++++++++++++++++++++++ fuzz/fuzz_targets/key_validation.rs | 35 ++++++++ 8 files changed, 384 insertions(+), 1 deletion(-) create mode 100644 fuzz/Cargo.toml create mode 100644 fuzz/fuzz_targets/decode_array.rs create mode 100644 fuzz/fuzz_targets/decode_cow.rs create mode 100644 fuzz/fuzz_targets/decode_object_length.rs create mode 100644 fuzz/fuzz_targets/dirty_page_roundtrip.rs create mode 100644 fuzz/fuzz_targets/dupfixed_page_decode.rs create mode 100644 fuzz/fuzz_targets/key_validation.rs diff --git a/.gitignore 
b/.gitignore index 90c2997..797d531 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,7 @@ Cargo.lock .idea/ mdbx-sys/target/ -docs/ \ No newline at end of file +docs/ +fuzz/corpus/ +fuzz/artifacts/ +fuzz/target/ \ No newline at end of file diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 0000000..a9d510a --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,46 @@ +[package] +name = "signet-libmdbx-fuzz" +version = "0.0.0" +publish = false +edition = "2024" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +signet-libmdbx = { path = ".." } +tempfile = "3" + +[workspace] +members = ["."] + +[[bin]] +name = "decode_cow" +path = "fuzz_targets/decode_cow.rs" +doc = false + +[[bin]] +name = "decode_array" +path = "fuzz_targets/decode_array.rs" +doc = false + +[[bin]] +name = "decode_object_length" +path = "fuzz_targets/decode_object_length.rs" +doc = false + +[[bin]] +name = "dirty_page_roundtrip" +path = "fuzz_targets/dirty_page_roundtrip.rs" +doc = false + +[[bin]] +name = "dupfixed_page_decode" +path = "fuzz_targets/dupfixed_page_decode.rs" +doc = false + +[[bin]] +name = "key_validation" +path = "fuzz_targets/key_validation.rs" +doc = false diff --git a/fuzz/fuzz_targets/decode_array.rs b/fuzz/fuzz_targets/decode_array.rs new file mode 100644 index 0000000..11d2426 --- /dev/null +++ b/fuzz/fuzz_targets/decode_array.rs @@ -0,0 +1,55 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; +use signet_libmdbx::{DatabaseFlags, Environment, MdbxError, ReadError, WriteFlags}; +use tempfile::tempdir; + +fuzz_target!(|data: &[u8]| { + // Need at least one byte for a key. + if data.is_empty() { + return; + } + + // Use first byte as key length (1..=16), rest is the value. 
+ let key_len = ((data[0] as usize) % 16) + 1; + if data.len() < 1 + key_len { + return; + } + let key = &data[1..1 + key_len]; + let value = &data[1 + key_len..]; + + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::empty()).unwrap(); + txn.put(db, key, value, WriteFlags::empty()).unwrap(); + txn.commit().unwrap(); + + let ro_txn = env.begin_ro_unsync().unwrap(); + let ro_db = ro_txn.open_db(None).unwrap(); + + // Attempt decoding as fixed-size arrays. Length mismatches must produce an + // error, never a panic. + let r4: Result, ReadError> = ro_txn.get(ro_db.dbi(), key); + let r8: Result, ReadError> = ro_txn.get(ro_db.dbi(), key); + let r16: Result, ReadError> = ro_txn.get(ro_db.dbi(), key); + let r32: Result, ReadError> = ro_txn.get(ro_db.dbi(), key); + + // Validate: correct length → Ok, wrong length → DecodeErrorLenDiff. + for (result, expected_len) in [ + (r4.map(|o| o.map(|a| a.len())), 4usize), + (r8.map(|o| o.map(|a| a.len())), 8), + (r16.map(|o| o.map(|a| a.len())), 16), + (r32.map(|o| o.map(|a| a.len())), 32), + ] { + match result { + Ok(Some(len)) => assert_eq!(len, expected_len), + Ok(None) => {} + Err(ReadError::Mdbx(MdbxError::DecodeErrorLenDiff)) => { + // Expected when value.len() != expected_len. + assert_ne!(value.len(), expected_len); + } + Err(e) => panic!("unexpected error: {e:?}"), + } + } +}); diff --git a/fuzz/fuzz_targets/decode_cow.rs b/fuzz/fuzz_targets/decode_cow.rs new file mode 100644 index 0000000..d198da7 --- /dev/null +++ b/fuzz/fuzz_targets/decode_cow.rs @@ -0,0 +1,44 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; +use signet_libmdbx::{DatabaseFlags, Environment, WriteFlags}; +use std::borrow::Cow; +use tempfile::tempdir; + +fuzz_target!(|data: &[u8]| { + // Need at least one byte to split key/value. + if data.is_empty() { + return; + } + + // Use first byte as split point for key vs value. 
+ let split = (data[0] as usize).min(data.len().saturating_sub(1)); + let (key, value) = data[1..].split_at(split.min(data.len().saturating_sub(1))); + + // Keys must be non-empty for MDBX. + if key.is_empty() { + return; + } + + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + + // Write in RW transaction, then read back as Cow (dirty page path). + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::empty()).unwrap(); + txn.put(db, key, value, WriteFlags::empty()).unwrap(); + + // Read while transaction is still open: data is on a dirty page, so + // Cow::decode_borrow should return Cow::Owned. + let readback: Option> = txn.get(db.dbi(), key).unwrap(); + let readback = readback.unwrap(); + assert_eq!(readback.as_ref(), value); + + txn.commit().unwrap(); + + // Read via RO transaction: data is on a clean page, so Cow should borrow. + let ro_txn = env.begin_ro_unsync().unwrap(); + let ro_db = ro_txn.open_db(None).unwrap(); + let clean: Option> = ro_txn.get(ro_db.dbi(), key).unwrap(); + let clean = clean.unwrap(); + assert_eq!(clean.as_ref(), value); +}); diff --git a/fuzz/fuzz_targets/decode_object_length.rs b/fuzz/fuzz_targets/decode_object_length.rs new file mode 100644 index 0000000..994df30 --- /dev/null +++ b/fuzz/fuzz_targets/decode_object_length.rs @@ -0,0 +1,35 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; +use signet_libmdbx::{DatabaseFlags, Environment, ObjectLength, WriteFlags}; +use tempfile::tempdir; + +fuzz_target!(|data: &[u8]| { + // Need at least 1 byte for the key. + if data.is_empty() { + return; + } + + // First byte: key length (1..=16). 
+ let key_len = ((data[0] as usize) % 16) + 1; + if data.len() < 1 + key_len { + return; + } + let key = &data[1..1 + key_len]; + let value = &data[1 + key_len..]; + + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::empty()).unwrap(); + txn.put(db, key, value, WriteFlags::empty()).unwrap(); + txn.commit().unwrap(); + + let ro_txn = env.begin_ro_unsync().unwrap(); + let ro_db = ro_txn.open_db(None).unwrap(); + + // ObjectLength must return the exact byte length of the stored value. + let len: Option = ro_txn.get(ro_db.dbi(), key).unwrap(); + let len = len.unwrap(); + assert_eq!(*len, value.len()); +}); diff --git a/fuzz/fuzz_targets/dirty_page_roundtrip.rs b/fuzz/fuzz_targets/dirty_page_roundtrip.rs new file mode 100644 index 0000000..fa565d3 --- /dev/null +++ b/fuzz/fuzz_targets/dirty_page_roundtrip.rs @@ -0,0 +1,66 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; +use signet_libmdbx::{DatabaseFlags, Environment, WriteFlags}; +use tempfile::tempdir; + +/// Near-page-boundary value sizes to probe is_dirty_raw behaviour. +const BIASED_SIZES: [usize; 4] = [4094, 4096, 4098, 0]; + +fuzz_target!(|data: &[u8]| { + if data.len() < 2 { + return; + } + + // First byte selects value-size bias; remaining bytes provide content. + let bias_idx = (data[0] as usize) % BIASED_SIZES.len(); + let biased_size = BIASED_SIZES[bias_idx]; + let content = &data[1..]; + + // Build value: if biased_size > 0, pad/trim content to that size. + let value: Vec = if biased_size > 0 { + let mut v = content.to_vec(); + v.resize(biased_size, 0xAB); + v + } else { + content.to_vec() + }; + + // Key is always the first 4 bytes of content (or padded). 
+ let mut key = [0u8; 4]; + let copy_len = content.len().min(4); + key[..copy_len].copy_from_slice(&content[..copy_len]); + + let dir = tempdir().unwrap(); + let env = Environment::builder() + .set_geometry(signet_libmdbx::Geometry { + size: Some(0..(1024 * 1024 * 64)), + ..Default::default() + }) + .open(dir.path()) + .unwrap(); + + // Write in RW transaction; read back on dirty page. + // We use Vec to force a copy out of the transaction before commit. + let dirty_bytes: Vec = { + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::empty()).unwrap(); + txn.put(db, &key, &value, WriteFlags::empty()).unwrap(); + + // Read while dirty; Vec always copies, so no lifetime tie to txn. + let dirty: Option> = txn.get(db.dbi(), &key).unwrap(); + let dirty = dirty.unwrap(); + assert_eq!(dirty.as_slice(), value.as_slice()); + + txn.commit().unwrap(); + dirty + }; + + // Read via RO transaction: data now on a clean page. + let ro_txn = env.begin_ro_unsync().unwrap(); + let ro_db = ro_txn.open_db(None).unwrap(); + let clean: Option> = ro_txn.get(ro_db.dbi(), &key).unwrap(); + let clean = clean.unwrap(); + + // Both reads must agree on value content. + assert_eq!(dirty_bytes.as_slice(), clean.as_slice()); +}); diff --git a/fuzz/fuzz_targets/dupfixed_page_decode.rs b/fuzz/fuzz_targets/dupfixed_page_decode.rs new file mode 100644 index 0000000..c7a4248 --- /dev/null +++ b/fuzz/fuzz_targets/dupfixed_page_decode.rs @@ -0,0 +1,99 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; +use signet_libmdbx::{DatabaseFlags, Environment, WriteFlags}; +use tempfile::tempdir; + +fuzz_target!(|data: &[u8]| { + if data.len() < 2 { + return; + } + + // First byte: value size in the range 4..=64 (must be uniform across all + // values in a DUP_FIXED database). + let value_size = (data[0] as usize % 61) + 4; + // Second byte: number of values to insert, clamped to 1..=100. 
+ let n_values = (data[1] as usize % 100) + 1; + let payload = &data[2..]; + + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + + // Write phase: insert values, read back dirty page bytes, then commit. + let (dirty_len, inserted_count) = { + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.create_db(None, DatabaseFlags::DUP_SORT | DatabaseFlags::DUP_FIXED).unwrap(); + + // Build n_values distinct fixed-size values from the fuzz payload, + // padding or cycling as needed. We deduplicate before inserting: + // MDBX ignores exact duplicate key+value pairs silently. + let mut inserted: Vec> = Vec::with_capacity(n_values); + for i in 0..n_values { + let mut val = vec![0u8; value_size]; + // Fill from payload, cycling, then XOR with index for uniqueness. + for (j, byte) in val.iter_mut().enumerate() { + let src = if payload.is_empty() { + 0 + } else { + payload[(i * value_size + j) % payload.len()] + }; + *byte = src ^ ((i & 0xFF) as u8); + } + // Skip if we already have this exact value. + if !inserted.contains(&val) { + if txn.put(db, b"key", &val, WriteFlags::empty()).is_ok() { + inserted.push(val); + } + } + } + + if inserted.is_empty() { + return; + } + + // Read back via cursor while in the write transaction (dirty page). + // Use Vec so there is no lifetime tie to the transaction. + let dirty_len = { + let mut cursor = txn.cursor(db).unwrap(); + cursor.first::<(), ()>().unwrap(); + let dirty: Option> = cursor.get_multiple().unwrap(); + let page = dirty.unwrap(); + assert_eq!( + page.len() % value_size, + 0, + "dirty page length not a multiple of value_size" + ); + + // Reposition and read again; must be consistent. 
+ cursor.first::<(), ()>().unwrap(); + let second: Option> = cursor.get_multiple().unwrap(); + assert_eq!( + page.as_slice(), + second.unwrap().as_slice(), + "inconsistent get_multiple reads" + ); + + page.len() + }; + + txn.commit().unwrap(); + (dirty_len, inserted.len()) + }; + + // Read via RO transaction (clean page) and verify consistency. + let ro_txn = env.begin_ro_unsync().unwrap(); + let ro_db = ro_txn.open_db(None).unwrap(); + let mut ro_cursor = ro_txn.cursor(ro_db).unwrap(); + ro_cursor.first::<(), ()>().unwrap(); + let clean: Option> = ro_cursor.get_multiple().unwrap(); + + let clean_len = clean.map(|p| { + assert_eq!(p.len() % value_size, 0, "clean page length not a multiple of value_size"); + p.len() + }); + + // Total items returned must match what we inserted (may span multiple + // pages; get_multiple only returns up to one page, so just verify + // divisibility and non-zero length). + assert!(clean_len.unwrap_or(0) > 0 || inserted_count == 0); + assert_eq!(dirty_len, clean_len.unwrap_or(0), "dirty vs clean page byte counts differ"); +}); diff --git a/fuzz/fuzz_targets/key_validation.rs b/fuzz/fuzz_targets/key_validation.rs new file mode 100644 index 0000000..1fbd82d --- /dev/null +++ b/fuzz/fuzz_targets/key_validation.rs @@ -0,0 +1,35 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; +use signet_libmdbx::{DatabaseFlags, Environment, WriteFlags}; +use tempfile::tempdir; + +fuzz_target!(|data: &[u8]| { + if data.is_empty() { + return; + } + + let dir = tempdir().unwrap(); + // Two named databases require set_max_dbs(2) on the environment. + let env = Environment::builder().set_max_dbs(2).open(dir.path()).unwrap(); + + let txn = env.begin_rw_unsync().unwrap(); + + // Database 1: default (no special flags). Accepts arbitrary byte keys. + let default_db = txn.create_db(None, DatabaseFlags::empty()).unwrap(); + + // Database 2: INTEGER_KEY. Requires 4- or 8-byte aligned keys. 
+ let int_db = + txn.create_db(Some("intkeys"), DatabaseFlags::INTEGER_KEY | DatabaseFlags::CREATE).unwrap(); + + // Attempt put with the raw fuzz bytes as key. Should either succeed or + // return a typed error — never panic. + let _ = txn.put(default_db, data, b"value", WriteFlags::empty()); + + // Attempt put on the INTEGER_KEY database; fuzz bytes are likely not a + // valid 4- or 8-byte key, so we expect an error in most cases. + let _ = txn.put(int_db, data, b"value", WriteFlags::empty()); + + // Attempt get with fuzz bytes as key on both databases. + let _: signet_libmdbx::ReadResult>> = txn.get(default_db.dbi(), data); + let _: signet_libmdbx::ReadResult>> = txn.get(int_db.dbi(), data); +}); From 73715723599ffde766f4c7404f811498ba3d89e8 Mon Sep 17 00:00:00 2001 From: James Date: Sat, 28 Mar 2026 10:34:37 -0400 Subject: [PATCH 08/24] bench: align benchmarks with evmdb for cross-project parity - Standardize keys to 32 bytes (key + 28-digit zero-padded int) - Add value size parameterization (32, 128, 512 bytes) to scaling benchmarks - Expand concurrent reader counts to include 32 and 128 - Standardize concurrent bench values to 128 bytes - Use get_key() for cursor_write append benchmarks - Add PARITY comments linking to evmdb equivalents Co-Authored-By: Claude Opus 4.6 (1M context) --- benches/concurrent.rs | 10 ++- benches/cursor_write.rs | 10 ++- benches/scaling.rs | 167 +++++++++++++++++++++------------------- benches/utils.rs | 2 +- 4 files changed, 102 insertions(+), 87 deletions(-) diff --git a/benches/concurrent.rs b/benches/concurrent.rs index 2aabf1a..836bfe4 100644 --- a/benches/concurrent.rs +++ b/benches/concurrent.rs @@ -11,7 +11,7 @@ use tempfile::tempdir; use utils::*; const N_ROWS: u32 = 1_000; -const READER_COUNTS: &[usize] = &[1, 4, 8]; +const READER_COUNTS: &[usize] = &[1, 4, 8, 32, 128]; fn setup_arc_env(n: u32) -> (tempfile::TempDir, Arc) { let dir = tempdir().unwrap(); @@ -20,14 +20,16 @@ fn setup_arc_env(n: u32) -> (tempfile::TempDir, 
Arc) { let txn = env.begin_rw_unsync().unwrap(); let db = txn.open_db(None).unwrap(); for i in 0..n { - txn.put(db, get_key(i), get_data(i), WriteFlags::empty()).unwrap(); + let value: Vec = + format!("data{i:010}").as_bytes().iter().copied().cycle().take(128).collect(); + txn.put(db, get_key(i), value, WriteFlags::empty()).unwrap(); } txn.commit().unwrap(); } (dir, Arc::new(env)) } -/// N readers, no writer — read throughput baseline. +// PARITY: evmdb/readers_no_writer fn bench_n_readers_no_writer(c: &mut Criterion) { let mut group = c.benchmark_group("concurrent::readers_no_writer"); @@ -75,7 +77,7 @@ fn bench_n_readers_no_writer(c: &mut Criterion) { group.finish(); } -/// N readers + 1 writer — read throughput under write contention. +// PARITY: evmdb/readers_with_writer fn bench_n_readers_one_writer(c: &mut Criterion) { let mut group = c.benchmark_group("concurrent::readers_one_writer"); diff --git a/benches/cursor_write.rs b/benches/cursor_write.rs index a666b42..34c44e0 100644 --- a/benches/cursor_write.rs +++ b/benches/cursor_write.rs @@ -25,6 +25,7 @@ fn setup_plain_env(n: u32) -> (tempfile::TempDir, signet_libmdbx::Environment) { // PUT +// PARITY: evmdb/write_put_100 fn bench_cursor_put_sync(c: &mut Criterion) { let items: Vec<(String, String)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); let (_dir, env) = setup_plain_env(0); @@ -47,6 +48,7 @@ fn bench_cursor_put_sync(c: &mut Criterion) { }); } +// PARITY: evmdb/write_put_100 fn bench_cursor_put_unsync(c: &mut Criterion) { let items: Vec<(String, String)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); let (_dir, env) = setup_plain_env(0); @@ -109,10 +111,10 @@ fn bench_cursor_del_unsync(c: &mut Criterion) { // APPEND +// PARITY: evmdb/write_put_100_sorted fn bench_cursor_append_sync(c: &mut Criterion) { // Keys must be lexicographically sorted for append; zero-pad to ensure order. 
- let items: Vec<(String, String)> = - (0..N).map(|i| (format!("key{i:05}"), get_data(i))).collect(); + let items: Vec<(String, String)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); let (_dir, env) = setup_plain_env(0); c.bench_function("cursor_write::append::sync", |b| { @@ -133,9 +135,9 @@ fn bench_cursor_append_sync(c: &mut Criterion) { }); } +// PARITY: evmdb/write_put_100_sorted fn bench_cursor_append_unsync(c: &mut Criterion) { - let items: Vec<(String, String)> = - (0..N).map(|i| (format!("key{i:05}"), get_data(i))).collect(); + let items: Vec<(String, String)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); let (_dir, env) = setup_plain_env(0); c.bench_function("cursor_write::append::single_thread", |b| { diff --git a/benches/scaling.rs b/benches/scaling.rs index bcba0ae..405aa09 100644 --- a/benches/scaling.rs +++ b/benches/scaling.rs @@ -6,127 +6,138 @@ use signet_libmdbx::{Environment, ObjectLength, WriteFlags}; use tempfile::tempdir; const ENTRY_COUNTS: &[u32] = &[100, 1_000, 10_000, 100_000]; +const VALUE_SIZES: &[usize] = &[32, 128, 512]; fn format_key(i: u32) -> String { - format!("key{i:010}") + format!("key{i:028}") } -fn format_data(i: u32) -> String { - format!("data{i:010}") +fn make_value(i: u32, size: usize) -> Vec { + let seed = format!("data{i:010}"); + seed.as_bytes().iter().copied().cycle().take(size).collect() } /// Set up a plain environment (default db only) with N entries pre-populated. 
-fn setup_scaling_env(n: u32) -> (tempfile::TempDir, Environment) { +fn setup_scaling_env(n: u32, value_size: usize) -> (tempfile::TempDir, Environment) { let dir = tempdir().unwrap(); let env = Environment::builder().open(dir.path()).unwrap(); { let txn = env.begin_rw_unsync().unwrap(); let db = txn.open_db(None).unwrap(); for i in 0..n { - txn.put(db, format_key(i), format_data(i), WriteFlags::empty()).unwrap(); + txn.put(db, format_key(i), make_value(i, value_size), WriteFlags::empty()).unwrap(); } txn.commit().unwrap(); } (dir, env) } -/// Sequential get: read every entry in insertion order. +// PARITY: evmdb/sequential_get fn bench_sequential_get(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::sequential_get"); - for &n in ENTRY_COUNTS { - let (_dir, env) = setup_scaling_env(n); - let keys: Vec = (0..n).map(format_key).collect(); - - group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| { - b.iter(|| { - let txn = env.begin_ro_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - let mut total = 0usize; - for key in &keys { - total += *txn.get::(db.dbi(), key.as_bytes()).unwrap().unwrap(); - } - total - }) - }); + for &size in VALUE_SIZES { + for &n in ENTRY_COUNTS { + let (_dir, env) = setup_scaling_env(n, size); + let keys: Vec = (0..n).map(format_key).collect(); + + group.bench_with_input(BenchmarkId::new(format!("{size}B"), n), &n, |b, _| { + b.iter(|| { + let txn = env.begin_ro_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + let mut total = 0usize; + for key in &keys { + total += + *txn.get::(db.dbi(), key.as_bytes()).unwrap().unwrap(); + } + total + }) + }); + } } group.finish(); } -/// Random get: read every entry in shuffled order. 
+// PARITY: evmdb/random_get fn bench_random_get(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::random_get"); - for &n in ENTRY_COUNTS { - let (_dir, env) = setup_scaling_env(n); - let mut keys: Vec = (0..n).map(format_key).collect(); - keys.shuffle(&mut StdRng::from_seed(Default::default())); - - group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| { - b.iter(|| { - let txn = env.begin_ro_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - let mut total = 0usize; - for key in &keys { - total += *txn.get::(db.dbi(), key.as_bytes()).unwrap().unwrap(); - } - total - }) - }); + for &size in VALUE_SIZES { + for &n in ENTRY_COUNTS { + let (_dir, env) = setup_scaling_env(n, size); + let mut keys: Vec = (0..n).map(format_key).collect(); + keys.shuffle(&mut StdRng::from_seed(Default::default())); + + group.bench_with_input(BenchmarkId::new(format!("{size}B"), n), &n, |b, _| { + b.iter(|| { + let txn = env.begin_ro_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + let mut total = 0usize; + for key in &keys { + total += + *txn.get::(db.dbi(), key.as_bytes()).unwrap().unwrap(); + } + total + }) + }); + } } group.finish(); } -/// Full iteration: walk every entry via a cursor. 
+// PARITY: evmdb/full_iteration fn bench_full_iteration(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::full_iteration"); - for &n in ENTRY_COUNTS { - let (_dir, env) = setup_scaling_env(n); - - group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| { - b.iter(|| { - let txn = env.begin_ro_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - let mut cursor = txn.cursor(db).unwrap(); - let mut count = 0usize; - while cursor.next::, Vec>().unwrap().is_some() { - count += 1; - } - count - }) - }); + for &size in VALUE_SIZES { + for &n in ENTRY_COUNTS { + let (_dir, env) = setup_scaling_env(n, size); + + group.bench_with_input(BenchmarkId::new(format!("{size}B"), n), &n, |b, _| { + b.iter(|| { + let txn = env.begin_ro_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + let mut cursor = txn.cursor(db).unwrap(); + let mut count = 0usize; + while cursor.next::, Vec>().unwrap().is_some() { + count += 1; + } + count + }) + }); + } } group.finish(); } -/// Append-ordered put: insert N entries in key order into a fresh environment. +// PARITY: evmdb/put_sorted fn bench_append_ordered_put(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::append_ordered_put"); - for &n in ENTRY_COUNTS { - // Keys use zero-padded format to ensure lexicographic ordering. 
- let items: Vec<(String, String)> = - (0..n).map(|i| (format_key(i), format_data(i))).collect(); - - group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| { - b.iter_batched( - || { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - (dir, env) - }, - |(_dir, env)| { - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - for (key, data) in &items { - txn.append(db, key.as_bytes(), data.as_bytes()).unwrap(); - } - txn.commit().unwrap(); - }, - BatchSize::PerIteration, - ) - }); + for &size in VALUE_SIZES { + for &n in ENTRY_COUNTS { + let items: Vec<(String, Vec)> = + (0..n).map(|i| (format_key(i), make_value(i, size))).collect(); + + group.bench_with_input(BenchmarkId::new(format!("{size}B"), n), &n, |b, _| { + b.iter_batched( + || { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + (dir, env) + }, + |(_dir, env)| { + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + for (key, data) in &items { + txn.append(db, key.as_bytes(), data.as_slice()).unwrap(); + } + txn.commit().unwrap(); + }, + BatchSize::PerIteration, + ) + }); + } } group.finish(); } diff --git a/benches/utils.rs b/benches/utils.rs index 681c40f..3835990 100644 --- a/benches/utils.rs +++ b/benches/utils.rs @@ -14,7 +14,7 @@ pub const NAMED_DB: &str = "named_benchmark_db"; /// Generate a DB key string for testing. pub fn get_key(n: u32) -> String { - format!("key{n}") + format!("key{n:028}") } // Generate a DB data string for testing. From 5dfe4901a66a6a888eeb17571191017dc8cb5acf Mon Sep 17 00:00:00 2001 From: James Date: Sat, 28 Mar 2026 10:48:42 -0400 Subject: [PATCH 09/24] docs: document input validation model, update file layout Fix key_validation fuzz target to only feed valid-length keys to INTEGER_KEY databases (MDBX aborts on invalid sizes). 
Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 46 +++++++++++++++++++++++++---- fuzz/fuzz_targets/key_validation.rs | 13 ++++---- 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 5885505..934e66e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -46,11 +46,31 @@ DUP_SORT/DUP_FIXED methods validate flags at runtime: - `require_dup_sort()` returns `MdbxError::RequiresDupSort` - `require_dup_fixed()` returns `MdbxError::RequiresDupFixed` -- `debug_assert_integer_key()` validates key length (4 or 8 bytes) in debug builds Methods requiring DUP_SORT: `first_dup`, `last_dup`, `next_dup`, `prev_dup`, `get_both`, `get_both_range` Methods requiring DUP_FIXED: `get_multiple`, `next_multiple`, `prev_multiple` +### Input Validation Model + +MDBX's C layer aborts the process (via `cASSERT`) on certain constraint +violations — notably INTEGER_KEY size mismatches and oversized keys/values. +These aborts cannot be caught. + +Our validation model (in `src/tx/assertions.rs`): + +- **Debug builds:** `debug_assert` checks catch constraint violations in + Rust before they reach FFI. This includes key/value size limits, INTEGER_KEY + length (must be 4 or 8 bytes), INTEGER_DUP length, and append ordering. +- **Release builds:** No checks are performed. Invalid input passes through + to MDBX, which may abort the process. +- **Benchmarks and fuzz targets:** MUST constrain inputs to valid ranges. + Do not feed arbitrary-length keys to INTEGER_KEY databases or oversized + keys/values to any database. The fuzz/bench harness is responsible for + generating valid input, not the library. + +This is intentional. The library trusts callers in release mode for +performance. The debug assertions exist to catch bugs during development. 
+ ### Error Types - `MdbxError` - FFI/database errors (in `src/error.rs`) @@ -68,6 +88,7 @@ src/ codec.rs - TableObject trait tx/ mod.rs + assertions.rs - Debug assertions for key/value constraints cursor.rs - Cursor impl database.rs - Database struct sync.rs - Transaction impl @@ -76,14 +97,29 @@ src/ sys/ environment.rs - Environment impl tests/ - cursor.rs - Cursor tests - transaction.rs - Transaction tests - environment.rs - Environment tests + cursor.rs - Cursor tests + transaction.rs - Transaction tests + environment.rs - Environment tests + proptest_kv.rs - Property tests: key/value operations + proptest_cursor.rs - Property tests: cursor operations + proptest_dupsort.rs - Property tests: DUPSORT operations + proptest_dupfixed.rs - Property tests: DUPFIXED operations + proptest_iter.rs - Property tests: iterator operations + proptest_nested.rs - Property tests: nested transactions benches/ - cursor.rs - Cursor benchmarks + cursor.rs - Cursor read benchmarks + cursor_write.rs - Cursor write benchmarks (PARITY: evmdb) transaction.rs - Transaction benchmarks db_open.rs - Database open benchmarks + reserve.rs - Reserve vs put benchmarks + nested_txn.rs - Nested transaction benchmarks + concurrent.rs - Concurrency benchmarks (PARITY: evmdb) + scaling.rs - Scaling benchmarks (PARITY: evmdb) + deletion.rs - Deletion benchmarks + iter.rs - Iterator benchmarks utils.rs - Benchmark utilities +fuzz/ + fuzz_targets/ - cargo-fuzz targets (FFI/unsafe boundary hardening) ``` ## Testing diff --git a/fuzz/fuzz_targets/key_validation.rs b/fuzz/fuzz_targets/key_validation.rs index 1fbd82d..d14654a 100644 --- a/fuzz/fuzz_targets/key_validation.rs +++ b/fuzz/fuzz_targets/key_validation.rs @@ -25,11 +25,14 @@ fuzz_target!(|data: &[u8]| { // return a typed error — never panic. let _ = txn.put(default_db, data, b"value", WriteFlags::empty()); - // Attempt put on the INTEGER_KEY database; fuzz bytes are likely not a - // valid 4- or 8-byte key, so we expect an error in most cases. 
- let _ = txn.put(int_db, data, b"value", WriteFlags::empty()); + // INTEGER_KEY requires exactly 4 or 8 byte keys. MDBX aborts (not + // errors) on invalid sizes, so only feed valid-length keys to this db. + // We still fuzz the *content* of those keys. + if data.len() == 4 || data.len() == 8 { + let _ = txn.put(int_db, data, b"value", WriteFlags::empty()); + let _: signet_libmdbx::ReadResult>> = txn.get(int_db.dbi(), data); + } - // Attempt get with fuzz bytes as key on both databases. + // Attempt get with fuzz bytes as key on the default database. let _: signet_libmdbx::ReadResult>> = txn.get(default_db.dbi(), data); - let _: signet_libmdbx::ReadResult>> = txn.get(int_db.dbi(), data); }); From a48a4f2a135bb88e18b37dbf699da17a7e8d6e96 Mon Sep 17 00:00:00 2001 From: James Date: Sat, 28 Mar 2026 10:50:10 -0400 Subject: [PATCH 10/24] docs: document input validation model in README and lib.rs MDBX aborts the process on certain constraint violations (e.g. invalid INTEGER_KEY sizes). Document our debug-only validation model prominently in both the README and crate-level rustdoc. Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 18 ++++++++++++++++++ src/lib.rs | 37 ++++++++++++++++++++++++++++--------- 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 90ee126..4371ecd 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,24 @@ NOTE: Most of the repo came from [lmdb-rs bindings]. - `sys` - Environment and transaction management. - `tx` - module contains transactions, cursors, and iterators +## Input Validation + +MDBX's C layer **aborts the process** on certain constraint violations, +such as passing an invalid key size to an `INTEGER_KEY` database or +exceeding the maximum key/value size. These aborts cannot be caught. + +This crate uses a **debug-only validation model**: + +- **Debug builds** (`cfg(debug_assertions)`): Rust-side assertions check + key/value constraints before they reach FFI. 
Violations panic with a + descriptive message, catching bugs during development. +- **Release builds**: No validation is performed. Invalid input passes + directly to MDBX for maximum performance. + +**Callers are responsible for ensuring inputs are valid in release +builds.** The debug assertions exist to catch bugs during development, +not to provide runtime safety guarantees. + ## Updating the libmdbx Version To update the libmdbx version you must clone it and copy the `dist/` folder in diff --git a/src/lib.rs b/src/lib.rs index 6500322..be76c26 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -128,21 +128,40 @@ //! //! See the [`TableObject`] docs for more examples. //! -//! # Debug assertions +//! # Input Validation //! -//! When compiled with debug assertions enabled (the default for -//! `cargo build`), this crate performs additional runtime checks to -//! catch common mistakes. +//! MDBX's C layer **aborts the process** on certain constraint violations, +//! such as passing a key that is not exactly 4 or 8 bytes to an +//! [`DatabaseFlags::INTEGER_KEY`] database, or exceeding the maximum +//! key/value size for the configured page size. These aborts cannot be +//! caught or recovered from. +//! +//! This crate uses a **debug-only validation model**: +//! +//! - **Debug builds** (`cfg(debug_assertions)`): Rust-side assertions +//! check key/value constraints before they reach FFI. Violations panic +//! with a descriptive message. This catches bugs during development. +//! - **Release builds**: No validation is performed. Invalid input passes +//! directly to MDBX for maximum performance. If the input violates +//! MDBX constraints, the process may abort. +//! +//! The following checks are performed in debug builds: //! //! 1. Key sizes are checked against the database's configured -//! `pagesize` and `DatabaseFlags` (e.g. `INTEGERKEY`). +//! `pagesize` and `DatabaseFlags` (e.g. `INTEGER_KEY`). //! 2. Value sizes are checked against the database's configured -//! 
`pagesize` and `DatabaseFlags` (e.g. `INTEGERDUP`). -//! 3. For `append` operations, it checks that the key being appended is +//! `pagesize` and `DatabaseFlags` (e.g. `INTEGER_DUP`). +//! 3. `INTEGER_KEY` databases require keys of exactly 4 or 8 bytes. +//! 4. `INTEGER_DUP` databases require values of exactly 4 or 8 bytes. +//! 5. For `append` operations, it checks that the key being appended is //! greater than the current last key using lexicographic comparison. //! This check is skipped for `REVERSE_KEY` and `REVERSE_DUP` databases -//! since they use different comparison semantics (comparing bytes from -//! end to beginning). +//! since they use different comparison semantics. +//! +//! **Callers are responsible for ensuring inputs are valid in release +//! builds.** This is a deliberate design choice: the library trusts its +//! callers in release mode for performance, and the debug assertions +//! exist to catch bugs during development. //! //! # Provenance //! From 1f4ef67819bb7be2dce4c6edb88a2a3711812179 Mon Sep 17 00:00:00 2001 From: James Date: Sat, 28 Mar 2026 10:57:25 -0400 Subject: [PATCH 11/24] bench: align cursor traversal benchmarks with evmdb - Standardize get_data() to 128-byte values (was variable ~7-14 bytes) - Change cursor bench entry count from 100 to 1000 - Add PARITY comments for cursor_seek_first_iterate - Fix type signatures for String->Vec value change Co-Authored-By: Claude Opus 4.6 (1M context) --- benches/cursor.rs | 18 +++++++++--------- benches/cursor_write.rs | 16 ++++++++-------- benches/transaction.rs | 8 ++++---- benches/utils.rs | 7 ++++--- 4 files changed, 25 insertions(+), 24 deletions(-) diff --git a/benches/cursor.rs b/benches/cursor.rs index fc8ac96..3aaf3fb 100644 --- a/benches/cursor.rs +++ b/benches/cursor.rs @@ -8,7 +8,7 @@ use utils::*; /// Benchmark of iterator sequential read performance. 
fn bench_get_seq_iter(c: &mut Criterion) { - let n = 100; + let n = 1000; let (_dir, env) = setup_bench_db(n); let txn = create_ro_sync(&env); let db = txn.open_db(None).unwrap(); @@ -51,9 +51,9 @@ fn bench_get_seq_iter(c: &mut Criterion) { }); } -/// Benchmark of cursor sequential read performance. +// PARITY: evmdb/cursor_seek_first_iterate fn bench_get_seq_cursor(c: &mut Criterion) { - let n = 100; + let n = 1000; let (_dir, env) = setup_bench_db(n); let txn = create_ro_sync(&env); let db = txn.open_db(None).unwrap(); @@ -76,7 +76,7 @@ fn bench_get_seq_cursor(c: &mut Criterion) { } fn bench_get_seq_for_loop(c: &mut Criterion) { - let n = 100; + let n = 1000; let (_dir, env) = setup_bench_db(n); let txn = create_ro_sync(&env); let db = txn.open_db(None).unwrap(); @@ -103,7 +103,7 @@ fn bench_get_seq_for_loop(c: &mut Criterion) { /// Benchmark of iterator sequential read performance (single-thread). fn bench_get_seq_iter_single_thread(c: &mut Criterion) { - let n = 100; + let n = 1000; let (_dir, env) = setup_bench_db(n); let txn = create_ro_unsync(&env); let db = txn.open_db(None).unwrap(); @@ -146,9 +146,9 @@ fn bench_get_seq_iter_single_thread(c: &mut Criterion) { }); } -/// Benchmark of cursor sequential read performance (single-thread). +// PARITY: evmdb/cursor_seek_first_iterate fn bench_get_seq_cursor_single_thread(c: &mut Criterion) { - let n = 100; + let n = 1000; let (_dir, env) = setup_bench_db(n); let txn = create_ro_unsync(&env); let db = txn.open_db(None).unwrap(); @@ -171,7 +171,7 @@ fn bench_get_seq_cursor_single_thread(c: &mut Criterion) { } fn bench_get_seq_for_loop_single_thread(c: &mut Criterion) { - let n = 100; + let n = 1000; let (_dir, env) = setup_bench_db(n); let txn = create_ro_unsync(&env); let db = txn.open_db(None).unwrap(); @@ -198,7 +198,7 @@ fn bench_get_seq_for_loop_single_thread(c: &mut Criterion) { /// Benchmark of raw MDBX sequential read performance (control). 
fn bench_get_seq_raw(c: &mut Criterion) { - let n = 100; + let n = 1000; let (_dir, env) = setup_bench_db(n); let mut key = MDBX_val { iov_len: 0, iov_base: ptr::null_mut() }; diff --git a/benches/cursor_write.rs b/benches/cursor_write.rs index 34c44e0..e3e55a7 100644 --- a/benches/cursor_write.rs +++ b/benches/cursor_write.rs @@ -27,7 +27,7 @@ fn setup_plain_env(n: u32) -> (tempfile::TempDir, signet_libmdbx::Environment) { // PARITY: evmdb/write_put_100 fn bench_cursor_put_sync(c: &mut Criterion) { - let items: Vec<(String, String)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); + let items: Vec<(String, Vec)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); let (_dir, env) = setup_plain_env(0); c.bench_function("cursor_write::put::sync", |b| { @@ -40,7 +40,7 @@ fn bench_cursor_put_sync(c: &mut Criterion) { |(txn, db)| { let mut cursor = txn.cursor(db).unwrap(); for (key, data) in &items { - cursor.put(key.as_bytes(), data.as_bytes(), WriteFlags::empty()).unwrap(); + cursor.put(key.as_bytes(), data.as_slice(), WriteFlags::empty()).unwrap(); } }, BatchSize::PerIteration, @@ -50,7 +50,7 @@ fn bench_cursor_put_sync(c: &mut Criterion) { // PARITY: evmdb/write_put_100 fn bench_cursor_put_unsync(c: &mut Criterion) { - let items: Vec<(String, String)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); + let items: Vec<(String, Vec)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); let (_dir, env) = setup_plain_env(0); c.bench_function("cursor_write::put::single_thread", |b| { @@ -63,7 +63,7 @@ fn bench_cursor_put_unsync(c: &mut Criterion) { |(txn, db)| { let mut cursor = txn.cursor(db).unwrap(); for (key, data) in &items { - cursor.put(key.as_bytes(), data.as_bytes(), WriteFlags::empty()).unwrap(); + cursor.put(key.as_bytes(), data.as_slice(), WriteFlags::empty()).unwrap(); } }, BatchSize::PerIteration, @@ -114,7 +114,7 @@ fn bench_cursor_del_unsync(c: &mut Criterion) { // PARITY: evmdb/write_put_100_sorted fn bench_cursor_append_sync(c: &mut 
Criterion) { // Keys must be lexicographically sorted for append; zero-pad to ensure order. - let items: Vec<(String, String)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); + let items: Vec<(String, Vec)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); let (_dir, env) = setup_plain_env(0); c.bench_function("cursor_write::append::sync", |b| { @@ -127,7 +127,7 @@ fn bench_cursor_append_sync(c: &mut Criterion) { |(txn, db)| { let mut cursor = txn.cursor(db).unwrap(); for (key, data) in &items { - cursor.append(key.as_bytes(), data.as_bytes()).unwrap(); + cursor.append(key.as_bytes(), data.as_slice()).unwrap(); } }, BatchSize::PerIteration, @@ -137,7 +137,7 @@ fn bench_cursor_append_sync(c: &mut Criterion) { // PARITY: evmdb/write_put_100_sorted fn bench_cursor_append_unsync(c: &mut Criterion) { - let items: Vec<(String, String)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); + let items: Vec<(String, Vec)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); let (_dir, env) = setup_plain_env(0); c.bench_function("cursor_write::append::single_thread", |b| { @@ -150,7 +150,7 @@ fn bench_cursor_append_unsync(c: &mut Criterion) { |(txn, db)| { let mut cursor = txn.cursor(db).unwrap(); for (key, data) in &items { - cursor.append(key.as_bytes(), data.as_bytes()).unwrap(); + cursor.append(key.as_bytes(), data.as_slice()).unwrap(); } }, BatchSize::PerIteration, diff --git a/benches/transaction.rs b/benches/transaction.rs index 6e3f868..0de5e69 100644 --- a/benches/transaction.rs +++ b/benches/transaction.rs @@ -93,7 +93,7 @@ fn bench_put_rand_raw(c: &mut Criterion) { let n = 100u32; let (_dir, env) = setup_bench_db(0); - let mut items: Vec<(String, String)> = (0..n).map(|n| (get_key(n), get_data(n))).collect(); + let mut items: Vec<(String, Vec)> = (0..n).map(|n| (get_key(n), get_data(n))).collect(); items.shuffle(&mut StdRng::from_seed(Default::default())); let dbi = create_ro_sync(&env).open_db(None).unwrap().dbi(); @@ -116,7 +116,7 @@ fn 
bench_put_rand_raw(c: &mut Criterion) { key_val.iov_len = key.len(); key_val.iov_base = key.as_bytes().as_ptr().cast_mut().cast(); data_val.iov_len = data.len(); - data_val.iov_base = data.as_bytes().as_ptr().cast_mut().cast(); + data_val.iov_base = data.as_ptr().cast_mut().cast(); i += mdbx_put(txn, dbi, &raw const key_val, &raw mut data_val, 0); } @@ -132,7 +132,7 @@ fn bench_put_rand_sync(c: &mut Criterion) { let n = 100u32; let (_dir, env) = setup_bench_db(0); - let mut items: Vec<(String, String)> = (0..n).map(|n| (get_key(n), get_data(n))).collect(); + let mut items: Vec<(String, Vec<u8>)> = (0..n).map(|n| (get_key(n), get_data(n))).collect(); items.shuffle(&mut StdRng::from_seed(Default::default())); c.bench_function("transaction::put::rand", |b| { @@ -156,7 +156,7 @@ fn bench_put_rand_unsync(c: &mut Criterion) { let n = 100u32; let (_dir, env) = setup_bench_db(0); - let mut items: Vec<(String, String)> = (0..n).map(|n| (get_key(n), get_data(n))).collect(); + let mut items: Vec<(String, Vec<u8>)> = (0..n).map(|n| (get_key(n), get_data(n))).collect(); items.shuffle(&mut StdRng::from_seed(Default::default())); c.bench_function("transaction::put::rand::single_thread", |b| { diff --git a/benches/utils.rs b/benches/utils.rs index 3835990..2050c87 100644 --- a/benches/utils.rs +++ b/benches/utils.rs @@ -17,9 +17,10 @@ pub fn get_key(n: u32) -> String { format!("key{n:028}") } -// Generate a DB data string for testing. -pub fn get_data(n: u32) -> String { - format!("data{n}") +/// Generate a 128-byte value for benchmarking. 
+pub fn get_data(n: u32) -> Vec<u8> { + let seed = format!("data{n:010}"); + seed.as_bytes().iter().copied().cycle().take(128).collect() } // Raw transaction utilities From 88204dcee4a52191f68430c6f06b5a3c7fa5dc4d Mon Sep 17 00:00:00 2001 From: James Date: Sat, 28 Mar 2026 13:54:38 -0400 Subject: [PATCH 12/24] bench: add commit cost isolation benchmark Measures commit() time separately from write time, parameterized over entry count (10-10K) and value size (32/128/512 bytes). Co-Authored-By: Claude Opus 4.6 (1M context) --- benches/transaction.rs | 52 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/benches/transaction.rs b/benches/transaction.rs index 0de5e69..25ee92a 100644 --- a/benches/transaction.rs +++ b/benches/transaction.rs @@ -205,11 +205,61 @@ fn bench_tx_create_unsync(c: &mut Criterion) { }); } +// COMMIT + +const COMMIT_ENTRY_COUNTS: &[u32] = &[10, 100, 1_000, 10_000]; +const COMMIT_VALUE_SIZES: &[usize] = &[32, 128, 512]; + +fn make_commit_value(i: u32, size: usize) -> Vec<u8> { + let seed = format!("data{i:010}"); + seed.as_bytes().iter().copied().cycle().take(size).collect() +} + +/// Measures commit cost in isolation. The setup phase writes N entries of +/// a given value size (excluded from timing), then the timed phase calls +/// only `commit()`. 
+fn bench_commit_cost(c: &mut Criterion) { + let mut group = c.benchmark_group("transaction::commit"); + + for &size in COMMIT_VALUE_SIZES { + for &n in COMMIT_ENTRY_COUNTS { + let keys: Vec<String> = (0..n).map(get_key).collect(); + let values: Vec<Vec<u8>> = (0..n).map(|i| make_commit_value(i, size)).collect(); + + group.bench_with_input( + criterion::BenchmarkId::new(format!("{size}B"), n), + &n, + |b, _| { + b.iter_batched( + || { + let dir = tempfile::tempdir().unwrap(); + let env = + signet_libmdbx::Environment::builder().open(dir.path()).unwrap(); + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + for (key, value) in keys.iter().zip(values.iter()) { + txn.put(db, key, value, WriteFlags::empty()).unwrap(); + } + (dir, env, txn) + }, + |(_dir, _env, txn)| { + txn.commit().unwrap(); + }, + criterion::BatchSize::PerIteration, + ) + }, + ); + } + } + group.finish(); +} + criterion_group! { name = benches; config = Criterion::default(); targets = bench_get_rand_sync, bench_get_rand_raw, bench_get_rand_unsync, bench_put_rand_sync, bench_put_rand_raw, bench_put_rand_unsync, - bench_tx_create_raw, bench_tx_create_sync, bench_tx_create_unsync + bench_tx_create_raw, bench_tx_create_sync, bench_tx_create_unsync, + bench_commit_cost } criterion_main!(benches); From 965aaf6dadeb22b353b387c68d4075d2bb600903 Mon Sep 17 00:00:00 2001 From: James Date: Sat, 28 Mar 2026 14:00:01 -0400 Subject: [PATCH 13/24] bench: mark parity benchmarks as do-not-edit Co-Authored-By: Claude Opus 4.6 (1M context) --- benches/concurrent.rs | 4 ++-- benches/cursor.rs | 4 ++-- benches/cursor_write.rs | 8 ++++---- benches/scaling.rs | 8 ++++---- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/benches/concurrent.rs b/benches/concurrent.rs index 836bfe4..4b69664 100644 --- a/benches/concurrent.rs +++ b/benches/concurrent.rs @@ -29,7 +29,7 @@ fn setup_arc_env(n: u32) -> (tempfile::TempDir, Arc<Environment>) { (dir, Arc::new(env)) } -// PARITY: 
evmdb/readers_no_writer — DO NOT EDIT without updating evmdb fn bench_n_readers_no_writer(c: &mut Criterion) { let mut group = c.benchmark_group("concurrent::readers_no_writer"); @@ -77,7 +77,7 @@ fn bench_n_readers_no_writer(c: &mut Criterion) { group.finish(); } -// PARITY: evmdb/readers_with_writer +// PARITY: evmdb/readers_with_writer — DO NOT EDIT without updating evmdb fn bench_n_readers_one_writer(c: &mut Criterion) { let mut group = c.benchmark_group("concurrent::readers_one_writer"); diff --git a/benches/cursor.rs b/benches/cursor.rs index 3aaf3fb..6801812 100644 --- a/benches/cursor.rs +++ b/benches/cursor.rs @@ -51,7 +51,7 @@ fn bench_get_seq_iter(c: &mut Criterion) { }); } -// PARITY: evmdb/cursor_seek_first_iterate +// PARITY: evmdb/cursor_seek_first_iterate — DO NOT EDIT without updating evmdb fn bench_get_seq_cursor(c: &mut Criterion) { let n = 1000; let (_dir, env) = setup_bench_db(n); @@ -146,7 +146,7 @@ fn bench_get_seq_iter_single_thread(c: &mut Criterion) { }); } -// PARITY: evmdb/cursor_seek_first_iterate +// PARITY: evmdb/cursor_seek_first_iterate — DO NOT EDIT without updating evmdb fn bench_get_seq_cursor_single_thread(c: &mut Criterion) { let n = 1000; let (_dir, env) = setup_bench_db(n); diff --git a/benches/cursor_write.rs b/benches/cursor_write.rs index e3e55a7..0062474 100644 --- a/benches/cursor_write.rs +++ b/benches/cursor_write.rs @@ -25,7 +25,7 @@ fn setup_plain_env(n: u32) -> (tempfile::TempDir, signet_libmdbx::Environment) { // PUT -// PARITY: evmdb/write_put_100 +// PARITY: evmdb/write_put_100 — DO NOT EDIT without updating evmdb fn bench_cursor_put_sync(c: &mut Criterion) { let items: Vec<(String, Vec)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); let (_dir, env) = setup_plain_env(0); @@ -48,7 +48,7 @@ fn bench_cursor_put_sync(c: &mut Criterion) { }); } -// PARITY: evmdb/write_put_100 +// PARITY: evmdb/write_put_100 — DO NOT EDIT without updating evmdb fn bench_cursor_put_unsync(c: &mut Criterion) { let items: 
Vec<(String, Vec)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); let (_dir, env) = setup_plain_env(0); @@ -111,7 +111,7 @@ fn bench_cursor_del_unsync(c: &mut Criterion) { // APPEND -// PARITY: evmdb/write_put_100_sorted +// PARITY: evmdb/write_put_100_sorted — DO NOT EDIT without updating evmdb fn bench_cursor_append_sync(c: &mut Criterion) { // Keys must be lexicographically sorted for append; zero-pad to ensure order. let items: Vec<(String, Vec)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); @@ -135,7 +135,7 @@ fn bench_cursor_append_sync(c: &mut Criterion) { }); } -// PARITY: evmdb/write_put_100_sorted +// PARITY: evmdb/write_put_100_sorted — DO NOT EDIT without updating evmdb fn bench_cursor_append_unsync(c: &mut Criterion) { let items: Vec<(String, Vec)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); let (_dir, env) = setup_plain_env(0); diff --git a/benches/scaling.rs b/benches/scaling.rs index 405aa09..2537525 100644 --- a/benches/scaling.rs +++ b/benches/scaling.rs @@ -32,7 +32,7 @@ fn setup_scaling_env(n: u32, value_size: usize) -> (tempfile::TempDir, Environme (dir, env) } -// PARITY: evmdb/sequential_get +// PARITY: evmdb/sequential_get — DO NOT EDIT without updating evmdb fn bench_sequential_get(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::sequential_get"); @@ -58,7 +58,7 @@ fn bench_sequential_get(c: &mut Criterion) { group.finish(); } -// PARITY: evmdb/random_get +// PARITY: evmdb/random_get — DO NOT EDIT without updating evmdb fn bench_random_get(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::random_get"); @@ -85,7 +85,7 @@ fn bench_random_get(c: &mut Criterion) { group.finish(); } -// PARITY: evmdb/full_iteration +// PARITY: evmdb/full_iteration — DO NOT EDIT without updating evmdb fn bench_full_iteration(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::full_iteration"); @@ -110,7 +110,7 @@ fn bench_full_iteration(c: &mut Criterion) { group.finish(); } -// PARITY: 
evmdb/put_sorted +// PARITY: evmdb/put_sorted — DO NOT EDIT without updating evmdb fn bench_append_ordered_put(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::append_ordered_put"); From 97dd1dd80915cf0326020a8dc06f9cdb64d1c502 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 30 Mar 2026 10:21:19 -0400 Subject: [PATCH 14/24] bench: add quick mode and skip slow benchmarks by default All benchmarks now use quick criterion config (10 samples, 1s warmup). Scaling benchmarks skip 100K entries and concurrent benchmarks skip 128 readers unless BENCH_FULL=1 is set. Skips print noisy warnings. Co-Authored-By: Claude Opus 4.6 (1M context) --- benches/concurrent.rs | 20 +++++++++++++++----- benches/cursor.rs | 2 +- benches/cursor_write.rs | 2 +- benches/db_open.rs | 2 +- benches/deletion.rs | 6 ++++-- benches/iter.rs | 4 ++-- benches/nested_txn.rs | 4 +++- benches/reserve.rs | 2 +- benches/scaling.rs | 24 ++++++++++++++++++------ benches/transaction.rs | 2 +- benches/utils.rs | 11 +++++++++++ 11 files changed, 58 insertions(+), 21 deletions(-) diff --git a/benches/concurrent.rs b/benches/concurrent.rs index 4b69664..d9d9f58 100644 --- a/benches/concurrent.rs +++ b/benches/concurrent.rs @@ -8,10 +8,20 @@ use std::{ thread, }; use tempfile::tempdir; -use utils::*; +use utils::{get_key, is_bench_full, quick_config}; const N_ROWS: u32 = 1_000; -const READER_COUNTS: &[usize] = &[1, 4, 8, 32, 128]; +const READER_COUNTS_FULL: &[usize] = &[1, 4, 8, 32, 128]; +const READER_COUNTS_QUICK: &[usize] = &[1, 4, 8, 32]; + +fn reader_counts() -> &'static [usize] { + if is_bench_full() { + READER_COUNTS_FULL + } else { + eprintln!("NOTE: skipping 128-reader benchmarks (set BENCH_FULL=1 for full suite)"); + READER_COUNTS_QUICK + } +} fn setup_arc_env(n: u32) -> (tempfile::TempDir, Arc) { let dir = tempdir().unwrap(); @@ -33,7 +43,7 @@ fn setup_arc_env(n: u32) -> (tempfile::TempDir, Arc) { fn bench_n_readers_no_writer(c: &mut Criterion) { let mut group = 
c.benchmark_group("concurrent::readers_no_writer"); - for &n_readers in READER_COUNTS { + for &n_readers in reader_counts() { let (_dir, env) = setup_arc_env(N_ROWS); let keys: Arc> = Arc::new((0..N_ROWS).map(get_key).collect()); @@ -81,7 +91,7 @@ fn bench_n_readers_no_writer(c: &mut Criterion) { fn bench_n_readers_one_writer(c: &mut Criterion) { let mut group = c.benchmark_group("concurrent::readers_one_writer"); - for &n_readers in READER_COUNTS { + for &n_readers in reader_counts() { let (_dir, env) = setup_arc_env(N_ROWS); let keys: Arc> = Arc::new((0..N_ROWS).map(get_key).collect()); @@ -174,7 +184,7 @@ fn bench_single_thread_sync_vs_unsync(c: &mut Criterion) { criterion_group! { name = benches; - config = Criterion::default(); + config = quick_config(); targets = bench_n_readers_no_writer, bench_n_readers_one_writer, diff --git a/benches/cursor.rs b/benches/cursor.rs index 6801812..4a79f63 100644 --- a/benches/cursor.rs +++ b/benches/cursor.rs @@ -245,7 +245,7 @@ fn bench_get_seq_raw(c: &mut Criterion) { criterion_group! { name = benches; - config = Criterion::default(); + config = quick_config(); targets = bench_get_seq_iter, bench_get_seq_cursor, bench_get_seq_for_loop, bench_get_seq_raw, bench_get_seq_iter_single_thread, bench_get_seq_cursor_single_thread, bench_get_seq_for_loop_single_thread } diff --git a/benches/cursor_write.rs b/benches/cursor_write.rs index 0062474..fbd5471 100644 --- a/benches/cursor_write.rs +++ b/benches/cursor_write.rs @@ -216,7 +216,7 @@ fn bench_cursor_append_dup_unsync(c: &mut Criterion) { criterion_group! { name = benches; - config = Criterion::default(); + config = quick_config(); targets = bench_cursor_put_sync, bench_cursor_put_unsync, bench_cursor_del_sync, bench_cursor_del_unsync, diff --git a/benches/db_open.rs b/benches/db_open.rs index 48217e4..5c5e97d 100644 --- a/benches/db_open.rs +++ b/benches/db_open.rs @@ -134,7 +134,7 @@ fn bench_open_db_no_cache_named(c: &mut Criterion) { criterion_group! 
{ name = db_open; - config = Criterion::default(); + config = quick_config(); targets = bench_dbi_flags_ex_only, bench_dbi_open_only, diff --git a/benches/deletion.rs b/benches/deletion.rs index bb444a6..f6246b3 100644 --- a/benches/deletion.rs +++ b/benches/deletion.rs @@ -1,8 +1,10 @@ -#![allow(missing_docs)] +#![allow(missing_docs, dead_code)] +mod utils; use criterion::{BatchSize, Criterion, criterion_group, criterion_main}; use signet_libmdbx::{DatabaseFlags, Environment, WriteFlags}; use tempfile::{TempDir, tempdir}; +use utils::quick_config; const VALUE_SIZE: usize = 100; const DB_NAME: &str = "deletion_bench"; @@ -73,7 +75,7 @@ fn bench_del_loop(c: &mut Criterion) { criterion_group! { name = benches; - config = Criterion::default(); + config = quick_config(); targets = bench_del_all_dups, bench_del_loop, } diff --git a/benches/iter.rs b/benches/iter.rs index a6ef52e..ce5d634 100644 --- a/benches/iter.rs +++ b/benches/iter.rs @@ -1,7 +1,7 @@ #![allow(missing_docs)] mod utils; -use crate::utils::{create_ro_sync, create_ro_unsync}; +use crate::utils::{create_ro_sync, create_ro_unsync, quick_config}; use criterion::{Criterion, criterion_group, criterion_main}; use signet_libmdbx::{DatabaseFlags, DupItem, Environment, WriteFlags}; use std::hint::black_box; @@ -120,7 +120,7 @@ fn bench_iter_simple_sync(c: &mut Criterion) { criterion_group! 
{ name = benches; - config = Criterion::default(); + config = quick_config(); targets = bench_iter_dupfixed, bench_iter_simple, bench_iter_dupfixed_sync, bench_iter_simple_sync, } diff --git a/benches/nested_txn.rs b/benches/nested_txn.rs index 3c804ee..203e8e2 100644 --- a/benches/nested_txn.rs +++ b/benches/nested_txn.rs @@ -1,8 +1,10 @@ #![allow(missing_docs, dead_code)] +mod utils; use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main}; use signet_libmdbx::{Environment, WriteFlags}; use tempfile::tempdir; +use utils::quick_config; fn setup_env() -> (tempfile::TempDir, Environment) { let dir = tempdir().unwrap(); @@ -89,7 +91,7 @@ fn bench_nested_write_and_read(c: &mut Criterion) { criterion_group! { name = benches; - config = Criterion::default(); + config = quick_config(); targets = bench_flat_baseline, bench_nested_commit, bench_nested_write_and_read, } diff --git a/benches/reserve.rs b/benches/reserve.rs index 0c44685..d2dd661 100644 --- a/benches/reserve.rs +++ b/benches/reserve.rs @@ -59,7 +59,7 @@ fn bench_with_reservation(c: &mut Criterion) { criterion_group! 
{ name = benches; - config = Criterion::default(); + config = quick_config(); targets = bench_put, bench_with_reservation, } diff --git a/benches/scaling.rs b/benches/scaling.rs index 2537525..969381d 100644 --- a/benches/scaling.rs +++ b/benches/scaling.rs @@ -1,13 +1,25 @@ #![allow(missing_docs, dead_code)] +mod utils; use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main}; use rand::{SeedableRng, prelude::SliceRandom, rngs::StdRng}; use signet_libmdbx::{Environment, ObjectLength, WriteFlags}; use tempfile::tempdir; +use utils::{is_bench_full, quick_config}; -const ENTRY_COUNTS: &[u32] = &[100, 1_000, 10_000, 100_000]; +const ENTRY_COUNTS_FULL: &[u32] = &[100, 1_000, 10_000, 100_000]; +const ENTRY_COUNTS_QUICK: &[u32] = &[100, 1_000, 10_000]; const VALUE_SIZES: &[usize] = &[32, 128, 512]; +fn entry_counts() -> &'static [u32] { + if is_bench_full() { + ENTRY_COUNTS_FULL + } else { + eprintln!("NOTE: skipping 100K entry benchmarks (set BENCH_FULL=1 for full suite)"); + ENTRY_COUNTS_QUICK + } +} + fn format_key(i: u32) -> String { format!("key{i:028}") } @@ -37,7 +49,7 @@ fn bench_sequential_get(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::sequential_get"); for &size in VALUE_SIZES { - for &n in ENTRY_COUNTS { + for &n in entry_counts() { let (_dir, env) = setup_scaling_env(n, size); let keys: Vec<String> = (0..n).map(format_key).collect(); @@ -63,7 +75,7 @@ fn bench_random_get(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::random_get"); for &size in VALUE_SIZES { - for &n in ENTRY_COUNTS { + for &n in entry_counts() { let (_dir, env) = setup_scaling_env(n, size); let mut keys: Vec<String> = (0..n).map(format_key).collect(); keys.shuffle(&mut StdRng::from_seed(Default::default())); @@ -90,7 +102,7 @@ fn bench_full_iteration(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::full_iteration"); for &size in VALUE_SIZES { - for &n in ENTRY_COUNTS { + for &n in entry_counts() { let (_dir, env) = 
setup_scaling_env(n, size); group.bench_with_input(BenchmarkId::new(format!("{size}B"), n), &n, |b, _| { @@ -115,7 +127,7 @@ fn bench_append_ordered_put(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::append_ordered_put"); for &size in VALUE_SIZES { - for &n in ENTRY_COUNTS { + for &n in entry_counts() { let items: Vec<(String, Vec<u8>)> = (0..n).map(|i| (format_key(i), make_value(i, size))).collect(); @@ -144,7 +156,7 @@ criterion_group! { name = benches; - config = Criterion::default(); + config = quick_config(); targets = bench_sequential_get, bench_random_get, diff --git a/benches/transaction.rs b/benches/transaction.rs index 25ee92a..2d392f2 100644 --- a/benches/transaction.rs +++ b/benches/transaction.rs @@ -256,7 +256,7 @@ fn bench_commit_cost(c: &mut Criterion) { criterion_group! { name = benches; - config = Criterion::default(); + config = quick_config(); targets = bench_get_rand_sync, bench_get_rand_raw, bench_get_rand_unsync, bench_put_rand_sync, bench_put_rand_raw, bench_put_rand_unsync, bench_tx_create_raw, bench_tx_create_sync, bench_tx_create_unsync, diff --git a/benches/utils.rs b/benches/utils.rs index 2050c87..b93d330 100644 --- a/benches/utils.rs +++ b/benches/utils.rs @@ -1,6 +1,7 @@ //! Utility functions for benchmarks. #![allow(dead_code, unreachable_pub)] +use criterion::Criterion; use signet_libmdbx::{ Environment, WriteFlags, ffi::{MDBX_TXN_RDONLY, MDBX_env, MDBX_txn, mdbx_txn_begin_ex}, @@ -9,6 +10,16 @@ use signet_libmdbx::{ use std::ptr; use tempfile::{TempDir, tempdir}; +/// Returns true if `BENCH_FULL=1` is set in the environment. +pub fn is_bench_full() -> bool { + std::env::var("BENCH_FULL").is_ok_and(|v| v == "1") +} + +/// Quick criterion config: 10 samples, 1s warmup. +pub fn quick_config() -> Criterion { + Criterion::default().sample_size(10).warm_up_time(std::time::Duration::from_secs(1)) +} + /// Name of the named benchmark database. 
pub const NAMED_DB: &str = "named_benchmark_db"; From 6507b227803138e1ab4f915b11aed008176e0d7c Mon Sep 17 00:00:00 2001 From: James Date: Mon, 30 Mar 2026 10:31:24 -0400 Subject: [PATCH 15/24] bench: deduplicate skip warnings with Once Co-Authored-By: Claude Opus 4.6 (1M context) --- benches/concurrent.rs | 6 +++++- benches/scaling.rs | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/benches/concurrent.rs b/benches/concurrent.rs index d9d9f58..1adfd8e 100644 --- a/benches/concurrent.rs +++ b/benches/concurrent.rs @@ -15,10 +15,14 @@ const READER_COUNTS_FULL: &[usize] = &[1, 4, 8, 32, 128]; const READER_COUNTS_QUICK: &[usize] = &[1, 4, 8, 32]; fn reader_counts() -> &'static [usize] { + use std::sync::Once; + static WARN: Once = Once::new(); if is_bench_full() { READER_COUNTS_FULL } else { - eprintln!("NOTE: skipping 128-reader benchmarks (set BENCH_FULL=1 for full suite)"); + WARN.call_once(|| { + eprintln!("NOTE: skipping 128-reader benchmarks (set BENCH_FULL=1 for full suite)"); + }); READER_COUNTS_QUICK } } diff --git a/benches/scaling.rs b/benches/scaling.rs index 969381d..5d5133c 100644 --- a/benches/scaling.rs +++ b/benches/scaling.rs @@ -12,10 +12,14 @@ const ENTRY_COUNTS_QUICK: &[u32] = &[100, 1_000, 10_000]; const VALUE_SIZES: &[usize] = &[32, 128, 512]; fn entry_counts() -> &'static [u32] { + use std::sync::Once; + static WARN: Once = Once::new(); if is_bench_full() { ENTRY_COUNTS_FULL } else { - eprintln!("NOTE: skipping 100K entry benchmarks (set BENCH_FULL=1 for full suite)"); + WARN.call_once(|| { + eprintln!("NOTE: skipping 100K entry benchmarks (set BENCH_FULL=1 for full suite)"); + }); ENTRY_COUNTS_QUICK } } From 2ac4f4216aa6249913ec14670b18d4ef1768fcdb Mon Sep 17 00:00:00 2001 From: James Date: Tue, 31 Mar 2026 10:50:56 -0400 Subject: [PATCH 16/24] bench: cold-read parity benchmarks and encoding alignment - Add cold_random_get and cold_sequential_scan parity benchmarks (posix_fadvise FADV_DONTNEED for cache clearing, no-op 
on macOS) - Align all parity bench key/value encoding with evmdb: 32-byte binary keys (parity_key) and 128-byte binary values (parity_value) - Add parity benchmark instructions and table to CLAUDE.md - 10k rows for cold benchmarks Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 34 +++++++ Cargo.toml | 1 + benches/concurrent.rs | 27 +++-- benches/cursor.rs | 14 +-- benches/cursor_write.rs | 31 +++--- benches/scaling.rs | 217 +++++++++++++++++++++++++--------------- benches/utils.rs | 31 ++++++ 7 files changed, 242 insertions(+), 113 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 934e66e..4e97ee5 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -145,3 +145,37 @@ This SHOULD be run alongside local tests and linting, especially for changes tha - Modify build configuration - Add new dependencies - Change platform-specific code + +## Parity Benchmarks + +Parity-tagged benchmarks (`// PARITY:`) have evmdb equivalents with +identical parameters for cross-project comparison. Both repos use +identical 32-byte binary keys and 128-byte binary values via +`parity_key`/`parity_value` in `benches/utils.rs`. 
+ +```bash +# Run all parity benchmarks (native macOS or Linux) +cargo bench --bench cursor_write -- "cursor_write::put::sync|cursor_write::append::sync" +cargo bench --bench cursor -- "cursor::traverse::iter$" +cargo bench --bench concurrent -- "readers_no_writer|readers_one_writer" +cargo bench --bench scaling -- "sequential_get|random_get|full_iteration|append_ordered_put" + +# Cold-read parity benchmarks (Linux only — posix_fadvise is a no-op on macOS) +cargo bench --bench scaling -- "cold_random_get|cold_sequential_scan" +``` + +### Parity bench list + +| Bench | File | evmdb counterpart | +|-------|------|-------------------| +| `cursor_write::put::sync` | cursor_write.rs | write_put_100 | +| `cursor_write::append::sync` | cursor_write.rs | write_put_100_sorted | +| `cursor::traverse::iter` | cursor.rs | cursor_seek_first_iterate | +| `readers_no_writer` | concurrent.rs | readers_no_writer | +| `readers_one_writer` | concurrent.rs | readers_with_writer | +| `sequential_get` | scaling.rs | sequential_get | +| `random_get` | scaling.rs | random_get | +| `full_iteration` | scaling.rs | full_iteration | +| `append_ordered_put` | scaling.rs | put_sorted | +| `cold_random_get` | scaling.rs | cold_random_get | +| `cold_sequential_scan` | scaling.rs | cold_sequential_scan | diff --git a/Cargo.toml b/Cargo.toml index 92682f6..2a52a7c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,6 +45,7 @@ tracing = "0.1.44" [dev-dependencies] criterion = "0.8.1" +libc = "0.2" proptest = "1" rand = "0.9.2" tempfile = "3.20.0" diff --git a/benches/concurrent.rs b/benches/concurrent.rs index 1adfd8e..8ee6269 100644 --- a/benches/concurrent.rs +++ b/benches/concurrent.rs @@ -8,7 +8,7 @@ use std::{ thread, }; use tempfile::tempdir; -use utils::{get_key, is_bench_full, quick_config}; +use utils::{get_key, is_bench_full, parity_key, parity_value, quick_config, setup_parity_env}; const N_ROWS: u32 = 1_000; const READER_COUNTS_FULL: &[usize] = &[1, 4, 8, 32, 128]; @@ -48,8 +48,9 @@ fn 
bench_n_readers_no_writer(c: &mut Criterion) { let mut group = c.benchmark_group("concurrent::readers_no_writer"); for &n_readers in reader_counts() { - let (_dir, env) = setup_arc_env(N_ROWS); - let keys: Arc> = Arc::new((0..N_ROWS).map(get_key).collect()); + let (_dir, env) = setup_parity_env(N_ROWS); + let env = Arc::new(env); + let keys: Arc> = Arc::new((0..N_ROWS).map(parity_key).collect()); group.bench_with_input( BenchmarkId::from_parameter(n_readers), @@ -70,7 +71,7 @@ fn bench_n_readers_no_writer(c: &mut Criterion) { let mut total = 0usize; for key in keys.iter() { total += *txn - .get::(db.dbi(), key.as_bytes()) + .get::(db.dbi(), key.as_slice()) .unwrap() .unwrap(); } @@ -96,8 +97,9 @@ fn bench_n_readers_one_writer(c: &mut Criterion) { let mut group = c.benchmark_group("concurrent::readers_one_writer"); for &n_readers in reader_counts() { - let (_dir, env) = setup_arc_env(N_ROWS); - let keys: Arc> = Arc::new((0..N_ROWS).map(get_key).collect()); + let (_dir, env) = setup_parity_env(N_ROWS); + let env = Arc::new(env); + let keys: Arc> = Arc::new((0..N_ROWS).map(parity_key).collect()); group.bench_with_input( BenchmarkId::from_parameter(n_readers), @@ -119,7 +121,7 @@ fn bench_n_readers_one_writer(c: &mut Criterion) { let mut total = 0usize; for key in keys.iter() { total += *txn - .get::(db.dbi(), key.as_bytes()) + .get::(db.dbi(), key.as_slice()) .unwrap() .unwrap(); } @@ -128,7 +130,7 @@ fn bench_n_readers_one_writer(c: &mut Criterion) { }) .collect(); - // Spawn one writer. + // Spawn one writer inserting one extra parity-encoded entry. 
let writer = { let env = Arc::clone(&env); let barrier = Arc::clone(&barrier); @@ -136,8 +138,13 @@ fn bench_n_readers_one_writer(c: &mut Criterion) { barrier.wait(); let txn = env.begin_rw_sync().unwrap(); let db = txn.open_db(None).unwrap(); - txn.put(db, b"writer_key", b"writer_val", WriteFlags::empty()) - .unwrap(); + txn.put( + db, + parity_key(N_ROWS), + parity_value(N_ROWS), + WriteFlags::empty(), + ) + .unwrap(); txn.commit().unwrap(); }) }; diff --git a/benches/cursor.rs b/benches/cursor.rs index 4a79f63..bf458c8 100644 --- a/benches/cursor.rs +++ b/benches/cursor.rs @@ -54,12 +54,9 @@ fn bench_get_seq_iter(c: &mut Criterion) { // PARITY: evmdb/cursor_seek_first_iterate — DO NOT EDIT without updating evmdb fn bench_get_seq_cursor(c: &mut Criterion) { let n = 1000; - let (_dir, env) = setup_bench_db(n); + let (_dir, env) = setup_parity_env(n); let txn = create_ro_sync(&env); let db = txn.open_db(None).unwrap(); - // Note: setup_bench_db creates a named database which adds metadata to the - // main database, so actual item count is n + 1 - let actual_items = n + 1; c.bench_function("cursor::traverse::iter", |b| { b.iter(|| { let (i, count) = txn @@ -70,7 +67,7 @@ fn bench_get_seq_cursor(c: &mut Criterion) { .fold((0, 0), |(i, count), (key, val)| (i + *key + *val, count + 1)); black_box(i); - assert_eq!(count, actual_items); + assert_eq!(count, n); }) }); } @@ -149,12 +146,9 @@ fn bench_get_seq_iter_single_thread(c: &mut Criterion) { // PARITY: evmdb/cursor_seek_first_iterate — DO NOT EDIT without updating evmdb fn bench_get_seq_cursor_single_thread(c: &mut Criterion) { let n = 1000; - let (_dir, env) = setup_bench_db(n); + let (_dir, env) = setup_parity_env(n); let txn = create_ro_unsync(&env); let db = txn.open_db(None).unwrap(); - // Note: setup_bench_db creates a named database which adds metadata to the - // main database, so actual item count is n + 1 - let actual_items = n + 1; c.bench_function("cursor::traverse::iter::single_thread", |b| { b.iter(|| { 
let (i, count) = txn @@ -165,7 +159,7 @@ fn bench_get_seq_cursor_single_thread(c: &mut Criterion) { .fold((0, 0), |(i, count), (key, val)| (i + *key + *val, count + 1)); black_box(i); - assert_eq!(count, actual_items); + assert_eq!(count, n); }) }); } diff --git a/benches/cursor_write.rs b/benches/cursor_write.rs index fbd5471..f194ef7 100644 --- a/benches/cursor_write.rs +++ b/benches/cursor_write.rs @@ -27,8 +27,9 @@ fn setup_plain_env(n: u32) -> (tempfile::TempDir, signet_libmdbx::Environment) { // PARITY: evmdb/write_put_100 — DO NOT EDIT without updating evmdb fn bench_cursor_put_sync(c: &mut Criterion) { - let items: Vec<(String, Vec)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); - let (_dir, env) = setup_plain_env(0); + let items: Vec<([u8; 32], [u8; 128])> = + (0..N).map(|i| (parity_key(i), parity_value(i))).collect(); + let (_dir, env) = setup_parity_env(0); c.bench_function("cursor_write::put::sync", |b| { b.iter_batched( @@ -40,7 +41,7 @@ fn bench_cursor_put_sync(c: &mut Criterion) { |(txn, db)| { let mut cursor = txn.cursor(db).unwrap(); for (key, data) in &items { - cursor.put(key.as_bytes(), data.as_slice(), WriteFlags::empty()).unwrap(); + cursor.put(key.as_slice(), data.as_slice(), WriteFlags::empty()).unwrap(); } }, BatchSize::PerIteration, @@ -50,8 +51,9 @@ fn bench_cursor_put_sync(c: &mut Criterion) { // PARITY: evmdb/write_put_100 — DO NOT EDIT without updating evmdb fn bench_cursor_put_unsync(c: &mut Criterion) { - let items: Vec<(String, Vec)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); - let (_dir, env) = setup_plain_env(0); + let items: Vec<([u8; 32], [u8; 128])> = + (0..N).map(|i| (parity_key(i), parity_value(i))).collect(); + let (_dir, env) = setup_parity_env(0); c.bench_function("cursor_write::put::single_thread", |b| { b.iter_batched( @@ -63,7 +65,7 @@ fn bench_cursor_put_unsync(c: &mut Criterion) { |(txn, db)| { let mut cursor = txn.cursor(db).unwrap(); for (key, data) in &items { - cursor.put(key.as_bytes(), 
data.as_slice(), WriteFlags::empty()).unwrap(); + cursor.put(key.as_slice(), data.as_slice(), WriteFlags::empty()).unwrap(); } }, BatchSize::PerIteration, @@ -113,9 +115,11 @@ fn bench_cursor_del_unsync(c: &mut Criterion) { // PARITY: evmdb/write_put_100_sorted — DO NOT EDIT without updating evmdb fn bench_cursor_append_sync(c: &mut Criterion) { - // Keys must be lexicographically sorted for append; zero-pad to ensure order. - let items: Vec<(String, Vec)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); - let (_dir, env) = setup_plain_env(0); + // Keys are big-endian u32 in first 4 bytes — inserting 0..N in order is + // already lexicographically sorted, satisfying the append precondition. + let items: Vec<([u8; 32], [u8; 128])> = + (0..N).map(|i| (parity_key(i), parity_value(i))).collect(); + let (_dir, env) = setup_parity_env(0); c.bench_function("cursor_write::append::sync", |b| { b.iter_batched( @@ -127,7 +131,7 @@ fn bench_cursor_append_sync(c: &mut Criterion) { |(txn, db)| { let mut cursor = txn.cursor(db).unwrap(); for (key, data) in &items { - cursor.append(key.as_bytes(), data.as_slice()).unwrap(); + cursor.append(key.as_slice(), data.as_slice()).unwrap(); } }, BatchSize::PerIteration, @@ -137,8 +141,9 @@ fn bench_cursor_append_sync(c: &mut Criterion) { // PARITY: evmdb/write_put_100_sorted — DO NOT EDIT without updating evmdb fn bench_cursor_append_unsync(c: &mut Criterion) { - let items: Vec<(String, Vec)> = (0..N).map(|i| (get_key(i), get_data(i))).collect(); - let (_dir, env) = setup_plain_env(0); + let items: Vec<([u8; 32], [u8; 128])> = + (0..N).map(|i| (parity_key(i), parity_value(i))).collect(); + let (_dir, env) = setup_parity_env(0); c.bench_function("cursor_write::append::single_thread", |b| { b.iter_batched( @@ -150,7 +155,7 @@ fn bench_cursor_append_unsync(c: &mut Criterion) { |(txn, db)| { let mut cursor = txn.cursor(db).unwrap(); for (key, data) in &items { - cursor.append(key.as_bytes(), data.as_slice()).unwrap(); + 
cursor.append(key.as_slice(), data.as_slice()).unwrap(); } }, BatchSize::PerIteration, diff --git a/benches/scaling.rs b/benches/scaling.rs index 5d5133c..af1f454 100644 --- a/benches/scaling.rs +++ b/benches/scaling.rs @@ -2,10 +2,14 @@ mod utils; use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main}; -use rand::{SeedableRng, prelude::SliceRandom, rngs::StdRng}; +use rand::{Rng, SeedableRng, prelude::SliceRandom, rngs::StdRng}; use signet_libmdbx::{Environment, ObjectLength, WriteFlags}; use tempfile::tempdir; -use utils::{is_bench_full, quick_config}; +use utils::{is_bench_full, parity_key, parity_value, quick_config, setup_parity_env}; + +const COLD_N_ROWS: u32 = 10_000; +const COLD_LOOKUPS: u32 = 100; +const COLD_VALUE_SIZE: usize = 128; const ENTRY_COUNTS_FULL: &[u32] = &[100, 1_000, 10_000, 100_000]; const ENTRY_COUNTS_QUICK: &[u32] = &[100, 1_000, 10_000]; @@ -52,24 +56,21 @@ fn setup_scaling_env(n: u32, value_size: usize) -> (tempfile::TempDir, Environme fn bench_sequential_get(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::sequential_get"); - for &size in VALUE_SIZES { - for &n in entry_counts() { - let (_dir, env) = setup_scaling_env(n, size); - let keys: Vec = (0..n).map(format_key).collect(); - - group.bench_with_input(BenchmarkId::new(format!("{size}B"), n), &n, |b, _| { - b.iter(|| { - let txn = env.begin_ro_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - let mut total = 0usize; - for key in &keys { - total += - *txn.get::(db.dbi(), key.as_bytes()).unwrap().unwrap(); - } - total - }) - }); - } + for &n in entry_counts() { + let (_dir, env) = setup_parity_env(n); + let keys: Vec<[u8; 32]> = (0..n).map(parity_key).collect(); + + group.bench_with_input(BenchmarkId::new("128B", n), &n, |b, _| { + b.iter(|| { + let txn = env.begin_ro_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + let mut total = 0usize; + for key in &keys { + total += *txn.get::(db.dbi(), 
key.as_slice()).unwrap().unwrap(); + } + total + }) + }); } group.finish(); } @@ -78,25 +79,22 @@ fn bench_sequential_get(c: &mut Criterion) { fn bench_random_get(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::random_get"); - for &size in VALUE_SIZES { - for &n in entry_counts() { - let (_dir, env) = setup_scaling_env(n, size); - let mut keys: Vec = (0..n).map(format_key).collect(); - keys.shuffle(&mut StdRng::from_seed(Default::default())); - - group.bench_with_input(BenchmarkId::new(format!("{size}B"), n), &n, |b, _| { - b.iter(|| { - let txn = env.begin_ro_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - let mut total = 0usize; - for key in &keys { - total += - *txn.get::(db.dbi(), key.as_bytes()).unwrap().unwrap(); - } - total - }) - }); - } + for &n in entry_counts() { + let (_dir, env) = setup_parity_env(n); + let mut keys: Vec<[u8; 32]> = (0..n).map(parity_key).collect(); + keys.shuffle(&mut StdRng::from_seed(Default::default())); + + group.bench_with_input(BenchmarkId::new("128B", n), &n, |b, _| { + b.iter(|| { + let txn = env.begin_ro_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + let mut total = 0usize; + for key in &keys { + total += *txn.get::(db.dbi(), key.as_slice()).unwrap().unwrap(); + } + total + }) + }); } group.finish(); } @@ -105,23 +103,21 @@ fn bench_random_get(c: &mut Criterion) { fn bench_full_iteration(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::full_iteration"); - for &size in VALUE_SIZES { - for &n in entry_counts() { - let (_dir, env) = setup_scaling_env(n, size); - - group.bench_with_input(BenchmarkId::new(format!("{size}B"), n), &n, |b, _| { - b.iter(|| { - let txn = env.begin_ro_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - let mut cursor = txn.cursor(db).unwrap(); - let mut count = 0usize; - while cursor.next::, Vec>().unwrap().is_some() { - count += 1; - } - count - }) - }); - } + for &n in entry_counts() { + let (_dir, env) = setup_parity_env(n); + + 
group.bench_with_input(BenchmarkId::new("128B", n), &n, |b, _| { + b.iter(|| { + let txn = env.begin_ro_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + let mut cursor = txn.cursor(db).unwrap(); + let mut count = 0usize; + while cursor.next::, Vec>().unwrap().is_some() { + count += 1; + } + count + }) + }); } group.finish(); } @@ -130,34 +126,93 @@ fn bench_full_iteration(c: &mut Criterion) { fn bench_append_ordered_put(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::append_ordered_put"); - for &size in VALUE_SIZES { - for &n in entry_counts() { - let items: Vec<(String, Vec)> = - (0..n).map(|i| (format_key(i), make_value(i, size))).collect(); - - group.bench_with_input(BenchmarkId::new(format!("{size}B"), n), &n, |b, _| { - b.iter_batched( - || { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - (dir, env) - }, - |(_dir, env)| { - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - for (key, data) in &items { - txn.append(db, key.as_bytes(), data.as_slice()).unwrap(); - } - txn.commit().unwrap(); - }, - BatchSize::PerIteration, - ) - }); - } + for &n in entry_counts() { + let items: Vec<([u8; 32], [u8; 128])> = + (0..n).map(|i| (parity_key(i), parity_value(i))).collect(); + + group.bench_with_input(BenchmarkId::new("128B", n), &n, |b, _| { + b.iter_batched( + || { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + (dir, env) + }, + |(_dir, env)| { + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + for (key, data) in &items { + txn.append(db, key.as_slice(), data.as_slice()).unwrap(); + } + txn.commit().unwrap(); + }, + BatchSize::PerIteration, + ) + }); } group.finish(); } +/// Drops OS page cache for the environment's data file via posix_fadvise. 
+#[cfg(target_os = "linux")] +fn drop_os_cache(env: &Environment) { + env.with_raw_env_ptr(|env_ptr| { + let mut fd: libc::c_int = -1; + // SAFETY: env_ptr is valid, fd is a stack variable. + let rc = unsafe { signet_libmdbx::ffi::mdbx_env_get_fd(env_ptr, &mut fd) }; + assert_eq!(rc, 0, "mdbx_env_get_fd failed: {rc}"); + // SAFETY: fd is valid from mdbx_env_get_fd. + let rc = unsafe { libc::posix_fadvise(fd, 0, 0, libc::POSIX_FADV_DONTNEED) }; + assert_eq!(rc, 0, "posix_fadvise failed: {rc}"); + }); +} + +/// Drops OS page cache for the environment's data file via posix_fadvise. +#[cfg(not(target_os = "linux"))] +fn drop_os_cache(_env: &Environment) { + // posix_fadvise not available on macOS; reads will be warm. +} + +// PARITY: evmdb/cold_random_get — DO NOT EDIT without updating evmdb counterpart +fn bench_cold_random_get(c: &mut Criterion) { + let (_dir, env) = setup_parity_env(COLD_N_ROWS); + + let mut rng = StdRng::seed_from_u64(42); + let indices: Vec = (0..COLD_LOOKUPS).map(|_| rng.random_range(0..COLD_N_ROWS)).collect(); + + c.bench_function("cold_random_get", |b| { + b.iter(|| { + drop_os_cache(&env); + for &i in &indices { + let key = parity_key(i); + let txn = env.begin_ro_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + let val: Option = + txn.get::(db.dbi(), key.as_slice()).unwrap(); + assert!(val.is_some()); + } + }); + }); +} + +// PARITY: evmdb/cold_sequential_scan — DO NOT EDIT without updating evmdb counterpart +fn bench_cold_sequential_scan(c: &mut Criterion) { + let (_dir, env) = setup_parity_env(COLD_N_ROWS); + + c.bench_function("cold_sequential_scan", |b| { + b.iter(|| { + drop_os_cache(&env); + let txn = env.begin_ro_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + let mut cursor = txn.cursor(db).unwrap(); + let mut count = 0u32; + while cursor.next::, Vec>().unwrap().is_some() { + count += 1; + } + assert_eq!(count, COLD_N_ROWS); + }); + }); +} + criterion_group! 
{ name = benches; config = quick_config(); @@ -166,6 +221,8 @@ criterion_group! { bench_random_get, bench_full_iteration, bench_append_ordered_put, + bench_cold_random_get, + bench_cold_sequential_scan, } criterion_main!(benches); diff --git a/benches/utils.rs b/benches/utils.rs index b93d330..6601e6a 100644 --- a/benches/utils.rs +++ b/benches/utils.rs @@ -86,6 +86,37 @@ pub fn create_rw_unsync(env: &Environment) -> RwTxUnsync { env.begin_rw_unsync().unwrap() } +/// Parity key encoding — matches evmdb's make_key exactly. +/// 32-byte key with i as big-endian u32 in the first 4 bytes, rest zeroed. +pub fn parity_key(i: u32) -> [u8; 32] { + let mut key = [0u8; 32]; + key[..4].copy_from_slice(&i.to_be_bytes()); + key +} + +/// Parity value encoding — matches evmdb's make_value exactly. +/// 128-byte value with i as little-endian u32 in the first 4 bytes, rest zeroed. +pub fn parity_value(i: u32) -> [u8; 128] { + let mut value = [0u8; 128]; + value[..4].copy_from_slice(&i.to_le_bytes()); + value +} + +/// Set up environment with N rows using parity encoding (default DB only). +pub fn setup_parity_env(n: u32) -> (TempDir, Environment) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + { + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + for i in 0..n { + txn.put(db, parity_key(i), parity_value(i), WriteFlags::empty()).unwrap(); + } + txn.commit().unwrap(); + } + (dir, env) +} + /// Create a temporary benchmark database with the specified number of rows. pub fn setup_bench_db(num_rows: u32) -> (TempDir, Environment) { let dir = tempdir().unwrap(); From 713e1fd1d9272081c8c76c34176ca4296cb689f9 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 31 Mar 2026 15:57:09 -0400 Subject: [PATCH 17/24] bench: fix concurrent reader benchmarks for 128-reader parity Set max_readers = 256 on the mdbx environment for concurrent benchmarks to prevent ReadersFull panics at 128 readers. 
Hardcode READER_COUNTS to [1, 4, 8, 32, 128] (removing BENCH_FULL gating), add black_box to prevent dead-code elimination, and use parity key encoding throughout. Co-Authored-By: Claude Opus 4.6 (1M context) --- benches/concurrent.rs | 68 +++++++++++++------------------------------ benches/utils.rs | 15 +++++++++- 2 files changed, 35 insertions(+), 48 deletions(-) diff --git a/benches/concurrent.rs b/benches/concurrent.rs index 8ee6269..cad6407 100644 --- a/benches/concurrent.rs +++ b/benches/concurrent.rs @@ -2,53 +2,27 @@ mod utils; use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main}; -use signet_libmdbx::{Environment, ObjectLength, WriteFlags}; +use signet_libmdbx::{ObjectLength, WriteFlags}; use std::{ + hint::black_box, sync::{Arc, Barrier}, thread, }; -use tempfile::tempdir; -use utils::{get_key, is_bench_full, parity_key, parity_value, quick_config, setup_parity_env}; +use utils::{parity_key, parity_value, quick_config, setup_parity_env_with_max_readers}; const N_ROWS: u32 = 1_000; -const READER_COUNTS_FULL: &[usize] = &[1, 4, 8, 32, 128]; -const READER_COUNTS_QUICK: &[usize] = &[1, 4, 8, 32]; - -fn reader_counts() -> &'static [usize] { - use std::sync::Once; - static WARN: Once = Once::new(); - if is_bench_full() { - READER_COUNTS_FULL - } else { - WARN.call_once(|| { - eprintln!("NOTE: skipping 128-reader benchmarks (set BENCH_FULL=1 for full suite)"); - }); - READER_COUNTS_QUICK - } -} +const READER_COUNTS: &[usize] = &[1, 4, 8, 32, 128]; -fn setup_arc_env(n: u32) -> (tempfile::TempDir, Arc) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - { - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - for i in 0..n { - let value: Vec = - format!("data{i:010}").as_bytes().iter().copied().cycle().take(128).collect(); - txn.put(db, get_key(i), value, WriteFlags::empty()).unwrap(); - } - txn.commit().unwrap(); - } - (dir, Arc::new(env)) -} +/// Max readers 
set high enough for the largest reader count plus criterion +/// overhead threads. +const MAX_READERS: u64 = 256; // PARITY: evmdb/readers_no_writer — DO NOT EDIT without updating evmdb fn bench_n_readers_no_writer(c: &mut Criterion) { let mut group = c.benchmark_group("concurrent::readers_no_writer"); - for &n_readers in reader_counts() { - let (_dir, env) = setup_parity_env(N_ROWS); + for &n_readers in READER_COUNTS { + let (_dir, env) = setup_parity_env_with_max_readers(N_ROWS, Some(MAX_READERS)); let env = Arc::new(env); let keys: Arc> = Arc::new((0..N_ROWS).map(parity_key).collect()); @@ -75,7 +49,7 @@ fn bench_n_readers_no_writer(c: &mut Criterion) { .unwrap() .unwrap(); } - total + black_box(total) }) }) .collect(); @@ -96,8 +70,8 @@ fn bench_n_readers_no_writer(c: &mut Criterion) { fn bench_n_readers_one_writer(c: &mut Criterion) { let mut group = c.benchmark_group("concurrent::readers_one_writer"); - for &n_readers in reader_counts() { - let (_dir, env) = setup_parity_env(N_ROWS); + for &n_readers in READER_COUNTS { + let (_dir, env) = setup_parity_env_with_max_readers(N_ROWS, Some(MAX_READERS)); let env = Arc::new(env); let keys: Arc> = Arc::new((0..N_ROWS).map(parity_key).collect()); @@ -125,7 +99,7 @@ fn bench_n_readers_one_writer(c: &mut Criterion) { .unwrap() .unwrap(); } - total + black_box(total) }) }) .collect(); @@ -165,18 +139,18 @@ fn bench_n_readers_one_writer(c: &mut Criterion) { /// Single-thread comparison: sync vs unsync transaction creation. 
fn bench_single_thread_sync_vs_unsync(c: &mut Criterion) { - let (_dir, env) = setup_arc_env(N_ROWS); - let keys: Vec = (0..N_ROWS).map(get_key).collect(); + let (_dir, env) = setup_parity_env_with_max_readers(N_ROWS, None); + let keys: Arc> = Arc::new((0..N_ROWS).map(parity_key).collect()); c.bench_function("concurrent::single_thread::sync", |b| { b.iter(|| { let txn = env.begin_ro_sync().unwrap(); let db = txn.open_db(None).unwrap(); let mut total = 0usize; - for key in &keys { - total += *txn.get::(db.dbi(), key.as_bytes()).unwrap().unwrap(); + for key in keys.iter() { + total += *txn.get::(db.dbi(), key.as_slice()).unwrap().unwrap(); } - total + black_box(total) }) }); @@ -185,10 +159,10 @@ fn bench_single_thread_sync_vs_unsync(c: &mut Criterion) { let txn = env.begin_ro_unsync().unwrap(); let db = txn.open_db(None).unwrap(); let mut total = 0usize; - for key in &keys { - total += *txn.get::(db.dbi(), key.as_bytes()).unwrap().unwrap(); + for key in keys.iter() { + total += *txn.get::(db.dbi(), key.as_slice()).unwrap().unwrap(); } - total + black_box(total) }) }); } diff --git a/benches/utils.rs b/benches/utils.rs index 6601e6a..0e1882b 100644 --- a/benches/utils.rs +++ b/benches/utils.rs @@ -104,8 +104,21 @@ pub fn parity_value(i: u32) -> [u8; 128] { /// Set up environment with N rows using parity encoding (default DB only). pub fn setup_parity_env(n: u32) -> (TempDir, Environment) { + setup_parity_env_with_max_readers(n, None) +} + +/// Set up environment with N rows using parity encoding and a custom max +/// reader count. Pass [`None`] for the mdbx default (126). 
+pub fn setup_parity_env_with_max_readers( + n: u32, + max_readers: Option, +) -> (TempDir, Environment) { let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); + let mut builder = Environment::builder(); + if let Some(max) = max_readers { + builder.set_max_readers(max); + } + let env = builder.open(dir.path()).unwrap(); { let txn = env.begin_rw_unsync().unwrap(); let db = txn.open_db(None).unwrap(); From 16ea2cc80b87300a6dabd785ab475d0aa9cc1357 Mon Sep 17 00:00:00 2001 From: James Date: Wed, 1 Apr 2026 07:04:02 -0400 Subject: [PATCH 18/24] bench: add committed parity benchmarks with durable and nosync modes Adds put+commit and append+commit benchmarks that include txn.commit() in the measured closure, with both MDBX_SYNC_DURABLE and MDBX_SAFE_NOSYNC variants for parity comparison with evmdb's commit_blocking_durable and commit_blocking. Co-Authored-By: Claude Opus 4.6 (1M context) --- benches/cursor_write.rs | 86 +++++++++++++++++++++++++++++++++++++++++ benches/utils.rs | 21 +++++++++- 2 files changed, 106 insertions(+), 1 deletion(-) diff --git a/benches/cursor_write.rs b/benches/cursor_write.rs index f194ef7..0ed5aee 100644 --- a/benches/cursor_write.rs +++ b/benches/cursor_write.rs @@ -73,6 +73,90 @@ fn bench_cursor_put_unsync(c: &mut Criterion) { }); } +// PUT + COMMIT (durable) + +// PARITY: evmdb/write_put_100 (durable) — DO NOT EDIT without updating evmdb +fn bench_cursor_put_commit_durable(c: &mut Criterion) { + let items: Vec<([u8; 32], [u8; 128])> = + (0..N).map(|i| (parity_key(i), parity_value(i))).collect(); + let (_dir, env) = setup_parity_env(0); + + c.bench_function("cursor_write::put_commit::durable", |b| { + b.iter(|| { + let txn = create_rw_unsync(&env); + let db = txn.open_db(None).unwrap(); + let mut cursor = txn.cursor(db).unwrap(); + for (key, data) in &items { + cursor.put(key.as_slice(), data.as_slice(), WriteFlags::empty()).unwrap(); + } + drop(cursor); + txn.commit().unwrap(); + }) + }); +} + +// 
PARITY: evmdb/write_put_100_nondurable — DO NOT EDIT without updating evmdb +fn bench_cursor_put_commit_nosync(c: &mut Criterion) { + let items: Vec<([u8; 32], [u8; 128])> = + (0..N).map(|i| (parity_key(i), parity_value(i))).collect(); + let (_dir, env) = setup_parity_env_nosync(0); + + c.bench_function("cursor_write::put_commit::nosync", |b| { + b.iter(|| { + let txn = create_rw_unsync(&env); + let db = txn.open_db(None).unwrap(); + let mut cursor = txn.cursor(db).unwrap(); + for (key, data) in &items { + cursor.put(key.as_slice(), data.as_slice(), WriteFlags::empty()).unwrap(); + } + drop(cursor); + txn.commit().unwrap(); + }) + }); +} + +// APPEND + COMMIT (durable) + +// PARITY: evmdb/write_put_100_sorted (durable) — DO NOT EDIT without updating evmdb +fn bench_cursor_append_commit_durable(c: &mut Criterion) { + let items: Vec<([u8; 32], [u8; 128])> = + (0..N).map(|i| (parity_key(i), parity_value(i))).collect(); + let (_dir, env) = setup_parity_env(0); + + c.bench_function("cursor_write::append_commit::durable", |b| { + b.iter(|| { + let txn = create_rw_unsync(&env); + let db = txn.open_db(None).unwrap(); + let mut cursor = txn.cursor(db).unwrap(); + for (key, data) in &items { + cursor.append(key.as_slice(), data.as_slice()).unwrap(); + } + drop(cursor); + txn.commit().unwrap(); + }) + }); +} + +// PARITY: evmdb/write_put_100_sorted_nondurable — DO NOT EDIT without updating evmdb +fn bench_cursor_append_commit_nosync(c: &mut Criterion) { + let items: Vec<([u8; 32], [u8; 128])> = + (0..N).map(|i| (parity_key(i), parity_value(i))).collect(); + let (_dir, env) = setup_parity_env_nosync(0); + + c.bench_function("cursor_write::append_commit::nosync", |b| { + b.iter(|| { + let txn = create_rw_unsync(&env); + let db = txn.open_db(None).unwrap(); + let mut cursor = txn.cursor(db).unwrap(); + for (key, data) in &items { + cursor.append(key.as_slice(), data.as_slice()).unwrap(); + } + drop(cursor); + txn.commit().unwrap(); + }) + }); +} + // DEL fn 
bench_cursor_del_sync(c: &mut Criterion) { @@ -224,8 +308,10 @@ criterion_group! { config = quick_config(); targets = bench_cursor_put_sync, bench_cursor_put_unsync, + bench_cursor_put_commit_durable, bench_cursor_put_commit_nosync, bench_cursor_del_sync, bench_cursor_del_unsync, bench_cursor_append_sync, bench_cursor_append_unsync, + bench_cursor_append_commit_durable, bench_cursor_append_commit_nosync, bench_cursor_append_dup_sync, bench_cursor_append_dup_unsync, } diff --git a/benches/utils.rs b/benches/utils.rs index 0e1882b..29d996e 100644 --- a/benches/utils.rs +++ b/benches/utils.rs @@ -3,7 +3,7 @@ use criterion::Criterion; use signet_libmdbx::{ - Environment, WriteFlags, + Environment, Mode, SyncMode, WriteFlags, ffi::{MDBX_TXN_RDONLY, MDBX_env, MDBX_txn, mdbx_txn_begin_ex}, tx::aliases::{RoTxSync, RoTxUnsync, RwTxSync, RwTxUnsync}, }; @@ -103,10 +103,29 @@ pub fn parity_value(i: u32) -> [u8; 128] { } /// Set up environment with N rows using parity encoding (default DB only). +/// Uses the default durable sync mode. pub fn setup_parity_env(n: u32) -> (TempDir, Environment) { setup_parity_env_with_max_readers(n, None) } +/// Set up environment with N rows using SafeNoSync mode (no fsync). +pub fn setup_parity_env_nosync(n: u32) -> (TempDir, Environment) { + let dir = tempdir().unwrap(); + let env = Environment::builder() + .set_flags(Mode::ReadWrite { sync_mode: SyncMode::SafeNoSync }.into()) + .open(dir.path()) + .unwrap(); + { + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + for i in 0..n { + txn.put(db, parity_key(i), parity_value(i), WriteFlags::empty()).unwrap(); + } + txn.commit().unwrap(); + } + (dir, env) +} + /// Set up environment with N rows using parity encoding and a custom max /// reader count. Pass [`None`] for the mdbx default (126). 
pub fn setup_parity_env_with_max_readers( From aad9d1f2d2ded8aeaaf155f045aff48c2f87fa66 Mon Sep 17 00:00:00 2001 From: James Date: Wed, 1 Apr 2026 07:29:20 -0400 Subject: [PATCH 19/24] bench: align parity benchmarks with evmdb counterparts - Move open_db out of timed regions (real apps open once at startup) - Advance key base across write iterations (measure tree growth, not overwrites) - Read full values instead of ObjectLength in get benchmarks - Use ObjectLength for iteration benchmarks (no per-entry Vec allocation) - New read txn each iteration in cursor iteration bench - Add value size matrix (32B, 128B, 512B, 4096B) to scaling benches - 4096B hits overflow pages on both engines - Fix writer key to N_ROWS+1 in readers_with_writer - Add parity_value_sized and setup_parity_env_sized helpers Co-Authored-By: Claude Opus 4.6 (1M context) --- benches/concurrent.rs | 32 +++--- benches/cursor.rs | 16 ++- benches/cursor_write.rs | 85 ++++++++++++---- benches/scaling.rs | 209 +++++++++++++++++++++------------------- benches/utils.rs | 26 ++++- 5 files changed, 232 insertions(+), 136 deletions(-) diff --git a/benches/concurrent.rs b/benches/concurrent.rs index cad6407..f680968 100644 --- a/benches/concurrent.rs +++ b/benches/concurrent.rs @@ -4,6 +4,7 @@ mod utils; use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main}; use signet_libmdbx::{ObjectLength, WriteFlags}; use std::{ + borrow::Cow, hint::black_box, sync::{Arc, Barrier}, thread, @@ -25,6 +26,11 @@ fn bench_n_readers_no_writer(c: &mut Criterion) { let (_dir, env) = setup_parity_env_with_max_readers(N_ROWS, Some(MAX_READERS)); let env = Arc::new(env); let keys: Arc> = Arc::new((0..N_ROWS).map(parity_key).collect()); + // Open the db handle once — dbi is stable for the environment lifetime. 
+ let db = { + let txn = env.begin_ro_sync().unwrap(); + txn.open_db(None).unwrap() + }; group.bench_with_input( BenchmarkId::from_parameter(n_readers), @@ -40,14 +46,12 @@ fn bench_n_readers_no_writer(c: &mut Criterion) { let barrier = Arc::clone(&barrier); thread::spawn(move || { let txn = env.begin_ro_sync().unwrap(); - let db = txn.open_db(None).unwrap(); barrier.wait(); let mut total = 0usize; for key in keys.iter() { - total += *txn - .get::(db.dbi(), key.as_slice()) - .unwrap() - .unwrap(); + let val: Cow<'_, [u8]> = + txn.get(db.dbi(), key.as_slice()).unwrap().unwrap(); + total += val.len(); } black_box(total) }) @@ -74,6 +78,11 @@ fn bench_n_readers_one_writer(c: &mut Criterion) { let (_dir, env) = setup_parity_env_with_max_readers(N_ROWS, Some(MAX_READERS)); let env = Arc::new(env); let keys: Arc> = Arc::new((0..N_ROWS).map(parity_key).collect()); + // Open the db handle once — dbi is stable for the environment lifetime. + let db = { + let txn = env.begin_ro_sync().unwrap(); + txn.open_db(None).unwrap() + }; group.bench_with_input( BenchmarkId::from_parameter(n_readers), @@ -90,14 +99,12 @@ fn bench_n_readers_one_writer(c: &mut Criterion) { let barrier = Arc::clone(&barrier); thread::spawn(move || { let txn = env.begin_ro_sync().unwrap(); - let db = txn.open_db(None).unwrap(); barrier.wait(); let mut total = 0usize; for key in keys.iter() { - total += *txn - .get::(db.dbi(), key.as_slice()) - .unwrap() - .unwrap(); + let val: Cow<'_, [u8]> = + txn.get(db.dbi(), key.as_slice()).unwrap().unwrap(); + total += val.len(); } black_box(total) }) @@ -111,11 +118,10 @@ fn bench_n_readers_one_writer(c: &mut Criterion) { thread::spawn(move || { barrier.wait(); let txn = env.begin_rw_sync().unwrap(); - let db = txn.open_db(None).unwrap(); txn.put( db, - parity_key(N_ROWS), - parity_value(N_ROWS), + parity_key(N_ROWS + 1), + parity_value(N_ROWS + 1), WriteFlags::empty(), ) .unwrap(); diff --git a/benches/cursor.rs b/benches/cursor.rs index bf458c8..742248d 100644 --- 
a/benches/cursor.rs +++ b/benches/cursor.rs @@ -55,10 +55,14 @@ fn bench_get_seq_iter(c: &mut Criterion) { fn bench_get_seq_cursor(c: &mut Criterion) { let n = 1000; let (_dir, env) = setup_parity_env(n); - let txn = create_ro_sync(&env); - let db = txn.open_db(None).unwrap(); + // Open the db handle once — dbi is stable for the environment lifetime. + let db = { + let txn = create_ro_sync(&env); + txn.open_db(None).unwrap() + }; c.bench_function("cursor::traverse::iter", |b| { b.iter(|| { + let txn = create_ro_sync(&env); let (i, count) = txn .cursor(db) .unwrap() @@ -147,10 +151,14 @@ fn bench_get_seq_iter_single_thread(c: &mut Criterion) { fn bench_get_seq_cursor_single_thread(c: &mut Criterion) { let n = 1000; let (_dir, env) = setup_parity_env(n); - let txn = create_ro_unsync(&env); - let db = txn.open_db(None).unwrap(); + // Open the db handle once — dbi is stable for the environment lifetime. + let db = { + let txn = create_ro_unsync(&env); + txn.open_db(None).unwrap() + }; c.bench_function("cursor::traverse::iter::single_thread", |b| { b.iter(|| { + let txn = create_ro_unsync(&env); let (i, count) = txn .cursor(db) .unwrap() diff --git a/benches/cursor_write.rs b/benches/cursor_write.rs index 0ed5aee..29a0e0e 100644 --- a/benches/cursor_write.rs +++ b/benches/cursor_write.rs @@ -3,6 +3,7 @@ mod utils; use criterion::{BatchSize, Criterion, criterion_group, criterion_main}; use signet_libmdbx::{DatabaseFlags, WriteFlags}; +use std::cell::Cell; use utils::*; const N: u32 = 100; @@ -77,40 +78,64 @@ fn bench_cursor_put_unsync(c: &mut Criterion) { // PARITY: evmdb/write_put_100 (durable) — DO NOT EDIT without updating evmdb fn bench_cursor_put_commit_durable(c: &mut Criterion) { - let items: Vec<([u8; 32], [u8; 128])> = - (0..N).map(|i| (parity_key(i), parity_value(i))).collect(); let (_dir, env) = setup_parity_env(0); + // Open the db handle once — dbi is stable for the environment lifetime. 
+ let db = { + let txn = create_rw_unsync(&env); + let db = txn.open_db(None).unwrap(); + txn.commit().unwrap(); + db + }; + // Advancing base counter — each iteration writes to fresh keys, matching + // evmdb's pattern. XOR with a fixed value to produce unsorted order. + let base = Cell::new(0u32); + let xor_mask = 0xDEAD_BEEFu32; c.bench_function("cursor_write::put_commit::durable", |b| { b.iter(|| { + let b_val = base.get(); let txn = create_rw_unsync(&env); - let db = txn.open_db(None).unwrap(); let mut cursor = txn.cursor(db).unwrap(); - for (key, data) in &items { - cursor.put(key.as_slice(), data.as_slice(), WriteFlags::empty()).unwrap(); + for i in 0..N { + let key = parity_key((b_val + i) ^ xor_mask); + let value = parity_value(b_val + i); + cursor.put(key.as_slice(), value.as_slice(), WriteFlags::empty()).unwrap(); } drop(cursor); txn.commit().unwrap(); + base.set(b_val + N); }) }); } // PARITY: evmdb/write_put_100_nondurable — DO NOT EDIT without updating evmdb fn bench_cursor_put_commit_nosync(c: &mut Criterion) { - let items: Vec<([u8; 32], [u8; 128])> = - (0..N).map(|i| (parity_key(i), parity_value(i))).collect(); let (_dir, env) = setup_parity_env_nosync(0); + // Open the db handle once — dbi is stable for the environment lifetime. + let db = { + let txn = create_rw_unsync(&env); + let db = txn.open_db(None).unwrap(); + txn.commit().unwrap(); + db + }; + // Advancing base counter — each iteration writes to fresh keys, matching + // evmdb's pattern. XOR with a fixed value to produce unsorted order. 
+ let base = Cell::new(0u32); + let xor_mask = 0xDEAD_BEEFu32; c.bench_function("cursor_write::put_commit::nosync", |b| { b.iter(|| { + let b_val = base.get(); let txn = create_rw_unsync(&env); - let db = txn.open_db(None).unwrap(); let mut cursor = txn.cursor(db).unwrap(); - for (key, data) in &items { - cursor.put(key.as_slice(), data.as_slice(), WriteFlags::empty()).unwrap(); + for i in 0..N { + let key = parity_key((b_val + i) ^ xor_mask); + let value = parity_value(b_val + i); + cursor.put(key.as_slice(), value.as_slice(), WriteFlags::empty()).unwrap(); } drop(cursor); txn.commit().unwrap(); + base.set(b_val + N); }) }); } @@ -119,40 +144,60 @@ fn bench_cursor_put_commit_nosync(c: &mut Criterion) { // PARITY: evmdb/write_put_100_sorted (durable) — DO NOT EDIT without updating evmdb fn bench_cursor_append_commit_durable(c: &mut Criterion) { - let items: Vec<([u8; 32], [u8; 128])> = - (0..N).map(|i| (parity_key(i), parity_value(i))).collect(); let (_dir, env) = setup_parity_env(0); + // Open the db handle once — dbi is stable for the environment lifetime. + let db = { + let txn = create_rw_unsync(&env); + let db = txn.open_db(None).unwrap(); + txn.commit().unwrap(); + db + }; + // Advancing base counter — each iteration appends to fresh sorted keys. 
+ let base = Cell::new(0u32); c.bench_function("cursor_write::append_commit::durable", |b| { b.iter(|| { + let b_val = base.get(); let txn = create_rw_unsync(&env); - let db = txn.open_db(None).unwrap(); let mut cursor = txn.cursor(db).unwrap(); - for (key, data) in &items { - cursor.append(key.as_slice(), data.as_slice()).unwrap(); + for i in 0..N { + let key = parity_key(b_val + i); + let value = parity_value(b_val + i); + cursor.append(key.as_slice(), value.as_slice()).unwrap(); } drop(cursor); txn.commit().unwrap(); + base.set(b_val + N); }) }); } // PARITY: evmdb/write_put_100_sorted_nondurable — DO NOT EDIT without updating evmdb fn bench_cursor_append_commit_nosync(c: &mut Criterion) { - let items: Vec<([u8; 32], [u8; 128])> = - (0..N).map(|i| (parity_key(i), parity_value(i))).collect(); let (_dir, env) = setup_parity_env_nosync(0); + // Open the db handle once — dbi is stable for the environment lifetime. + let db = { + let txn = create_rw_unsync(&env); + let db = txn.open_db(None).unwrap(); + txn.commit().unwrap(); + db + }; + // Advancing base counter — each iteration appends to fresh sorted keys. 
+ let base = Cell::new(0u32); c.bench_function("cursor_write::append_commit::nosync", |b| { b.iter(|| { + let b_val = base.get(); let txn = create_rw_unsync(&env); - let db = txn.open_db(None).unwrap(); let mut cursor = txn.cursor(db).unwrap(); - for (key, data) in &items { - cursor.append(key.as_slice(), data.as_slice()).unwrap(); + for i in 0..N { + let key = parity_key(b_val + i); + let value = parity_value(b_val + i); + cursor.append(key.as_slice(), value.as_slice()).unwrap(); } drop(cursor); txn.commit().unwrap(); + base.set(b_val + N); }) }); } diff --git a/benches/scaling.rs b/benches/scaling.rs index af1f454..8edf902 100644 --- a/benches/scaling.rs +++ b/benches/scaling.rs @@ -3,17 +3,22 @@ mod utils; use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main}; use rand::{Rng, SeedableRng, prelude::SliceRandom, rngs::StdRng}; -use signet_libmdbx::{Environment, ObjectLength, WriteFlags}; +use signet_libmdbx::{Environment, ObjectLength}; +use std::borrow::Cow; use tempfile::tempdir; -use utils::{is_bench_full, parity_key, parity_value, quick_config, setup_parity_env}; +use utils::{ + is_bench_full, parity_key, parity_value_sized, quick_config, setup_parity_env, + setup_parity_env_sized, +}; const COLD_N_ROWS: u32 = 10_000; const COLD_LOOKUPS: u32 = 100; -const COLD_VALUE_SIZE: usize = 128; const ENTRY_COUNTS_FULL: &[u32] = &[100, 1_000, 10_000, 100_000]; const ENTRY_COUNTS_QUICK: &[u32] = &[100, 1_000, 10_000]; -const VALUE_SIZES: &[usize] = &[32, 128, 512]; +/// Value sizes for parity benchmarks. 4096B exceeds the mdbx overflow +/// threshold (~1996B for 32-byte keys on 4KB pages). 
+const VALUE_SIZES: &[usize] = &[32, 128, 512, 4096]; fn entry_counts() -> &'static [u32] { use std::sync::Once; @@ -28,49 +33,33 @@ fn entry_counts() -> &'static [u32] { } } -fn format_key(i: u32) -> String { - format!("key{i:028}") -} - -fn make_value(i: u32, size: usize) -> Vec { - let seed = format!("data{i:010}"); - seed.as_bytes().iter().copied().cycle().take(size).collect() -} - -/// Set up a plain environment (default db only) with N entries pre-populated. -fn setup_scaling_env(n: u32, value_size: usize) -> (tempfile::TempDir, Environment) { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - { - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - for i in 0..n { - txn.put(db, format_key(i), make_value(i, value_size), WriteFlags::empty()).unwrap(); - } - txn.commit().unwrap(); - } - (dir, env) -} - // PARITY: evmdb/sequential_get — DO NOT EDIT without updating evmdb fn bench_sequential_get(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::sequential_get"); - for &n in entry_counts() { - let (_dir, env) = setup_parity_env(n); - let keys: Vec<[u8; 32]> = (0..n).map(parity_key).collect(); - - group.bench_with_input(BenchmarkId::new("128B", n), &n, |b, _| { - b.iter(|| { + for &size in VALUE_SIZES { + for &n in entry_counts() { + let (_dir, env) = setup_parity_env_sized(n, size); + let keys: Vec<[u8; 32]> = (0..n).map(parity_key).collect(); + // Open the db handle once — dbi is stable for the environment lifetime. 
+ let db = { let txn = env.begin_ro_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - let mut total = 0usize; - for key in &keys { - total += *txn.get::(db.dbi(), key.as_slice()).unwrap().unwrap(); - } - total - }) - }); + txn.open_db(None).unwrap() + }; + + group.bench_with_input(BenchmarkId::new(format!("{size}B"), n), &n, |b, _| { + b.iter(|| { + let txn = env.begin_ro_unsync().unwrap(); + let mut total = 0usize; + for key in &keys { + let val: Cow<'_, [u8]> = + txn.get(db.dbi(), key.as_slice()).unwrap().unwrap(); + total += val.len(); + } + total + }) + }); + } } group.finish(); } @@ -79,22 +68,30 @@ fn bench_sequential_get(c: &mut Criterion) { fn bench_random_get(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::random_get"); - for &n in entry_counts() { - let (_dir, env) = setup_parity_env(n); - let mut keys: Vec<[u8; 32]> = (0..n).map(parity_key).collect(); - keys.shuffle(&mut StdRng::from_seed(Default::default())); - - group.bench_with_input(BenchmarkId::new("128B", n), &n, |b, _| { - b.iter(|| { + for &size in VALUE_SIZES { + for &n in entry_counts() { + let (_dir, env) = setup_parity_env_sized(n, size); + let mut keys: Vec<[u8; 32]> = (0..n).map(parity_key).collect(); + keys.shuffle(&mut StdRng::from_seed(Default::default())); + // Open the db handle once — dbi is stable for the environment lifetime. 
+ let db = { let txn = env.begin_ro_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - let mut total = 0usize; - for key in &keys { - total += *txn.get::(db.dbi(), key.as_slice()).unwrap().unwrap(); - } - total - }) - }); + txn.open_db(None).unwrap() + }; + + group.bench_with_input(BenchmarkId::new(format!("{size}B"), n), &n, |b, _| { + b.iter(|| { + let txn = env.begin_ro_unsync().unwrap(); + let mut total = 0usize; + for key in &keys { + let val: Cow<'_, [u8]> = + txn.get(db.dbi(), key.as_slice()).unwrap().unwrap(); + total += val.len(); + } + total + }) + }); + } } group.finish(); } @@ -103,21 +100,27 @@ fn bench_random_get(c: &mut Criterion) { fn bench_full_iteration(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::full_iteration"); - for &n in entry_counts() { - let (_dir, env) = setup_parity_env(n); - - group.bench_with_input(BenchmarkId::new("128B", n), &n, |b, _| { - b.iter(|| { + for &size in VALUE_SIZES { + for &n in entry_counts() { + let (_dir, env) = setup_parity_env_sized(n, size); + // Open the db handle once — dbi is stable for the environment lifetime. 
+ let db = { let txn = env.begin_ro_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - let mut cursor = txn.cursor(db).unwrap(); - let mut count = 0usize; - while cursor.next::, Vec>().unwrap().is_some() { - count += 1; - } - count - }) - }); + txn.open_db(None).unwrap() + }; + + group.bench_with_input(BenchmarkId::new(format!("{size}B"), n), &n, |b, _| { + b.iter(|| { + let txn = env.begin_ro_unsync().unwrap(); + let mut cursor = txn.cursor(db).unwrap(); + let mut count = 0usize; + while cursor.next::().unwrap().is_some() { + count += 1; + } + count + }) + }); + } } group.finish(); } @@ -126,28 +129,30 @@ fn bench_full_iteration(c: &mut Criterion) { fn bench_append_ordered_put(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::append_ordered_put"); - for &n in entry_counts() { - let items: Vec<([u8; 32], [u8; 128])> = - (0..n).map(|i| (parity_key(i), parity_value(i))).collect(); - - group.bench_with_input(BenchmarkId::new("128B", n), &n, |b, _| { - b.iter_batched( - || { - let dir = tempdir().unwrap(); - let env = Environment::builder().open(dir.path()).unwrap(); - (dir, env) - }, - |(_dir, env)| { - let txn = env.begin_rw_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - for (key, data) in &items { - txn.append(db, key.as_slice(), data.as_slice()).unwrap(); - } - txn.commit().unwrap(); - }, - BatchSize::PerIteration, - ) - }); + for &size in VALUE_SIZES { + for &n in entry_counts() { + let items: Vec<([u8; 32], Vec)> = + (0..n).map(|i| (parity_key(i), parity_value_sized(i, size))).collect(); + + group.bench_with_input(BenchmarkId::new(format!("{size}B"), n), &n, |b, _| { + b.iter_batched( + || { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + (dir, env) + }, + |(_dir, env)| { + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + for (key, data) in &items { + txn.append(db, key.as_slice(), data.as_slice()).unwrap(); + } + txn.commit().unwrap(); + }, + 
BatchSize::PerIteration, + ) + }); + } } group.finish(); } @@ -175,6 +180,11 @@ fn drop_os_cache(_env: &Environment) { // PARITY: evmdb/cold_random_get — DO NOT EDIT without updating evmdb counterpart fn bench_cold_random_get(c: &mut Criterion) { let (_dir, env) = setup_parity_env(COLD_N_ROWS); + // Open the db handle once — dbi is stable for the environment lifetime. + let db = { + let txn = env.begin_ro_unsync().unwrap(); + txn.open_db(None).unwrap() + }; let mut rng = StdRng::seed_from_u64(42); let indices: Vec = (0..COLD_LOOKUPS).map(|_| rng.random_range(0..COLD_N_ROWS)).collect(); @@ -185,9 +195,8 @@ fn bench_cold_random_get(c: &mut Criterion) { for &i in &indices { let key = parity_key(i); let txn = env.begin_ro_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); - let val: Option = - txn.get::(db.dbi(), key.as_slice()).unwrap(); + let val: Option> = + txn.get(db.dbi(), key.as_slice()).unwrap(); assert!(val.is_some()); } }); @@ -197,15 +206,19 @@ fn bench_cold_random_get(c: &mut Criterion) { // PARITY: evmdb/cold_sequential_scan — DO NOT EDIT without updating evmdb counterpart fn bench_cold_sequential_scan(c: &mut Criterion) { let (_dir, env) = setup_parity_env(COLD_N_ROWS); + // Open the db handle once — dbi is stable for the environment lifetime. 
+ let db = { + let txn = env.begin_ro_unsync().unwrap(); + txn.open_db(None).unwrap() + }; c.bench_function("cold_sequential_scan", |b| { b.iter(|| { drop_os_cache(&env); let txn = env.begin_ro_unsync().unwrap(); - let db = txn.open_db(None).unwrap(); let mut cursor = txn.cursor(db).unwrap(); let mut count = 0u32; - while cursor.next::, Vec>().unwrap().is_some() { + while cursor.next::().unwrap().is_some() { count += 1; } assert_eq!(count, COLD_N_ROWS); diff --git a/benches/utils.rs b/benches/utils.rs index 29d996e..c520326 100644 --- a/benches/utils.rs +++ b/benches/utils.rs @@ -102,12 +102,36 @@ pub fn parity_value(i: u32) -> [u8; 128] { value } +/// Variable-size parity value encoding — matches evmdb's make_value(i, size). +/// Repeats the little-endian u32 bytes of `i` across `size` bytes. +pub fn parity_value_sized(i: u32, size: usize) -> Vec { + let bytes = i.to_le_bytes(); + (0..size).map(|j| bytes[j % 4]).collect() +} + /// Set up environment with N rows using parity encoding (default DB only). -/// Uses the default durable sync mode. +/// Uses the default durable sync mode. Values are 128 bytes. pub fn setup_parity_env(n: u32) -> (TempDir, Environment) { setup_parity_env_with_max_readers(n, None) } +/// Set up environment with N rows using variable-size parity encoding. +/// Uses the default durable sync mode. +pub fn setup_parity_env_sized(n: u32, value_size: usize) -> (TempDir, Environment) { + let dir = tempdir().unwrap(); + let env = Environment::builder().open(dir.path()).unwrap(); + { + let txn = env.begin_rw_unsync().unwrap(); + let db = txn.open_db(None).unwrap(); + for i in 0..n { + txn.put(db, parity_key(i), parity_value_sized(i, value_size), WriteFlags::empty()) + .unwrap(); + } + txn.commit().unwrap(); + } + (dir, env) +} + /// Set up environment with N rows using SafeNoSync mode (no fsync). 
pub fn setup_parity_env_nosync(n: u32) -> (TempDir, Environment) { let dir = tempdir().unwrap(); From 09e8605bb272a69045d5ab7ffa28852c4ecf0da2 Mon Sep 17 00:00:00 2001 From: James Date: Wed, 1 Apr 2026 07:48:09 -0400 Subject: [PATCH 20/24] fix: rustfmt in scaling bench Co-Authored-By: Claude Opus 4.6 (1M context) --- benches/scaling.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/benches/scaling.rs b/benches/scaling.rs index 8edf902..73e43de 100644 --- a/benches/scaling.rs +++ b/benches/scaling.rs @@ -195,8 +195,7 @@ fn bench_cold_random_get(c: &mut Criterion) { for &i in &indices { let key = parity_key(i); let txn = env.begin_ro_unsync().unwrap(); - let val: Option> = - txn.get(db.dbi(), key.as_slice()).unwrap(); + let val: Option> = txn.get(db.dbi(), key.as_slice()).unwrap(); assert!(val.is_some()); } }); From 840312a4d9d6e029385eedcec1cf6908d25e7103 Mon Sep 17 00:00:00 2001 From: James Date: Wed, 1 Apr 2026 08:12:50 -0400 Subject: [PATCH 21/24] bench: remove 4096B from parity value sizes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Matches evmdb change — 4096B triggers a known multi-page overflow bug on evmdb and is not representative of real EVM workload value sizes. Co-Authored-By: Claude Opus 4.6 (1M context) --- benches/scaling.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benches/scaling.rs b/benches/scaling.rs index 73e43de..695e82e 100644 --- a/benches/scaling.rs +++ b/benches/scaling.rs @@ -16,9 +16,9 @@ const COLD_LOOKUPS: u32 = 100; const ENTRY_COUNTS_FULL: &[u32] = &[100, 1_000, 10_000, 100_000]; const ENTRY_COUNTS_QUICK: &[u32] = &[100, 1_000, 10_000]; -/// Value sizes for parity benchmarks. 4096B exceeds the mdbx overflow -/// threshold (~1996B for 32-byte keys on 4KB pages). -const VALUE_SIZES: &[usize] = &[32, 128, 512, 4096]; +/// Value sizes for parity benchmarks. 4096B excluded to match evmdb +/// (triggers a known multi-page overflow bug there). 
+const VALUE_SIZES: &[usize] = &[32, 128, 512]; fn entry_counts() -> &'static [u32] { use std::sync::Once; From 9ac0c513613640ca2a51a86dad549ae6539bad73 Mon Sep 17 00:00:00 2001 From: James Date: Wed, 1 Apr 2026 14:47:38 -0400 Subject: [PATCH 22/24] =?UTF-8?q?bench:=20deterministic=20cold=20reads=20v?= =?UTF-8?q?ia=20close=E2=86=92fadvise=E2=86=92reopen?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace posix_fadvise on live mmap fd (unreliable — kernel ignores FADV_DONTNEED when active mappings pin pages) with close→fadvise on plain fd→reopen. Bump dataset to 1M rows / 1k lookups for realistic tree depth. Co-Authored-By: Claude Opus 4.6 (1M context) --- benches/scaling.rs | 61 ++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/benches/scaling.rs b/benches/scaling.rs index 695e82e..8eb4ae6 100644 --- a/benches/scaling.rs +++ b/benches/scaling.rs @@ -11,8 +11,8 @@ use utils::{ setup_parity_env_sized, }; -const COLD_N_ROWS: u32 = 10_000; -const COLD_LOOKUPS: u32 = 100; +const COLD_N_ROWS: u32 = 1_000_000; +const COLD_LOOKUPS: u32 = 1_000; const ENTRY_COUNTS_FULL: &[u32] = &[100, 1_000, 10_000, 100_000]; const ENTRY_COUNTS_QUICK: &[u32] = &[100, 1_000, 10_000]; @@ -157,41 +157,43 @@ fn bench_append_ordered_put(c: &mut Criterion) { group.finish(); } -/// Drops OS page cache for the environment's data file via posix_fadvise. +/// Evicts OS page cache for the mdbx data file. Must be called while no +/// mmap exists on the file (i.e. after closing the environment) so the +/// kernel is free to drop the pages. `posix_fadvise` is advisory but +/// reliable when no active mappings pin the pages. #[cfg(target_os = "linux")] -fn drop_os_cache(env: &Environment) { - env.with_raw_env_ptr(|env_ptr| { - let mut fd: libc::c_int = -1; - // SAFETY: env_ptr is valid, fd is a stack variable. 
- let rc = unsafe { signet_libmdbx::ffi::mdbx_env_get_fd(env_ptr, &mut fd) }; - assert_eq!(rc, 0, "mdbx_env_get_fd failed: {rc}"); - // SAFETY: fd is valid from mdbx_env_get_fd. - let rc = unsafe { libc::posix_fadvise(fd, 0, 0, libc::POSIX_FADV_DONTNEED) }; - assert_eq!(rc, 0, "posix_fadvise failed: {rc}"); - }); +fn evict_os_cache(dir: &std::path::Path) { + use std::os::unix::io::AsRawFd; + let data_path = dir.join("mdbx.dat"); + let file = std::fs::File::open(&data_path).unwrap(); + // SAFETY: fd is valid from File::open. + let rc = unsafe { libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_DONTNEED) }; + assert_eq!(rc, 0, "posix_fadvise failed: {rc}"); + // File (and fd) dropped here. } -/// Drops OS page cache for the environment's data file via posix_fadvise. #[cfg(not(target_os = "linux"))] -fn drop_os_cache(_env: &Environment) { +fn evict_os_cache(_dir: &std::path::Path) { // posix_fadvise not available on macOS; reads will be warm. } // PARITY: evmdb/cold_random_get — DO NOT EDIT without updating evmdb counterpart fn bench_cold_random_get(c: &mut Criterion) { - let (_dir, env) = setup_parity_env(COLD_N_ROWS); - // Open the db handle once — dbi is stable for the environment lifetime. - let db = { - let txn = env.begin_ro_unsync().unwrap(); - txn.open_db(None).unwrap() - }; + let (dir, env) = setup_parity_env(COLD_N_ROWS); + // Drop the env so the mmap is unmapped before we evict cache. 
+ drop(env); let mut rng = StdRng::seed_from_u64(42); let indices: Vec = (0..COLD_LOOKUPS).map(|_| rng.random_range(0..COLD_N_ROWS)).collect(); c.bench_function("cold_random_get", |b| { b.iter(|| { - drop_os_cache(&env); + evict_os_cache(dir.path()); + let env = Environment::builder().open(dir.path()).unwrap(); + let db = { + let txn = env.begin_ro_unsync().unwrap(); + txn.open_db(None).unwrap() + }; for &i in &indices { let key = parity_key(i); let txn = env.begin_ro_unsync().unwrap(); @@ -204,16 +206,17 @@ fn bench_cold_random_get(c: &mut Criterion) { // PARITY: evmdb/cold_sequential_scan — DO NOT EDIT without updating evmdb counterpart fn bench_cold_sequential_scan(c: &mut Criterion) { - let (_dir, env) = setup_parity_env(COLD_N_ROWS); - // Open the db handle once — dbi is stable for the environment lifetime. - let db = { - let txn = env.begin_ro_unsync().unwrap(); - txn.open_db(None).unwrap() - }; + let (dir, env) = setup_parity_env(COLD_N_ROWS); + drop(env); c.bench_function("cold_sequential_scan", |b| { b.iter(|| { - drop_os_cache(&env); + evict_os_cache(dir.path()); + let env = Environment::builder().open(dir.path()).unwrap(); + let db = { + let txn = env.begin_ro_unsync().unwrap(); + txn.open_db(None).unwrap() + }; let txn = env.begin_ro_unsync().unwrap(); let mut cursor = txn.cursor(db).unwrap(); let mut count = 0u32; From 74119962f5aa887f7201f1dfcb1ab476059d38b5 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 2 Apr 2026 11:16:24 -0400 Subject: [PATCH 23/24] refactor: remove evmdb/parity references from benchmarks Rename parity_* helpers to bench_* and strip all PARITY comment annotations. The benchmarks are standalone and no longer need to track an external project. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- benches/concurrent.rs | 22 ++++++++--------- benches/cursor.rs | 6 ++--- benches/cursor_write.rs | 52 +++++++++++++++++------------------------ benches/scaling.rs | 31 ++++++++++-------------- benches/utils.rs | 34 +++++++++++++-------------- 5 files changed, 62 insertions(+), 83 deletions(-) diff --git a/benches/concurrent.rs b/benches/concurrent.rs index f680968..62cbb97 100644 --- a/benches/concurrent.rs +++ b/benches/concurrent.rs @@ -9,7 +9,7 @@ use std::{ sync::{Arc, Barrier}, thread, }; -use utils::{parity_key, parity_value, quick_config, setup_parity_env_with_max_readers}; +use utils::{bench_key, bench_value, quick_config, setup_bench_env_with_max_readers}; const N_ROWS: u32 = 1_000; const READER_COUNTS: &[usize] = &[1, 4, 8, 32, 128]; @@ -18,14 +18,13 @@ const READER_COUNTS: &[usize] = &[1, 4, 8, 32, 128]; /// overhead threads. const MAX_READERS: u64 = 256; -// PARITY: evmdb/readers_no_writer — DO NOT EDIT without updating evmdb fn bench_n_readers_no_writer(c: &mut Criterion) { let mut group = c.benchmark_group("concurrent::readers_no_writer"); for &n_readers in READER_COUNTS { - let (_dir, env) = setup_parity_env_with_max_readers(N_ROWS, Some(MAX_READERS)); + let (_dir, env) = setup_bench_env_with_max_readers(N_ROWS, Some(MAX_READERS)); let env = Arc::new(env); - let keys: Arc> = Arc::new((0..N_ROWS).map(parity_key).collect()); + let keys: Arc> = Arc::new((0..N_ROWS).map(bench_key).collect()); // Open the db handle once — dbi is stable for the environment lifetime. 
let db = { let txn = env.begin_ro_sync().unwrap(); @@ -70,14 +69,13 @@ fn bench_n_readers_no_writer(c: &mut Criterion) { group.finish(); } -// PARITY: evmdb/readers_with_writer — DO NOT EDIT without updating evmdb fn bench_n_readers_one_writer(c: &mut Criterion) { let mut group = c.benchmark_group("concurrent::readers_one_writer"); for &n_readers in READER_COUNTS { - let (_dir, env) = setup_parity_env_with_max_readers(N_ROWS, Some(MAX_READERS)); + let (_dir, env) = setup_bench_env_with_max_readers(N_ROWS, Some(MAX_READERS)); let env = Arc::new(env); - let keys: Arc> = Arc::new((0..N_ROWS).map(parity_key).collect()); + let keys: Arc> = Arc::new((0..N_ROWS).map(bench_key).collect()); // Open the db handle once — dbi is stable for the environment lifetime. let db = { let txn = env.begin_ro_sync().unwrap(); @@ -111,7 +109,7 @@ fn bench_n_readers_one_writer(c: &mut Criterion) { }) .collect(); - // Spawn one writer inserting one extra parity-encoded entry. + // Spawn one writer inserting one extra entry. let writer = { let env = Arc::clone(&env); let barrier = Arc::clone(&barrier); @@ -120,8 +118,8 @@ fn bench_n_readers_one_writer(c: &mut Criterion) { let txn = env.begin_rw_sync().unwrap(); txn.put( db, - parity_key(N_ROWS + 1), - parity_value(N_ROWS + 1), + bench_key(N_ROWS + 1), + bench_value(N_ROWS + 1), WriteFlags::empty(), ) .unwrap(); @@ -145,8 +143,8 @@ fn bench_n_readers_one_writer(c: &mut Criterion) { /// Single-thread comparison: sync vs unsync transaction creation. 
fn bench_single_thread_sync_vs_unsync(c: &mut Criterion) { - let (_dir, env) = setup_parity_env_with_max_readers(N_ROWS, None); - let keys: Arc> = Arc::new((0..N_ROWS).map(parity_key).collect()); + let (_dir, env) = setup_bench_env_with_max_readers(N_ROWS, None); + let keys: Arc> = Arc::new((0..N_ROWS).map(bench_key).collect()); c.bench_function("concurrent::single_thread::sync", |b| { b.iter(|| { diff --git a/benches/cursor.rs b/benches/cursor.rs index 742248d..20687af 100644 --- a/benches/cursor.rs +++ b/benches/cursor.rs @@ -51,10 +51,9 @@ fn bench_get_seq_iter(c: &mut Criterion) { }); } -// PARITY: evmdb/cursor_seek_first_iterate — DO NOT EDIT without updating evmdb fn bench_get_seq_cursor(c: &mut Criterion) { let n = 1000; - let (_dir, env) = setup_parity_env(n); + let (_dir, env) = setup_bench_env(n); // Open the db handle once — dbi is stable for the environment lifetime. let db = { let txn = create_ro_sync(&env); @@ -147,10 +146,9 @@ fn bench_get_seq_iter_single_thread(c: &mut Criterion) { }); } -// PARITY: evmdb/cursor_seek_first_iterate — DO NOT EDIT without updating evmdb fn bench_get_seq_cursor_single_thread(c: &mut Criterion) { let n = 1000; - let (_dir, env) = setup_parity_env(n); + let (_dir, env) = setup_bench_env(n); // Open the db handle once — dbi is stable for the environment lifetime. 
let db = { let txn = create_ro_unsync(&env); diff --git a/benches/cursor_write.rs b/benches/cursor_write.rs index 29a0e0e..feccf3f 100644 --- a/benches/cursor_write.rs +++ b/benches/cursor_write.rs @@ -26,11 +26,10 @@ fn setup_plain_env(n: u32) -> (tempfile::TempDir, signet_libmdbx::Environment) { // PUT -// PARITY: evmdb/write_put_100 — DO NOT EDIT without updating evmdb fn bench_cursor_put_sync(c: &mut Criterion) { let items: Vec<([u8; 32], [u8; 128])> = - (0..N).map(|i| (parity_key(i), parity_value(i))).collect(); - let (_dir, env) = setup_parity_env(0); + (0..N).map(|i| (bench_key(i), bench_value(i))).collect(); + let (_dir, env) = setup_bench_env(0); c.bench_function("cursor_write::put::sync", |b| { b.iter_batched( @@ -50,11 +49,10 @@ fn bench_cursor_put_sync(c: &mut Criterion) { }); } -// PARITY: evmdb/write_put_100 — DO NOT EDIT without updating evmdb fn bench_cursor_put_unsync(c: &mut Criterion) { let items: Vec<([u8; 32], [u8; 128])> = - (0..N).map(|i| (parity_key(i), parity_value(i))).collect(); - let (_dir, env) = setup_parity_env(0); + (0..N).map(|i| (bench_key(i), bench_value(i))).collect(); + let (_dir, env) = setup_bench_env(0); c.bench_function("cursor_write::put::single_thread", |b| { b.iter_batched( @@ -76,9 +74,8 @@ fn bench_cursor_put_unsync(c: &mut Criterion) { // PUT + COMMIT (durable) -// PARITY: evmdb/write_put_100 (durable) — DO NOT EDIT without updating evmdb fn bench_cursor_put_commit_durable(c: &mut Criterion) { - let (_dir, env) = setup_parity_env(0); + let (_dir, env) = setup_bench_env(0); // Open the db handle once — dbi is stable for the environment lifetime. let db = { let txn = create_rw_unsync(&env); @@ -87,7 +84,7 @@ fn bench_cursor_put_commit_durable(c: &mut Criterion) { db }; // Advancing base counter — each iteration writes to fresh keys, matching - // evmdb's pattern. XOR with a fixed value to produce unsorted order. + // XOR with a fixed value to produce unsorted order. 
let base = Cell::new(0u32); let xor_mask = 0xDEAD_BEEFu32; @@ -97,8 +94,8 @@ fn bench_cursor_put_commit_durable(c: &mut Criterion) { let txn = create_rw_unsync(&env); let mut cursor = txn.cursor(db).unwrap(); for i in 0..N { - let key = parity_key((b_val + i) ^ xor_mask); - let value = parity_value(b_val + i); + let key = bench_key((b_val + i) ^ xor_mask); + let value = bench_value(b_val + i); cursor.put(key.as_slice(), value.as_slice(), WriteFlags::empty()).unwrap(); } drop(cursor); @@ -108,9 +105,8 @@ fn bench_cursor_put_commit_durable(c: &mut Criterion) { }); } -// PARITY: evmdb/write_put_100_nondurable — DO NOT EDIT without updating evmdb fn bench_cursor_put_commit_nosync(c: &mut Criterion) { - let (_dir, env) = setup_parity_env_nosync(0); + let (_dir, env) = setup_bench_env_nosync(0); // Open the db handle once — dbi is stable for the environment lifetime. let db = { let txn = create_rw_unsync(&env); @@ -119,7 +115,7 @@ fn bench_cursor_put_commit_nosync(c: &mut Criterion) { db }; // Advancing base counter — each iteration writes to fresh keys, matching - // evmdb's pattern. XOR with a fixed value to produce unsorted order. + // XOR with a fixed value to produce unsorted order. 
let base = Cell::new(0u32); let xor_mask = 0xDEAD_BEEFu32; @@ -129,8 +125,8 @@ fn bench_cursor_put_commit_nosync(c: &mut Criterion) { let txn = create_rw_unsync(&env); let mut cursor = txn.cursor(db).unwrap(); for i in 0..N { - let key = parity_key((b_val + i) ^ xor_mask); - let value = parity_value(b_val + i); + let key = bench_key((b_val + i) ^ xor_mask); + let value = bench_value(b_val + i); cursor.put(key.as_slice(), value.as_slice(), WriteFlags::empty()).unwrap(); } drop(cursor); @@ -142,9 +138,8 @@ fn bench_cursor_put_commit_nosync(c: &mut Criterion) { // APPEND + COMMIT (durable) -// PARITY: evmdb/write_put_100_sorted (durable) — DO NOT EDIT without updating evmdb fn bench_cursor_append_commit_durable(c: &mut Criterion) { - let (_dir, env) = setup_parity_env(0); + let (_dir, env) = setup_bench_env(0); // Open the db handle once — dbi is stable for the environment lifetime. let db = { let txn = create_rw_unsync(&env); @@ -161,8 +156,8 @@ fn bench_cursor_append_commit_durable(c: &mut Criterion) { let txn = create_rw_unsync(&env); let mut cursor = txn.cursor(db).unwrap(); for i in 0..N { - let key = parity_key(b_val + i); - let value = parity_value(b_val + i); + let key = bench_key(b_val + i); + let value = bench_value(b_val + i); cursor.append(key.as_slice(), value.as_slice()).unwrap(); } drop(cursor); @@ -172,9 +167,8 @@ fn bench_cursor_append_commit_durable(c: &mut Criterion) { }); } -// PARITY: evmdb/write_put_100_sorted_nondurable — DO NOT EDIT without updating evmdb fn bench_cursor_append_commit_nosync(c: &mut Criterion) { - let (_dir, env) = setup_parity_env_nosync(0); + let (_dir, env) = setup_bench_env_nosync(0); // Open the db handle once — dbi is stable for the environment lifetime. 
let db = { let txn = create_rw_unsync(&env); @@ -191,8 +185,8 @@ fn bench_cursor_append_commit_nosync(c: &mut Criterion) { let txn = create_rw_unsync(&env); let mut cursor = txn.cursor(db).unwrap(); for i in 0..N { - let key = parity_key(b_val + i); - let value = parity_value(b_val + i); + let key = bench_key(b_val + i); + let value = bench_value(b_val + i); cursor.append(key.as_slice(), value.as_slice()).unwrap(); } drop(cursor); @@ -242,13 +236,12 @@ fn bench_cursor_del_unsync(c: &mut Criterion) { // APPEND -// PARITY: evmdb/write_put_100_sorted — DO NOT EDIT without updating evmdb fn bench_cursor_append_sync(c: &mut Criterion) { // Keys are big-endian u32 in first 4 bytes — inserting 0..N in order is // already lexicographically sorted, satisfying the append precondition. let items: Vec<([u8; 32], [u8; 128])> = - (0..N).map(|i| (parity_key(i), parity_value(i))).collect(); - let (_dir, env) = setup_parity_env(0); + (0..N).map(|i| (bench_key(i), bench_value(i))).collect(); + let (_dir, env) = setup_bench_env(0); c.bench_function("cursor_write::append::sync", |b| { b.iter_batched( @@ -268,11 +261,10 @@ fn bench_cursor_append_sync(c: &mut Criterion) { }); } -// PARITY: evmdb/write_put_100_sorted — DO NOT EDIT without updating evmdb fn bench_cursor_append_unsync(c: &mut Criterion) { let items: Vec<([u8; 32], [u8; 128])> = - (0..N).map(|i| (parity_key(i), parity_value(i))).collect(); - let (_dir, env) = setup_parity_env(0); + (0..N).map(|i| (bench_key(i), bench_value(i))).collect(); + let (_dir, env) = setup_bench_env(0); c.bench_function("cursor_write::append::single_thread", |b| { b.iter_batched( diff --git a/benches/scaling.rs b/benches/scaling.rs index 8eb4ae6..37c086d 100644 --- a/benches/scaling.rs +++ b/benches/scaling.rs @@ -7,8 +7,8 @@ use signet_libmdbx::{Environment, ObjectLength}; use std::borrow::Cow; use tempfile::tempdir; use utils::{ - is_bench_full, parity_key, parity_value_sized, quick_config, setup_parity_env, - setup_parity_env_sized, + bench_key, 
bench_value_sized, is_bench_full, quick_config, setup_bench_env, + setup_bench_env_sized, }; const COLD_N_ROWS: u32 = 1_000_000; @@ -16,8 +16,7 @@ const COLD_LOOKUPS: u32 = 1_000; const ENTRY_COUNTS_FULL: &[u32] = &[100, 1_000, 10_000, 100_000]; const ENTRY_COUNTS_QUICK: &[u32] = &[100, 1_000, 10_000]; -/// Value sizes for parity benchmarks. 4096B excluded to match evmdb -/// (triggers a known multi-page overflow bug there). +/// Value sizes for benchmarks. const VALUE_SIZES: &[usize] = &[32, 128, 512]; fn entry_counts() -> &'static [u32] { @@ -33,14 +32,13 @@ fn entry_counts() -> &'static [u32] { } } -// PARITY: evmdb/sequential_get — DO NOT EDIT without updating evmdb fn bench_sequential_get(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::sequential_get"); for &size in VALUE_SIZES { for &n in entry_counts() { - let (_dir, env) = setup_parity_env_sized(n, size); - let keys: Vec<[u8; 32]> = (0..n).map(parity_key).collect(); + let (_dir, env) = setup_bench_env_sized(n, size); + let keys: Vec<[u8; 32]> = (0..n).map(bench_key).collect(); // Open the db handle once — dbi is stable for the environment lifetime. let db = { let txn = env.begin_ro_unsync().unwrap(); @@ -64,14 +62,13 @@ fn bench_sequential_get(c: &mut Criterion) { group.finish(); } -// PARITY: evmdb/random_get — DO NOT EDIT without updating evmdb fn bench_random_get(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::random_get"); for &size in VALUE_SIZES { for &n in entry_counts() { - let (_dir, env) = setup_parity_env_sized(n, size); - let mut keys: Vec<[u8; 32]> = (0..n).map(parity_key).collect(); + let (_dir, env) = setup_bench_env_sized(n, size); + let mut keys: Vec<[u8; 32]> = (0..n).map(bench_key).collect(); keys.shuffle(&mut StdRng::from_seed(Default::default())); // Open the db handle once — dbi is stable for the environment lifetime. 
let db = { @@ -96,13 +93,12 @@ fn bench_random_get(c: &mut Criterion) { group.finish(); } -// PARITY: evmdb/full_iteration — DO NOT EDIT without updating evmdb fn bench_full_iteration(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::full_iteration"); for &size in VALUE_SIZES { for &n in entry_counts() { - let (_dir, env) = setup_parity_env_sized(n, size); + let (_dir, env) = setup_bench_env_sized(n, size); // Open the db handle once — dbi is stable for the environment lifetime. let db = { let txn = env.begin_ro_unsync().unwrap(); @@ -125,14 +121,13 @@ fn bench_full_iteration(c: &mut Criterion) { group.finish(); } -// PARITY: evmdb/put_sorted — DO NOT EDIT without updating evmdb fn bench_append_ordered_put(c: &mut Criterion) { let mut group = c.benchmark_group("scaling::append_ordered_put"); for &size in VALUE_SIZES { for &n in entry_counts() { let items: Vec<([u8; 32], Vec)> = - (0..n).map(|i| (parity_key(i), parity_value_sized(i, size))).collect(); + (0..n).map(|i| (bench_key(i), bench_value_sized(i, size))).collect(); group.bench_with_input(BenchmarkId::new(format!("{size}B"), n), &n, |b, _| { b.iter_batched( @@ -177,9 +172,8 @@ fn evict_os_cache(_dir: &std::path::Path) { // posix_fadvise not available on macOS; reads will be warm. } -// PARITY: evmdb/cold_random_get — DO NOT EDIT without updating evmdb counterpart fn bench_cold_random_get(c: &mut Criterion) { - let (dir, env) = setup_parity_env(COLD_N_ROWS); + let (dir, env) = setup_bench_env(COLD_N_ROWS); // Drop the env so the mmap is unmapped before we evict cache. 
drop(env); @@ -195,7 +189,7 @@ fn bench_cold_random_get(c: &mut Criterion) { txn.open_db(None).unwrap() }; for &i in &indices { - let key = parity_key(i); + let key = bench_key(i); let txn = env.begin_ro_unsync().unwrap(); let val: Option> = txn.get(db.dbi(), key.as_slice()).unwrap(); assert!(val.is_some()); @@ -204,9 +198,8 @@ fn bench_cold_random_get(c: &mut Criterion) { }); } -// PARITY: evmdb/cold_sequential_scan — DO NOT EDIT without updating evmdb counterpart fn bench_cold_sequential_scan(c: &mut Criterion) { - let (dir, env) = setup_parity_env(COLD_N_ROWS); + let (dir, env) = setup_bench_env(COLD_N_ROWS); drop(env); c.bench_function("cold_sequential_scan", |b| { diff --git a/benches/utils.rs b/benches/utils.rs index c520326..84e6297 100644 --- a/benches/utils.rs +++ b/benches/utils.rs @@ -86,45 +86,43 @@ pub fn create_rw_unsync(env: &Environment) -> RwTxUnsync { env.begin_rw_unsync().unwrap() } -/// Parity key encoding — matches evmdb's make_key exactly. /// 32-byte key with i as big-endian u32 in the first 4 bytes, rest zeroed. -pub fn parity_key(i: u32) -> [u8; 32] { +pub fn bench_key(i: u32) -> [u8; 32] { let mut key = [0u8; 32]; key[..4].copy_from_slice(&i.to_be_bytes()); key } -/// Parity value encoding — matches evmdb's make_value exactly. /// 128-byte value with i as little-endian u32 in the first 4 bytes, rest zeroed. -pub fn parity_value(i: u32) -> [u8; 128] { +pub fn bench_value(i: u32) -> [u8; 128] { let mut value = [0u8; 128]; value[..4].copy_from_slice(&i.to_le_bytes()); value } -/// Variable-size parity value encoding — matches evmdb's make_value(i, size). +/// Variable-size value encoding. /// Repeats the little-endian u32 bytes of `i` across `size` bytes. -pub fn parity_value_sized(i: u32, size: usize) -> Vec { +pub fn bench_value_sized(i: u32, size: usize) -> Vec { let bytes = i.to_le_bytes(); (0..size).map(|j| bytes[j % 4]).collect() } -/// Set up environment with N rows using parity encoding (default DB only). 
+/// Set up environment with N rows (default DB only). /// Uses the default durable sync mode. Values are 128 bytes. -pub fn setup_parity_env(n: u32) -> (TempDir, Environment) { - setup_parity_env_with_max_readers(n, None) +pub fn setup_bench_env(n: u32) -> (TempDir, Environment) { + setup_bench_env_with_max_readers(n, None) } -/// Set up environment with N rows using variable-size parity encoding. +/// Set up environment with N rows using variable-size encoding. /// Uses the default durable sync mode. -pub fn setup_parity_env_sized(n: u32, value_size: usize) -> (TempDir, Environment) { +pub fn setup_bench_env_sized(n: u32, value_size: usize) -> (TempDir, Environment) { let dir = tempdir().unwrap(); let env = Environment::builder().open(dir.path()).unwrap(); { let txn = env.begin_rw_unsync().unwrap(); let db = txn.open_db(None).unwrap(); for i in 0..n { - txn.put(db, parity_key(i), parity_value_sized(i, value_size), WriteFlags::empty()) + txn.put(db, bench_key(i), bench_value_sized(i, value_size), WriteFlags::empty()) .unwrap(); } txn.commit().unwrap(); @@ -133,7 +131,7 @@ pub fn setup_parity_env_sized(n: u32, value_size: usize) -> (TempDir, Environmen } /// Set up environment with N rows using SafeNoSync mode (no fsync). -pub fn setup_parity_env_nosync(n: u32) -> (TempDir, Environment) { +pub fn setup_bench_env_nosync(n: u32) -> (TempDir, Environment) { let dir = tempdir().unwrap(); let env = Environment::builder() .set_flags(Mode::ReadWrite { sync_mode: SyncMode::SafeNoSync }.into()) @@ -143,16 +141,16 @@ pub fn setup_parity_env_nosync(n: u32) -> (TempDir, Environment) { let txn = env.begin_rw_unsync().unwrap(); let db = txn.open_db(None).unwrap(); for i in 0..n { - txn.put(db, parity_key(i), parity_value(i), WriteFlags::empty()).unwrap(); + txn.put(db, bench_key(i), bench_value(i), WriteFlags::empty()).unwrap(); } txn.commit().unwrap(); } (dir, env) } -/// Set up environment with N rows using parity encoding and a custom max -/// reader count. 
Pass [`None`] for the mdbx default (126). -pub fn setup_parity_env_with_max_readers( +/// Set up environment with N rows and a custom max reader count. +/// Pass [`None`] for the mdbx default (126). +pub fn setup_bench_env_with_max_readers( n: u32, max_readers: Option, ) -> (TempDir, Environment) { @@ -166,7 +164,7 @@ pub fn setup_parity_env_with_max_readers( let txn = env.begin_rw_unsync().unwrap(); let db = txn.open_db(None).unwrap(); for i in 0..n { - txn.put(db, parity_key(i), parity_value(i), WriteFlags::empty()).unwrap(); + txn.put(db, bench_key(i), bench_value(i), WriteFlags::empty()).unwrap(); } txn.commit().unwrap(); } From 9faaf713c9b71f2130ad414e577d38f41015815d Mon Sep 17 00:00:00 2001 From: James Date: Thu, 2 Apr 2026 11:51:53 -0400 Subject: [PATCH 24/24] chore: bump version to 0.8.2 Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2a52a7c..7b0f1d1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "signet-libmdbx" description = "Idiomatic and safe MDBX wrapper" -version = "0.8.1" +version = "0.8.2" edition = "2024" rust-version = "1.92" license = "MIT OR Apache-2.0"