From fd00e78b0e954a64874620462e3aacfe1f7e3d78 Mon Sep 17 00:00:00 2001 From: SantiagoPittella Date: Mon, 11 May 2026 11:00:25 -0300 Subject: [PATCH 1/9] feat(benchmark): generate proven transactions --- Cargo.lock | 17 ++ Cargo.toml | 2 + Makefile | 4 + bin/benchmark/Cargo.toml | 30 ++++ bin/benchmark/README.md | 0 bin/benchmark/src/main.rs | 352 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 405 insertions(+) create mode 100644 bin/benchmark/Cargo.toml create mode 100644 bin/benchmark/README.md create mode 100644 bin/benchmark/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 64c6977a46..4af0a7785d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2968,6 +2968,23 @@ dependencies = [ "thiserror 2.0.18", ] +[[package]] +name = "miden-benchmark" +version = "0.15.0" +dependencies = [ + "anyhow", + "clap", + "miden-node-block-producer", + "miden-node-proto", + "miden-node-store", + "miden-node-utils", + "miden-protocol", + "miden-standards", + "rand 0.9.2", + "tokio", + "url", +] + [[package]] name = "miden-block-prover" version = "0.15.0" diff --git a/Cargo.toml b/Cargo.toml index f0c9c8f2e5..c8168701d3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,6 @@ [workspace] members = [ + "bin/benchmark", "bin/genesis", "bin/network-monitor", "bin/node", @@ -43,6 +44,7 @@ debug = true [workspace.dependencies] # Workspace crates. 
+miden-benchmark = { path = "crates/benchmark", version = "0.15" } miden-large-smt-backend-rocksdb = { path = "crates/large-smt-backend-rocksdb", version = "0.15" } miden-node-block-producer = { path = "crates/block-producer", version = "0.15" } miden-node-db = { path = "crates/db", version = "0.15" } diff --git a/Makefile b/Makefile index 2628151c20..1b81264d82 100644 --- a/Makefile +++ b/Makefile @@ -129,6 +129,10 @@ install-stress-test: ## Installs stress-test binary install-network-monitor: ## Installs network monitor binary cargo install --path bin/network-monitor --locked +.PHONY: install-benchmark +install-benchmark: ## Installs the benchmark binary + cargo install --path bin/benchmark --locked + # --- docker -------------------------------------------------------------------------------------- .PHONY: compose-genesis diff --git a/bin/benchmark/Cargo.toml b/bin/benchmark/Cargo.toml new file mode 100644 index 0000000000..737fc51136 --- /dev/null +++ b/bin/benchmark/Cargo.toml @@ -0,0 +1,30 @@ +[package] +authors.workspace = true +description = "A binary to run benchmarks of the Miden network" +edition.workspace = true +exclude.workspace = true +homepage.workspace = true +keywords = ["benchmark", "miden", "node"] +license.workspace = true +name = "miden-benchmark" +publish = true +readme.workspace = true +repository.workspace = true +rust-version.workspace = true +version.workspace = true + +[lints] +workspace = true + +[dependencies] +anyhow = { workspace = true } +clap = { workspace = true, features = ["env", "string"] } +miden-node-block-producer = { workspace = true } +miden-node-proto = { workspace = true } +miden-node-store = { workspace = true } +miden-node-utils = { workspace = true } +miden-protocol = { workspace = true } +miden-standards = { workspace = true } +rand = { workspace = true } +url = { features = ["serde"], workspace = true } +tokio = { features = ["full"], workspace = true } diff --git a/bin/benchmark/README.md b/bin/benchmark/README.md 
new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bin/benchmark/src/main.rs b/bin/benchmark/src/main.rs new file mode 100644 index 0000000000..099b417852 --- /dev/null +++ b/bin/benchmark/src/main.rs @@ -0,0 +1,352 @@ +//! Runs benchmarks + +use std::sync::{Arc, Mutex}; +use std::time::Duration; + +use anyhow::{Context, Result}; +use clap::{Parser, Subcommand}; +use miden_node_proto::clients::{Builder, RpcClient}; +use miden_node_proto::generated::rpc::BlockHeaderByNumberRequest; +use miden_protocol::account::auth::AuthScheme; +use miden_protocol::account::delta::AccountUpdateDetails; +use miden_protocol::account::{ + Account, + AccountBuilder, + AccountId, + AccountStorageMode, + AccountType, +}; +use miden_protocol::asset::{Asset, FungibleAsset, TokenSymbol}; +use miden_protocol::block::{BlockHeader, BlockNumber}; +use miden_protocol::crypto::dsa::falcon512_poseidon2::{PublicKey, SecretKey}; +use miden_protocol::crypto::rand::RandomCoin; +use miden_protocol::note::Note; +use miden_protocol::transaction::{ + InputNoteCommitment, + OutputNote, + ProvenTransaction, + PublicOutputNote, + TxAccountUpdate, +}; +use miden_protocol::vm::ExecutionProof; +use miden_protocol::{Felt, ONE, Word}; +use miden_standards::account::auth::AuthSingleSig; +use miden_standards::account::faucets::{BasicFungibleFaucet, TokenMetadata}; +use miden_standards::account::metadata::{FungibleTokenMetadata, TokenName}; +use miden_standards::account::policies::{ + BurnPolicyConfig, + MintPolicyConfig, + PolicyAuthority, + TokenPolicyManager, +}; +use miden_standards::account::wallets::BasicWallet; +use miden_standards::note::P2idNote; +use rand::Rng; +use url::Url; + +const TOTAL_WALLETS: u64 = 1_000_000; + +// COMMANDS +// ================================================================================================ + +#[derive(Parser)] +#[command(version, about, long_about = None)] +pub struct Cli { + #[command(subcommand)] + pub command: Command, +} + +#[derive(Subcommand)] +pub 
enum Command { + CreateProofs, + RunBenchmark, +} + +#[tokio::main] +async fn main() { + let cli = Cli::parse(); + cli.run().await; +} + +impl Cli { + async fn run(self) { + match self.command { + Command::CreateProofs => self.create_proofs().await, + Command::RunBenchmark => self.run_benchmark(), + } + } + + async fn create_proofs(self) { + let url = Url::parse("https://rpc.devnet.miden.io").unwrap(); + let mut rpc_client = + create_genesis_aware_rpc_client(&url, Duration::from_secs(10)).await.unwrap(); + + // We need to: + // 1. Create 1 faucet + let mut faucet = create_faucet(); + + // 2. Create N wallets + let mut wallets = vec![]; + + // share random coin seed and key pair for all accounts to avoid key generation overhead + let coin_seed: [u64; 4] = rand::rng().random(); + let rng = Arc::new(Mutex::new(RandomCoin::new(coin_seed.map(Felt::new).into()))); + let key_pair = { + let mut rng = rng.lock().unwrap(); + SecretKey::with_rng(&mut *rng) + }; + + for index in 0..TOTAL_WALLETS { + let wallet = create_account(key_pair.public_key(), index, AccountStorageMode::Public); + wallets.push(wallet); + } + + // 3. Create 1 mint tx per wallet + let block_header_from_rpc = rpc_client + .get_block_header_by_number(get_genesis_header_request()) + .await + .unwrap() + .into_inner() + .block_header; + let genesis_block_header: BlockHeader = block_header_from_rpc.unwrap().try_into().unwrap(); + + let mut mint_output_notes = vec![]; + let mut mint_txs = vec![]; + + let faucet_id = faucet.id(); + for index in 0..TOTAL_WALLETS { + let note = { + let mut rng = rng.lock().unwrap(); + create_mint_note(faucet_id.clone(), wallets[index as usize].id().clone(), &mut rng) + }; + mint_output_notes.push(note.clone()); + + let mint_tx = create_mint_tx(&genesis_block_header, &mut faucet, vec![note]); + mint_txs.push(mint_tx); + } + // 4. 
Create 1 consume tx per mint + let mut consume_txs = vec![]; + + for index in 0..TOTAL_WALLETS { + let tx = create_consume_tx( + &genesis_block_header, + &mut wallets[index as usize], + mint_output_notes[index as usize].clone(), + ); + + consume_txs.push(tx); + } + + // Save everything to files + } + + fn run_benchmark(self) { + println!("run_benchmark"); + } +} + +/// Create an RPC client configured with the correct genesis metadata in the +/// `Accept` header so that write RPCs such as `SubmitProvenTransaction` are +/// accepted by the node. +pub async fn create_genesis_aware_rpc_client( + rpc_url: &Url, + timeout: Duration, +) -> Result { + // First, create a temporary client without genesis metadata to discover the + // genesis block header and its commitment. + let mut rpc: RpcClient = Builder::new(rpc_url.clone()) + .with_tls() + .context("Failed to configure TLS for RPC client")? + .with_timeout(timeout) + .without_metadata_version() + .without_metadata_genesis() + .without_otel_context_injection() + .connect() + .await + .context("Failed to create RPC client for genesis discovery")?; + + let response = rpc + .get_block_header_by_number(get_genesis_header_request()) + .await + .context("Failed to get genesis block header from RPC")? + .into_inner(); + + let genesis_block_header = response + .block_header + .ok_or_else(|| anyhow::anyhow!("No block header in response"))?; + + let genesis_header: BlockHeader = + genesis_block_header.try_into().context("Failed to convert block header")?; + + let genesis_commitment = genesis_header.commitment(); + let genesis = genesis_commitment.to_hex(); + + // Rebuild the client, this time including the required genesis metadata so that + // write RPCs like SubmitProvenTransaction are accepted by the node. + let rpc_client = Builder::new(rpc_url.clone()) + .with_tls() + .context("Failed to configure TLS for RPC client")? 
+ .with_timeout(timeout) + .without_metadata_version() + .with_metadata_genesis(genesis) + .without_otel_context_injection() + .connect() + .await + .context("Failed to connect to RPC server with genesis metadata")?; + + Ok(rpc_client) +} + +fn get_genesis_header_request() -> BlockHeaderByNumberRequest { + BlockHeaderByNumberRequest { + block_num: Some(BlockNumber::GENESIS.as_u32()), + include_mmr_proof: None, + } +} + +/// Creates a new faucet account. +fn create_faucet() -> Account { + let coin_seed: [u64; 4] = rand::rng().random(); + let mut rng = RandomCoin::new(coin_seed.map(Felt::new).into()); + let key_pair = SecretKey::with_rng(&mut rng); + let init_seed = [0_u8; 32]; + + let token_symbol = TokenSymbol::new("TEST").unwrap(); + let token_metadata = FungibleTokenMetadata::builder( + TokenName::new("TEST").unwrap(), + token_symbol, + 2, + FungibleAsset::MAX_AMOUNT, + ) + .build() + .unwrap(); + AccountBuilder::new(init_seed) + .account_type(AccountType::FungibleFaucet) + .storage_mode(AccountStorageMode::Private) + .with_component(token_metadata) + .with_component(BasicFungibleFaucet) + .with_components(TokenPolicyManager::new( + PolicyAuthority::AuthControlled, + MintPolicyConfig::AllowAll, + BurnPolicyConfig::AllowAll, + )) + .with_auth_component(AuthSingleSig::new( + key_pair.public_key().into(), + AuthScheme::Falcon512Poseidon2, + )) + .build() + .unwrap() +} + +/// Creates a new wallet account with a given public key. 
+fn create_account(public_key: PublicKey, index: u64, storage_mode: AccountStorageMode) -> Account { + let init_seed: Vec<_> = index.to_be_bytes().into_iter().chain([0u8; 24]).collect(); + AccountBuilder::new(init_seed.try_into().unwrap()) + .account_type(AccountType::RegularAccountImmutableCode) + .storage_mode(storage_mode) + .with_auth_component(AuthSingleSig::new(public_key.into(), AuthScheme::Falcon512Poseidon2)) + .with_component(BasicWallet) + .build() + .unwrap() +} + +/// Creates a public P2ID note containing 10 tokens of the fungible asset associated with the +/// specified `faucet_id` and sent to the specified target account. +fn create_mint_note(faucet_id: AccountId, target_id: AccountId, rng: &mut RandomCoin) -> Note { + let asset = Asset::Fungible(FungibleAsset::new(faucet_id, 10).unwrap()); + P2idNote::create( + faucet_id, + target_id, + vec![asset], + miden_protocol::note::NoteType::Public, + miden_protocol::note::NoteAttachment::default(), + rng, + ) + .expect("note creation failed") +} + +/// Creates a transaction from the faucet that creates the given output notes. +/// Updates the faucet account to increase the issuance slot and its nonce.
+fn create_mint_tx( + block_ref: &BlockHeader, + faucet: &mut Account, + output_notes: Vec, +) -> ProvenTransaction { + let initial_account_hash = faucet.to_commitment(); + + let metadata_slot_name = TokenMetadata::metadata_slot(); + let slot = faucet.storage().get_item(metadata_slot_name).unwrap(); + faucet + .storage_mut() + .set_item(metadata_slot_name, [slot[0] + Felt::new(10), slot[1], slot[2], slot[3]].into()) + .unwrap(); + + faucet.increment_nonce(ONE).unwrap(); + + let account_update = TxAccountUpdate::new( + faucet.id(), + initial_account_hash, + faucet.to_commitment(), + Word::empty(), + AccountUpdateDetails::Private, + ) + .unwrap(); + ProvenTransaction::new( + account_update, + Vec::::new(), + output_notes + .into_iter() + .map(|note| OutputNote::Public(PublicOutputNote::new(note).unwrap())) + .collect::>(), + block_ref.block_num(), + block_ref.commitment(), + FungibleAsset::new( + block_ref.fee_parameters().fee_faucet_id(), + u64::from(block_ref.fee_parameters().verification_base_fee()), + ) + .unwrap(), + u32::MAX.into(), + ExecutionProof::new_dummy(), + ) + .unwrap() +} + +/// Creates a transaction from the wallet that will consume the given output note.
+fn create_consume_tx( + block_ref: &BlockHeader, + wallet: &mut Account, + input_note: Note, +) -> ProvenTransaction { + let initial_account_hash = wallet.to_commitment(); + + wallet.increment_nonce(ONE).unwrap(); + + let account_update = TxAccountUpdate::new( + wallet.id(), + initial_account_hash, + wallet.to_commitment(), + Word::empty(), + AccountUpdateDetails::Private, + ) + .unwrap(); + + let nullifier = input_note.nullifier(); + let header = input_note.header().clone(); + let input_note_commitment = InputNoteCommitment::from_parts_unchecked(nullifier, Some(header)); + + ProvenTransaction::new( + account_update, + vec![input_note_commitment], + Vec::::new(), + block_ref.block_num(), + block_ref.commitment(), + FungibleAsset::new( + block_ref.fee_parameters().fee_faucet_id(), + u64::from(block_ref.fee_parameters().verification_base_fee()), + ) + .unwrap(), + u32::MAX.into(), + ExecutionProof::new_dummy(), + ) + .unwrap() +} From 5b52f36674d8a6c5ccc74774eef74324506f4f99 Mon Sep 17 00:00:00 2001 From: SantiagoPittella Date: Tue, 12 May 2026 11:43:10 -0300 Subject: [PATCH 2/9] feat(benchmark): submit proven transactions --- Cargo.lock | 2 + Cargo.toml | 2 +- bin/benchmark/Cargo.toml | 6 +- bin/benchmark/src/main.rs | 734 ++++++++++++++++++++++++++++---------- 4 files changed, 562 insertions(+), 182 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4af0a7785d..b24fd37825 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2980,7 +2980,9 @@ dependencies = [ "miden-node-utils", "miden-protocol", "miden-standards", + "miden-tx", "rand 0.9.2", + "rayon", "tokio", "url", ] diff --git a/Cargo.toml b/Cargo.toml index c8168701d3..1fea4309dd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,7 +44,7 @@ debug = true [workspace.dependencies] # Workspace crates. 
-miden-benchmark = { path = "crates/benchmark", version = "0.15" } +miden-benchmark = { path = "bin/benchmark", version = "0.15" } miden-large-smt-backend-rocksdb = { path = "crates/large-smt-backend-rocksdb", version = "0.15" } miden-node-block-producer = { path = "crates/block-producer", version = "0.15" } miden-node-db = { path = "crates/db", version = "0.15" } diff --git a/bin/benchmark/Cargo.toml b/bin/benchmark/Cargo.toml index 737fc51136..bb12efb267 100644 --- a/bin/benchmark/Cargo.toml +++ b/bin/benchmark/Cargo.toml @@ -23,8 +23,10 @@ miden-node-block-producer = { workspace = true } miden-node-proto = { workspace = true } miden-node-store = { workspace = true } miden-node-utils = { workspace = true } -miden-protocol = { workspace = true } +miden-protocol = { features = ["std", "testing"], workspace = true } miden-standards = { workspace = true } +miden-tx = { features = ["concurrent", "std"], workspace = true } rand = { workspace = true } -url = { features = ["serde"], workspace = true } +rayon = { workspace = true } tokio = { features = ["full"], workspace = true } +url = { features = ["serde"], workspace = true } diff --git a/bin/benchmark/src/main.rs b/bin/benchmark/src/main.rs index 099b417852..26c97aaf83 100644 --- a/bin/benchmark/src/main.rs +++ b/bin/benchmark/src/main.rs @@ -1,37 +1,45 @@ //! 
Runs benchmarks -use std::sync::{Arc, Mutex}; -use std::time::Duration; +use std::collections::{BTreeSet, HashMap}; +use std::path::PathBuf; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{Duration, Instant}; use anyhow::{Context, Result}; use clap::{Parser, Subcommand}; use miden_node_proto::clients::{Builder, RpcClient}; +use miden_node_proto::generated as proto; use miden_node_proto::generated::rpc::BlockHeaderByNumberRequest; -use miden_protocol::account::auth::AuthScheme; -use miden_protocol::account::delta::AccountUpdateDetails; +use miden_protocol::account::auth::{AuthScheme, AuthSecretKey}; use miden_protocol::account::{ Account, AccountBuilder, AccountId, AccountStorageMode, AccountType, + PartialAccount, + StorageMapKey, }; -use miden_protocol::asset::{Asset, FungibleAsset, TokenSymbol}; +use miden_protocol::asset::{Asset, AssetVaultKey, AssetWitness, FungibleAsset, TokenSymbol}; use miden_protocol::block::{BlockHeader, BlockNumber}; -use miden_protocol::crypto::dsa::falcon512_poseidon2::{PublicKey, SecretKey}; +use miden_protocol::crypto::dsa::falcon512_poseidon2::SecretKey; +use miden_protocol::crypto::merkle::mmr::{MmrPeaks, PartialMmr}; use miden_protocol::crypto::rand::RandomCoin; -use miden_protocol::note::Note; +use miden_protocol::note::{Note, NoteScript, NoteScriptRoot}; use miden_protocol::transaction::{ - InputNoteCommitment, - OutputNote, + AccountInputs, + InputNote, + InputNotes, + PartialBlockchain, ProvenTransaction, - PublicOutputNote, - TxAccountUpdate, + TransactionArgs, }; -use miden_protocol::vm::ExecutionProof; -use miden_protocol::{Felt, ONE, Word}; +use miden_protocol::utils::serde::{Deserializable, Serializable}; +use miden_protocol::{Felt, MastForest, Word}; use miden_standards::account::auth::AuthSingleSig; -use miden_standards::account::faucets::{BasicFungibleFaucet, TokenMetadata}; +use miden_standards::account::faucets::BasicFungibleFaucet; +use 
miden_standards::account::interface::{AccountInterface, AccountInterfaceExt}; use miden_standards::account::metadata::{FungibleTokenMetadata, TokenName}; use miden_standards::account::policies::{ BurnPolicyConfig, @@ -41,10 +49,21 @@ use miden_standards::account::policies::{ }; use miden_standards::account::wallets::BasicWallet; use miden_standards::note::P2idNote; +use miden_tx::auth::BasicAuthenticator; +use miden_tx::{ + DataStore, + DataStoreError, + LocalTransactionProver, + MastForestStore, + TransactionExecutor, + TransactionMastStore, +}; use rand::Rng; +use rayon::prelude::*; +use tokio::sync::Semaphore; use url::Url; -const TOTAL_WALLETS: u64 = 1_000_000; +const PROOFS_DIR: &str = "./benchmark-proofs"; // COMMANDS // ================================================================================================ @@ -58,8 +77,25 @@ pub struct Cli { #[derive(Subcommand)] pub enum Command { - CreateProofs, - RunBenchmark, + CreateProofs { + /// RPC endpoint of the target miden node — used to discover the + /// genesis commitment that the generated proofs are bound to. Must + /// match the node you intend to submit the proofs against. + #[arg(long, default_value = "http://127.0.0.1:57291")] + rpc_url: Url, + /// Number of mint + consume transaction pairs to generate. Each + /// pair takes seconds of real STARK proving, so start small. + #[arg(long, default_value_t = 10)] + num_transactions: u64, + }, + RunBenchmark { + /// RPC endpoint of the target miden node. + #[arg(long, default_value = "http://127.0.0.1:57291")] + rpc_url: Url, + /// Number of concurrent submission tasks. 
+ #[arg(long, default_value_t = 32)] + concurrency: usize, + }, } #[tokio::main] @@ -71,77 +107,388 @@ async fn main() { impl Cli { async fn run(self) { match self.command { - Command::CreateProofs => self.create_proofs().await, - Command::RunBenchmark => self.run_benchmark(), + Command::CreateProofs { rpc_url, num_transactions } => { + create_proofs(rpc_url, num_transactions).await + }, + Command::RunBenchmark { rpc_url, concurrency } => { + run_benchmark(rpc_url, concurrency).await + }, } } +} + +async fn create_proofs(rpc_url: Url, num_transactions: u64) { + let mut rpc_client = + create_genesis_aware_rpc_client(&rpc_url, Duration::from_secs(10)).await.unwrap(); - async fn create_proofs(self) { - let url = Url::parse("https://rpc.devnet.miden.io").unwrap(); - let mut rpc_client = - create_genesis_aware_rpc_client(&url, Duration::from_secs(10)).await.unwrap(); + println!("Fetching genesis block header from {rpc_url}..."); + let genesis_header_proto = rpc_client + .get_block_header_by_number(get_genesis_header_request()) + .await + .unwrap() + .into_inner() + .block_header + .expect("RPC returned no block header"); + let genesis_header: BlockHeader = genesis_header_proto.try_into().unwrap(); - // We need to: - // 1. Create 1 faucet - let mut faucet = create_faucet(); + println!("Creating faucet..."); + let (mut faucet, faucet_secret_key) = create_faucet(); - // 2. 
Create N wallets - let mut wallets = vec![]; + let coin_seed: [u64; 4] = rand::rng().random(); + let mut seed_rng = RandomCoin::new(coin_seed.map(Felt::new).into()); + let wallet_secret_key = SecretKey::with_rng(&mut seed_rng); + let wallet_public_key = wallet_secret_key.public_key(); + + println!("Creating {num_transactions} wallets in parallel..."); + let wallets: Vec = (0..num_transactions) + .into_par_iter() + .map(|index| create_wallet(wallet_public_key.clone(), index)) + .collect(); + + let genesis_chain_mmr = + PartialBlockchain::new(PartialMmr::from_peaks(MmrPeaks::default()), Vec::new()) + .expect("failed to create empty chain MMR"); + + let mut data_store = BenchmarkDataStore::new(genesis_header.clone(), genesis_chain_mmr); + data_store.add_account(faucet.clone()); + for wallet in &wallets { + data_store.add_account(wallet.clone()); + } - // share random coin seed and key pair for all accounts to avoid key generation overhead - let coin_seed: [u64; 4] = rand::rng().random(); - let rng = Arc::new(Mutex::new(RandomCoin::new(coin_seed.map(Felt::new).into()))); - let key_pair = { - let mut rng = rng.lock().unwrap(); - SecretKey::with_rng(&mut *rng) + let authenticator = BasicAuthenticator::new(&[ + AuthSecretKey::Falcon512Poseidon2(faucet_secret_key), + AuthSecretKey::Falcon512Poseidon2(wallet_secret_key), + ]); + + let prover = LocalTransactionProver::default(); + let faucet_id = faucet.id(); + + // Mint phase — sequential because each mint mutates the faucet. 
+ println!("Proving {num_transactions} mint transactions (sequential)..."); + let mut mint_txs: Vec = Vec::with_capacity(num_transactions as usize); + let mut mint_tx_inputs: Vec> = Vec::with_capacity(num_transactions as usize); + let mut mint_notes: Vec = Vec::with_capacity(num_transactions as usize); + + for index in 0..num_transactions { + let wallet_id = wallets[index as usize].id(); + let note = { + let asset = Asset::Fungible(FungibleAsset::new(faucet_id, 10).unwrap()); + P2idNote::create( + faucet_id, + wallet_id, + vec![asset], + miden_protocol::note::NoteType::Public, + miden_protocol::note::NoteAttachment::default(), + &mut seed_rng, + ) + .expect("note creation failed") }; - for index in 0..TOTAL_WALLETS { - let wallet = create_account(key_pair.public_key(), index, AccountStorageMode::Public); - wallets.push(wallet); - } + let account_interface = AccountInterface::from_account(&faucet); + let script = account_interface + .build_send_notes_script(&[note.clone().into()], None) + .expect("failed to build mint send-notes script"); + + let mut tx_args = TransactionArgs::default().with_tx_script(script); + tx_args.add_output_note_recipient(Box::new(note.recipient().clone())); + + let executor = + TransactionExecutor::new(&data_store).with_authenticator(&authenticator); - // 3. 
Create 1 mint tx per wallet - let block_header_from_rpc = rpc_client - .get_block_header_by_number(get_genesis_header_request()) + let executed_tx = Box::pin(executor.execute_transaction( + faucet_id, + genesis_header.block_num(), + InputNotes::default(), + tx_args, + )) + .await + .expect("failed to execute mint transaction"); + + let tx_inputs_bytes = executed_tx.tx_inputs().to_bytes(); + let delta = executed_tx.account_delta().clone(); + + let proven_tx = prover + .prove(executed_tx) .await - .unwrap() - .into_inner() - .block_header; - let genesis_block_header: BlockHeader = block_header_from_rpc.unwrap().try_into().unwrap(); - - let mut mint_output_notes = vec![]; - let mut mint_txs = vec![]; - - let faucet_id = faucet.id(); - for index in 0..TOTAL_WALLETS { - let note = { - let mut rng = rng.lock().unwrap(); - create_mint_note(faucet_id.clone(), wallets[index as usize].id().clone(), &mut rng) - }; - mint_output_notes.push(note.clone()); + .expect("failed to prove mint transaction"); + + // Evolve the faucet state for the next iteration. The first mint of a + // never-before-seen account produces a full-state delta (because the + // delta carries the freshly deployed code); subsequent mints produce + // partial-state deltas that can be applied incrementally. + if delta.is_full_state() { + faucet = Account::try_from(&delta) + .expect("failed to materialize faucet from full-state delta"); + } else { + faucet + .apply_delta(&delta) + .expect("failed to apply faucet delta"); + } + data_store.update_account(faucet.clone()); - let mint_tx = create_mint_tx(&genesis_block_header, &mut faucet, vec![note]); - mint_txs.push(mint_tx); + mint_txs.push(proven_tx); + mint_tx_inputs.push(tx_inputs_bytes); + mint_notes.push(note); + + if (index + 1) % 10 == 0 || index + 1 == num_transactions { + println!(" proved {} / {num_transactions} mint txs", index + 1); } - // 4. 
Create 1 consume tx per mint - let mut consume_txs = vec![]; + } + + // Consume phase — also sequential for now (each tx is one wallet, independent + // wallets, so this could be parallelized later with bounded concurrency). + println!("Proving {num_transactions} consume transactions (sequential)..."); + let mut consume_txs: Vec = Vec::with_capacity(num_transactions as usize); + let mut consume_tx_inputs: Vec> = Vec::with_capacity(num_transactions as usize); + + for index in 0..num_transactions { + let wallet_id = wallets[index as usize].id(); + let note = mint_notes[index as usize].clone(); + let input_note = InputNote::Unauthenticated { note }; + let input_notes = InputNotes::new(vec![input_note]) + .expect("failed to construct input notes for consume"); + + let executor = + TransactionExecutor::new(&data_store).with_authenticator(&authenticator); + + let executed_tx = Box::pin(executor.execute_transaction( + wallet_id, + genesis_header.block_num(), + input_notes, + TransactionArgs::default(), + )) + .await + .expect("failed to execute consume transaction"); - for index in 0..TOTAL_WALLETS { - let tx = create_consume_tx( - &genesis_block_header, - &mut wallets[index as usize], - mint_output_notes[index as usize].clone(), - ); + let tx_inputs_bytes = executed_tx.tx_inputs().to_bytes(); - consume_txs.push(tx); + let proven_tx = prover + .prove(executed_tx) + .await + .expect("failed to prove consume transaction"); + + consume_txs.push(proven_tx); + consume_tx_inputs.push(tx_inputs_bytes); + + if (index + 1) % 10 == 0 || index + 1 == num_transactions { + println!(" proved {} / {num_transactions} consume txs", index + 1); } + } + + let out_dir = PathBuf::from(PROOFS_DIR); + println!("Writing proofs to {}/", out_dir.display()); + std::fs::create_dir_all(&out_dir).unwrap(); + std::fs::write(out_dir.join("faucet.bin"), faucet.to_bytes()).unwrap(); + std::fs::write(out_dir.join("wallets.bin"), wallets.to_bytes()).unwrap(); + std::fs::write(out_dir.join("mint_txs.bin"), 
mint_txs.to_bytes()).unwrap(); + std::fs::write(out_dir.join("mint_tx_inputs.bin"), mint_tx_inputs.to_bytes()).unwrap(); + std::fs::write(out_dir.join("consume_txs.bin"), consume_txs.to_bytes()).unwrap(); + std::fs::write(out_dir.join("consume_tx_inputs.bin"), consume_tx_inputs.to_bytes()).unwrap(); + println!("Done."); +} + +async fn run_benchmark(rpc_url: Url, concurrency: usize) { + let in_dir = PathBuf::from(PROOFS_DIR); + + println!("Loading mint txs from {}", in_dir.join("mint_txs.bin").display()); + let mint_txs = read_proven_txs(&in_dir.join("mint_txs.bin")); + let mint_tx_inputs = read_tx_inputs(&in_dir.join("mint_tx_inputs.bin")); + assert_eq!(mint_txs.len(), mint_tx_inputs.len(), "mint tx/inputs length mismatch"); + + println!("Loading consume txs from {}", in_dir.join("consume_txs.bin").display()); + let consume_txs = read_proven_txs(&in_dir.join("consume_txs.bin")); + let consume_tx_inputs = read_tx_inputs(&in_dir.join("consume_tx_inputs.bin")); + assert_eq!(consume_txs.len(), consume_tx_inputs.len(), "consume tx/inputs length mismatch"); + + println!("Connecting to {rpc_url}..."); + let rpc_client = create_genesis_aware_rpc_client(&rpc_url, Duration::from_secs(30)) + .await + .expect("failed to create RPC client"); + + let h_start = current_block_height(rpc_client.clone()).await; + println!("Chain height at start: {h_start}"); + + println!( + "Submitting {} mint txs sequentially (each one mutates the shared faucet, so the \ + submits must be serialized for the mempool to chain them)...", + mint_txs.len() + ); + let (mint_ok, mint_err, mint_elapsed) = + submit_sequential(rpc_client.clone(), mint_txs, mint_tx_inputs).await; + println!( + " mint: ok={mint_ok} err={mint_err} in {:.1}s ({:.2} tx/s)", + mint_elapsed.as_secs_f64(), + mint_ok as f64 / mint_elapsed.as_secs_f64() + ); + + println!("Submitting {} consume txs with concurrency={concurrency}...", consume_txs.len()); + let (consume_ok, consume_err, consume_elapsed) = + submit_all(rpc_client.clone(), 
consume_txs, consume_tx_inputs, concurrency).await; + println!( + " consume: ok={consume_ok} err={consume_err} in {:.1}s ({:.0} tx/s)", + consume_elapsed.as_secs_f64(), + consume_ok as f64 / consume_elapsed.as_secs_f64() + ); + + println!("Waiting 3 blocks for the last submissions to land..."); + let h_final = wait_for_n_blocks(rpc_client.clone(), 3).await; + + let total_submitted = mint_ok + consume_ok; + let total_submission_secs = (mint_elapsed + consume_elapsed).as_secs_f64(); + println!(); + println!("=== Summary ==="); + println!("Chain height: {h_start} -> {h_final} ({} blocks)", h_final - h_start); + println!("Total successful submissions: {total_submitted}"); + println!("Total submission time: {:.1}s", total_submission_secs); + println!("Submission TPS: {:.0}", total_submitted as f64 / total_submission_secs); +} + +fn read_proven_txs(path: &std::path::Path) -> Vec { + let bytes = std::fs::read(path).unwrap_or_else(|_| { + panic!( + "failed to read {} — run `create-proofs` first", + path.display() + ) + }); + Vec::::read_from_bytes(&bytes) + .unwrap_or_else(|_| panic!("failed to deserialize {}", path.display())) +} + +fn read_tx_inputs(path: &std::path::Path) -> Vec> { + let bytes = std::fs::read(path).unwrap_or_else(|_| { + panic!( + "failed to read {} — run `create-proofs` first", + path.display() + ) + }); + Vec::>::read_from_bytes(&bytes) + .unwrap_or_else(|_| panic!("failed to deserialize {}", path.display())) +} + +async fn submit_all( + client: RpcClient, + txs: Vec, + tx_inputs: Vec>, + concurrency: usize, +) -> (u64, u64, Duration) { + /// How many distinct error messages to surface to the console. 
+ const MAX_ERRORS_TO_PRINT: u64 = 5; + + let start = Instant::now(); + let semaphore = Arc::new(Semaphore::new(concurrency)); + let ok = Arc::new(AtomicU64::new(0)); + let err = Arc::new(AtomicU64::new(0)); + let printed = Arc::new(AtomicU64::new(0)); + + let mut handles = Vec::with_capacity(txs.len()); + for (i, (tx, inputs)) in txs.into_iter().zip(tx_inputs.into_iter()).enumerate() { + let permit = semaphore.clone().acquire_owned().await.unwrap(); + let mut client = client.clone(); + let ok = ok.clone(); + let err = err.clone(); + let printed = printed.clone(); + handles.push(tokio::spawn(async move { + let request = proto::transaction::ProvenTransaction { + transaction: tx.to_bytes(), + transaction_inputs: Some(inputs), + }; + match client.submit_proven_transaction(request).await { + Ok(_) => { + ok.fetch_add(1, Ordering::Relaxed); + }, + Err(status) => { + err.fetch_add(1, Ordering::Relaxed); + if printed.fetch_add(1, Ordering::Relaxed) < MAX_ERRORS_TO_PRINT { + eprintln!( + " tx idx {i} failed: code={:?} message={}", + status.code(), + status.message() + ); + } + }, + } + drop(permit); + })); + } + for h in handles { + let _ = h.await; + } + + (ok.load(Ordering::Relaxed), err.load(Ordering::Relaxed), start.elapsed()) +} - // Save everything to files +/// Submit txs one at a time, awaiting each RPC response before sending the +/// next. Used for the mint phase, where every tx mutates the shared faucet +/// and therefore must arrive at the mempool in order — the block-producer's +/// mempool will reject out-of-order submissions but happily chains in-order +/// ones against its own pending state, so we only need to serialize the +/// `submit_proven_transaction` calls themselves, not wait for block +/// inclusion in between. 
+async fn submit_sequential( + client: RpcClient, + txs: Vec, + tx_inputs: Vec>, +) -> (u64, u64, Duration) { + let start = Instant::now(); + let mut ok: u64 = 0; + let mut err: u64 = 0; + let total = txs.len(); + + for (i, (tx, inputs)) in txs.into_iter().zip(tx_inputs.into_iter()).enumerate() { + let request = proto::transaction::ProvenTransaction { + transaction: tx.to_bytes(), + transaction_inputs: Some(inputs), + }; + + let mut submit_client = client.clone(); + match submit_client.submit_proven_transaction(request).await { + Ok(_) => ok += 1, + Err(e) => { + err += 1; + eprintln!(" tx {} / {total} failed: {}", i + 1, e); + }, + } } - fn run_benchmark(self) { - println!("run_benchmark"); + println!(" submitted {total} (ok={ok} err={err})"); + (ok, err, start.elapsed()) +} + +async fn current_block_height(mut client: RpcClient) -> u32 { + let response = client + .get_block_header_by_number(BlockHeaderByNumberRequest { + block_num: None, + include_mmr_proof: None, + }) + .await + .expect("failed to fetch latest block header") + .into_inner(); + let header: BlockHeader = response + .block_header + .expect("no block header in response") + .try_into() + .expect("failed to decode block header"); + header.block_num().as_u32() +} + +/// Wait until the chain has advanced by `n` blocks past whatever the current +/// height is, then return. Used to give the block-producer time to include +/// in-flight submissions without falsely waiting forever (the node produces +/// empty blocks at a steady interval, so "no height change" never fires). 
+async fn wait_for_n_blocks(client: RpcClient, n: u32) -> u32 { + let start_height = current_block_height(client.clone()).await; + let target = start_height + n; + let mut last = start_height; + loop { + tokio::time::sleep(Duration::from_millis(500)).await; + let h = current_block_height(client.clone()).await; + if h != last { + println!(" block height: {h}"); + last = h; + } + if h >= target { + return h; + } } } @@ -152,11 +499,15 @@ pub async fn create_genesis_aware_rpc_client( rpc_url: &Url, timeout: Duration, ) -> Result { - // First, create a temporary client without genesis metadata to discover the - // genesis block header and its commitment. - let mut rpc: RpcClient = Builder::new(rpc_url.clone()) - .with_tls() - .context("Failed to configure TLS for RPC client")? + let use_tls = rpc_url.scheme() == "https"; + + let tls_stage = Builder::new(rpc_url.clone()); + let timeout_stage = if use_tls { + tls_stage.with_tls().context("Failed to configure TLS for RPC client")? + } else { + tls_stage.without_tls() + }; + let mut rpc: RpcClient = timeout_stage .with_timeout(timeout) .without_metadata_version() .without_metadata_genesis() @@ -181,11 +532,13 @@ pub async fn create_genesis_aware_rpc_client( let genesis_commitment = genesis_header.commitment(); let genesis = genesis_commitment.to_hex(); - // Rebuild the client, this time including the required genesis metadata so that - // write RPCs like SubmitProvenTransaction are accepted by the node. - let rpc_client = Builder::new(rpc_url.clone()) - .with_tls() - .context("Failed to configure TLS for RPC client")? + let tls_stage = Builder::new(rpc_url.clone()); + let timeout_stage = if use_tls { + tls_stage.with_tls().context("Failed to configure TLS for RPC client")? 
+ } else { + tls_stage.without_tls() + }; + let rpc_client = timeout_stage .with_timeout(timeout) .without_metadata_version() .with_metadata_genesis(genesis) @@ -204,8 +557,8 @@ fn get_genesis_header_request() -> BlockHeaderByNumberRequest { } } -/// Creates a new faucet account. -fn create_faucet() -> Account { +/// Creates a new faucet account and returns it alongside its secret key. +fn create_faucet() -> (Account, SecretKey) { let coin_seed: [u64; 4] = rand::rng().random(); let mut rng = RandomCoin::new(coin_seed.map(Felt::new).into()); let key_pair = SecretKey::with_rng(&mut rng); @@ -220,7 +573,7 @@ fn create_faucet() -> Account { ) .build() .unwrap(); - AccountBuilder::new(init_seed) + let faucet = AccountBuilder::new(init_seed) .account_type(AccountType::FungibleFaucet) .storage_mode(AccountStorageMode::Private) .with_component(token_metadata) @@ -235,118 +588,141 @@ fn create_faucet() -> Account { AuthScheme::Falcon512Poseidon2, )) .build() - .unwrap() + .unwrap(); + (faucet, key_pair) } -/// Creates a new wallet account with a given public key. -fn create_account(public_key: PublicKey, index: u64, storage_mode: AccountStorageMode) -> Account { +/// Creates a new wallet account with the given public key, using `index` to vary +/// the init seed so each wallet ends up with a distinct account ID. 
+fn create_wallet( + public_key: miden_protocol::crypto::dsa::falcon512_poseidon2::PublicKey, + index: u64, +) -> Account { let init_seed: Vec<_> = index.to_be_bytes().into_iter().chain([0u8; 24]).collect(); AccountBuilder::new(init_seed.try_into().unwrap()) .account_type(AccountType::RegularAccountImmutableCode) - .storage_mode(storage_mode) + .storage_mode(AccountStorageMode::Private) .with_auth_component(AuthSingleSig::new(public_key.into(), AuthScheme::Falcon512Poseidon2)) .with_component(BasicWallet) .build() .unwrap() } -/// Creates a public P2ID note containing 10 tokens of the fungible asset associated with the -/// specified `faucet_id` and sent to the specified target account. -fn create_mint_note(faucet_id: AccountId, target_id: AccountId, rng: &mut RandomCoin) -> Note { - let asset = Asset::Fungible(FungibleAsset::new(faucet_id, 10).unwrap()); - P2idNote::create( - faucet_id, - target_id, - vec![asset], - miden_protocol::note::NoteType::Public, - miden_protocol::note::NoteAttachment::default(), - rng, - ) - .expect("note creation failed") +// BENCHMARK DATA STORE +// ================================================================================================ + +/// In-memory `DataStore` impl used to feed the [`TransactionExecutor`] when +/// generating real proofs locally. Modelled on the network-monitor's +/// `MonitorDataStore`. +pub struct BenchmarkDataStore { + accounts: HashMap, + block_header: BlockHeader, + partial_block_chain: PartialBlockchain, + mast_store: TransactionMastStore, } -/// Creates a transaction from the faucet that creates the given output notes. -/// Updates the faucet account to increase the issuance slot and it's nonce. 
-fn create_mint_tx( - block_ref: &BlockHeader, - faucet: &mut Account, - output_notes: Vec, -) -> ProvenTransaction { - let initial_account_hash = faucet.to_commitment(); - - let metadata_slot_name = TokenMetadata::metadata_slot(); - let slot = faucet.storage().get_item(metadata_slot_name).unwrap(); - faucet - .storage_mut() - .set_item(metadata_slot_name, [slot[0] + Felt::new(10), slot[1], slot[2], slot[3]].into()) - .unwrap(); +impl BenchmarkDataStore { + pub fn new(block_header: BlockHeader, partial_block_chain: PartialBlockchain) -> Self { + Self { + accounts: HashMap::new(), + block_header, + partial_block_chain, + mast_store: TransactionMastStore::new(), + } + } - faucet.increment_nonce(ONE).unwrap(); + pub fn add_account(&mut self, account: Account) { + self.mast_store.load_account_code(account.code()); + self.accounts.insert(account.id(), account); + } - let account_update = TxAccountUpdate::new( - faucet.id(), - initial_account_hash, - faucet.to_commitment(), - Word::empty(), - AccountUpdateDetails::Private, - ) - .unwrap(); - ProvenTransaction::new( - account_update, - Vec::::new(), - output_notes - .into_iter() - .map(|note| OutputNote::Public(PublicOutputNote::new(note).unwrap())) - .collect::>(), - block_ref.block_num(), - block_ref.commitment(), - FungibleAsset::new( - block_ref.fee_parameters().fee_faucet_id(), - u64::from(block_ref.fee_parameters().verification_base_fee()), - ) - .unwrap(), - u32::MAX.into(), - ExecutionProof::new_dummy(), - ) - .unwrap() + pub fn update_account(&mut self, account: Account) { + self.add_account(account); + } + + fn get_account(&self, account_id: AccountId) -> Result<&Account, DataStoreError> { + self.accounts.get(&account_id).ok_or_else(|| DataStoreError::Other { + error_msg: "unknown account".into(), + source: None, + }) + } } -/// Creates a transaction from the wallet that will the given output note. 
-fn create_consume_tx( - block_ref: &BlockHeader, - wallet: &mut Account, - input_note: Note, -) -> ProvenTransaction { - let initial_account_hash = wallet.to_commitment(); - - wallet.increment_nonce(ONE).unwrap(); - - let account_update = TxAccountUpdate::new( - wallet.id(), - initial_account_hash, - wallet.to_commitment(), - Word::empty(), - AccountUpdateDetails::Private, - ) - .unwrap(); +impl DataStore for BenchmarkDataStore { + async fn get_transaction_inputs( + &self, + account_id: AccountId, + _block_refs: BTreeSet, + ) -> Result<(PartialAccount, BlockHeader, PartialBlockchain), DataStoreError> { + let account = self.get_account(account_id)?; + let partial_account = PartialAccount::from(account); + Ok((partial_account, self.block_header.clone(), self.partial_block_chain.clone())) + } - let nullifier = input_note.nullifier(); - let header = input_note.header().clone(); - let input_note_commitment = InputNoteCommitment::from_parts_unchecked(nullifier, Some(header)); - - ProvenTransaction::new( - account_update, - vec![input_note_commitment], - Vec::::new(), - block_ref.block_num(), - block_ref.commitment(), - FungibleAsset::new( - block_ref.fee_parameters().fee_faucet_id(), - u64::from(block_ref.fee_parameters().verification_base_fee()), - ) - .unwrap(), - u32::MAX.into(), - ExecutionProof::new_dummy(), - ) - .unwrap() + async fn get_storage_map_witness( + &self, + account_id: AccountId, + map_root: Word, + map_key: StorageMapKey, + ) -> Result { + let account = self.get_account(account_id)?; + for slot in account.storage().slots() { + if let miden_protocol::account::StorageSlotContent::Map(map) = slot.content() { + if map.root() == map_root { + return Ok(map.open(&map_key)); + } + } + } + Err(DataStoreError::Other { + error_msg: format!("no storage map with the requested root in account {account_id}") + .into(), + source: None, + }) + } + + async fn get_foreign_account_inputs( + &self, + _foreign_account_id: AccountId, + _ref_block: BlockNumber, + ) -> Result 
{ + unimplemented!("foreign account inputs are not needed for the benchmark") + } + + async fn get_vault_asset_witnesses( + &self, + account_id: AccountId, + vault_root: Word, + vault_keys: BTreeSet, + ) -> Result, DataStoreError> { + let account = self.get_account(account_id)?; + + if account.vault().root() != vault_root { + return Err(DataStoreError::Other { + error_msg: "vault root mismatch".into(), + source: None, + }); + } + + Result::, _>::from_iter(vault_keys.into_iter().map(|vault_key| { + AssetWitness::new(account.vault().open(vault_key).into()).map_err(|err| { + DataStoreError::Other { + error_msg: "failed to open vault asset tree".into(), + source: Some(Box::new(err)), + } + }) + })) + } + + async fn get_note_script( + &self, + _script_root: NoteScriptRoot, + ) -> Result, DataStoreError> { + Ok(None) + } +} + +impl MastForestStore for BenchmarkDataStore { + fn get(&self, procedure_hash: &Word) -> Option> { + self.mast_store.get(procedure_hash) + } } From b01d087b1c93f5ae07434e73c419673e38d87d60 Mon Sep 17 00:00:00 2001 From: SantiagoPittella Date: Tue, 12 May 2026 16:10:39 -0300 Subject: [PATCH 3/9] feat(benchmark): improvements --- Cargo.lock | 5 +- Cargo.toml | 1 - bin/benchmark/Cargo.toml | 25 +- bin/benchmark/src/main.rs | 687 ++++++++++++++++++++++++++++++++------ 4 files changed, 591 insertions(+), 127 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b24fd37825..2b10a15d1a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2974,16 +2974,15 @@ version = "0.15.0" dependencies = [ "anyhow", "clap", - "miden-node-block-producer", + "fs-err", "miden-node-proto", - "miden-node-store", - "miden-node-utils", "miden-protocol", "miden-standards", "miden-tx", "rand 0.9.2", "rayon", "tokio", + "tonic", "url", ] diff --git a/Cargo.toml b/Cargo.toml index 1fea4309dd..25caa8dbd5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,7 +44,6 @@ debug = true [workspace.dependencies] # Workspace crates. 
-miden-benchmark = { path = "bin/benchmark", version = "0.15" } miden-large-smt-backend-rocksdb = { path = "crates/large-smt-backend-rocksdb", version = "0.15" } miden-node-block-producer = { path = "crates/block-producer", version = "0.15" } miden-node-db = { path = "crates/db", version = "0.15" } diff --git a/bin/benchmark/Cargo.toml b/bin/benchmark/Cargo.toml index bb12efb267..c8cc1f04c5 100644 --- a/bin/benchmark/Cargo.toml +++ b/bin/benchmark/Cargo.toml @@ -17,16 +17,15 @@ version.workspace = true workspace = true [dependencies] -anyhow = { workspace = true } -clap = { workspace = true, features = ["env", "string"] } -miden-node-block-producer = { workspace = true } -miden-node-proto = { workspace = true } -miden-node-store = { workspace = true } -miden-node-utils = { workspace = true } -miden-protocol = { features = ["std", "testing"], workspace = true } -miden-standards = { workspace = true } -miden-tx = { features = ["concurrent", "std"], workspace = true } -rand = { workspace = true } -rayon = { workspace = true } -tokio = { features = ["full"], workspace = true } -url = { features = ["serde"], workspace = true } +anyhow = { workspace = true } +clap = { features = ["env", "string"], workspace = true } +fs-err = { workspace = true } +miden-node-proto = { workspace = true } +miden-protocol = { features = ["std", "testing"], workspace = true } +miden-standards = { workspace = true } +miden-tx = { features = ["concurrent", "std"], workspace = true } +rand = { workspace = true } +rayon = { workspace = true } +tokio = { features = ["full"], workspace = true } +tonic = { workspace = true } +url = { features = ["serde"], workspace = true } diff --git a/bin/benchmark/src/main.rs b/bin/benchmark/src/main.rs index 26c97aaf83..8468fb6886 100644 --- a/bin/benchmark/src/main.rs +++ b/bin/benchmark/src/main.rs @@ -4,7 +4,7 @@ use std::collections::{BTreeSet, HashMap}; use std::path::PathBuf; use std::sync::Arc; use std::sync::atomic::{AtomicU64, Ordering}; -use 
std::time::{Duration, Instant}; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use anyhow::{Context, Result}; use clap::{Parser, Subcommand}; @@ -22,7 +22,7 @@ use miden_protocol::account::{ StorageMapKey, }; use miden_protocol::asset::{Asset, AssetVaultKey, AssetWitness, FungibleAsset, TokenSymbol}; -use miden_protocol::block::{BlockHeader, BlockNumber}; +use miden_protocol::block::{BlockHeader, BlockNumber, SignedBlock}; use miden_protocol::crypto::dsa::falcon512_poseidon2::SecretKey; use miden_protocol::crypto::merkle::mmr::{MmrPeaks, PartialMmr}; use miden_protocol::crypto::rand::RandomCoin; @@ -34,6 +34,7 @@ use miden_protocol::transaction::{ PartialBlockchain, ProvenTransaction, TransactionArgs, + TransactionId, }; use miden_protocol::utils::serde::{Deserializable, Serializable}; use miden_protocol::{Felt, MastForest, Word}; @@ -95,6 +96,11 @@ pub enum Command { /// Number of concurrent submission tasks. #[arg(long, default_value_t = 32)] concurrency: usize, + /// Number of blocks to wait after the last submission RPC returns + /// before checking which of our txs have been included on-chain. + /// Larger values give the mempool more time to drain a backlog. 
+ #[arg(long, default_value_t = 3)] + wait_blocks: u32, }, } @@ -108,18 +114,24 @@ impl Cli { async fn run(self) { match self.command { Command::CreateProofs { rpc_url, num_transactions } => { - create_proofs(rpc_url, num_transactions).await + create_proofs(rpc_url, num_transactions).await; }, - Command::RunBenchmark { rpc_url, concurrency } => { - run_benchmark(rpc_url, concurrency).await + Command::RunBenchmark { rpc_url, concurrency, wait_blocks } => { + run_benchmark(rpc_url, concurrency, wait_blocks).await; }, } } } +#[expect( + clippy::too_many_lines, + reason = "single linear orchestration of genesis fetch + mint phase + consume phase; \ + splitting would just shuffle locals (faucet, data_store, authenticator) around" +)] async fn create_proofs(rpc_url: Url, num_transactions: u64) { - let mut rpc_client = - create_genesis_aware_rpc_client(&rpc_url, Duration::from_secs(10)).await.unwrap(); + let mut rpc_client = create_genesis_aware_rpc_client(&rpc_url, Duration::from_secs(10)) + .await + .unwrap(); println!("Fetching genesis block header from {rpc_url}..."); let genesis_header_proto = rpc_client @@ -142,7 +154,7 @@ async fn create_proofs(rpc_url: Url, num_transactions: u64) { println!("Creating {num_transactions} wallets in parallel..."); let wallets: Vec = (0..num_transactions) .into_par_iter() - .map(|index| create_wallet(wallet_public_key.clone(), index)) + .map(|index| create_wallet(&wallet_public_key, index)) .collect(); let genesis_chain_mmr = @@ -168,6 +180,9 @@ async fn create_proofs(rpc_url: Url, num_transactions: u64) { let mut mint_txs: Vec = Vec::with_capacity(num_transactions as usize); let mut mint_tx_inputs: Vec> = Vec::with_capacity(num_transactions as usize); let mut mint_notes: Vec = Vec::with_capacity(num_transactions as usize); + let mint_phase_start = Instant::now(); + let mut mint_exec_total = Duration::ZERO; + let mut mint_prove_total = Duration::ZERO; for index in 0..num_transactions { let wallet_id = wallets[index as usize].id(); @@ 
-192,9 +207,9 @@ async fn create_proofs(rpc_url: Url, num_transactions: u64) { let mut tx_args = TransactionArgs::default().with_tx_script(script); tx_args.add_output_note_recipient(Box::new(note.recipient().clone())); - let executor = - TransactionExecutor::new(&data_store).with_authenticator(&authenticator); + let executor = TransactionExecutor::new(&data_store).with_authenticator(&authenticator); + let exec_t0 = Instant::now(); let executed_tx = Box::pin(executor.execute_transaction( faucet_id, genesis_header.block_num(), @@ -203,14 +218,14 @@ async fn create_proofs(rpc_url: Url, num_transactions: u64) { )) .await .expect("failed to execute mint transaction"); + mint_exec_total += exec_t0.elapsed(); let tx_inputs_bytes = executed_tx.tx_inputs().to_bytes(); let delta = executed_tx.account_delta().clone(); - let proven_tx = prover - .prove(executed_tx) - .await - .expect("failed to prove mint transaction"); + let prove_t0 = Instant::now(); + let proven_tx = prover.prove(executed_tx).await.expect("failed to prove mint transaction"); + mint_prove_total += prove_t0.elapsed(); // Evolve the faucet state for the next iteration. 
The first mint of a // never-before-seen account produces a full-state delta (because the @@ -220,11 +235,9 @@ async fn create_proofs(rpc_url: Url, num_transactions: u64) { faucet = Account::try_from(&delta) .expect("failed to materialize faucet from full-state delta"); } else { - faucet - .apply_delta(&delta) - .expect("failed to apply faucet delta"); + faucet.apply_delta(&delta).expect("failed to apply faucet delta"); } - data_store.update_account(faucet.clone()); + data_store.add_account(faucet.clone()); mint_txs.push(proven_tx); mint_tx_inputs.push(tx_inputs_bytes); @@ -234,23 +247,34 @@ async fn create_proofs(rpc_url: Url, num_transactions: u64) { println!(" proved {} / {num_transactions} mint txs", index + 1); } } + let mint_phase_elapsed = mint_phase_start.elapsed(); + print_proving_summary( + "Mint", + num_transactions, + mint_phase_elapsed, + mint_exec_total, + mint_prove_total, + ); // Consume phase — also sequential for now (each tx is one wallet, independent // wallets, so this could be parallelized later with bounded concurrency). 
println!("Proving {num_transactions} consume transactions (sequential)..."); let mut consume_txs: Vec = Vec::with_capacity(num_transactions as usize); let mut consume_tx_inputs: Vec> = Vec::with_capacity(num_transactions as usize); + let consume_phase_start = Instant::now(); + let mut consume_exec_total = Duration::ZERO; + let mut consume_prove_total = Duration::ZERO; for index in 0..num_transactions { let wallet_id = wallets[index as usize].id(); let note = mint_notes[index as usize].clone(); let input_note = InputNote::Unauthenticated { note }; - let input_notes = InputNotes::new(vec![input_note]) - .expect("failed to construct input notes for consume"); + let input_notes = + InputNotes::new(vec![input_note]).expect("failed to construct input notes for consume"); - let executor = - TransactionExecutor::new(&data_store).with_authenticator(&authenticator); + let executor = TransactionExecutor::new(&data_store).with_authenticator(&authenticator); + let exec_t0 = Instant::now(); let executed_tx = Box::pin(executor.execute_transaction( wallet_id, genesis_header.block_num(), @@ -259,13 +283,14 @@ async fn create_proofs(rpc_url: Url, num_transactions: u64) { )) .await .expect("failed to execute consume transaction"); + consume_exec_total += exec_t0.elapsed(); let tx_inputs_bytes = executed_tx.tx_inputs().to_bytes(); - let proven_tx = prover - .prove(executed_tx) - .await - .expect("failed to prove consume transaction"); + let prove_t0 = Instant::now(); + let proven_tx = + prover.prove(executed_tx).await.expect("failed to prove consume transaction"); + consume_prove_total += prove_t0.elapsed(); consume_txs.push(proven_tx); consume_tx_inputs.push(tx_inputs_bytes); @@ -274,32 +299,84 @@ async fn create_proofs(rpc_url: Url, num_transactions: u64) { println!(" proved {} / {num_transactions} consume txs", index + 1); } } + let consume_phase_elapsed = consume_phase_start.elapsed(); + print_proving_summary( + "Consume", + num_transactions, + consume_phase_elapsed, + 
consume_exec_total, + consume_prove_total, + ); let out_dir = PathBuf::from(PROOFS_DIR); println!("Writing proofs to {}/", out_dir.display()); - std::fs::create_dir_all(&out_dir).unwrap(); - std::fs::write(out_dir.join("faucet.bin"), faucet.to_bytes()).unwrap(); - std::fs::write(out_dir.join("wallets.bin"), wallets.to_bytes()).unwrap(); - std::fs::write(out_dir.join("mint_txs.bin"), mint_txs.to_bytes()).unwrap(); - std::fs::write(out_dir.join("mint_tx_inputs.bin"), mint_tx_inputs.to_bytes()).unwrap(); - std::fs::write(out_dir.join("consume_txs.bin"), consume_txs.to_bytes()).unwrap(); - std::fs::write(out_dir.join("consume_tx_inputs.bin"), consume_tx_inputs.to_bytes()).unwrap(); + fs_err::create_dir_all(&out_dir).unwrap(); + write_to_file(&out_dir.join("mint_txs.bin"), &mint_txs); + write_to_file(&out_dir.join("mint_tx_inputs.bin"), &mint_tx_inputs); + write_to_file(&out_dir.join("consume_txs.bin"), &consume_txs); + write_to_file(&out_dir.join("consume_tx_inputs.bin"), &consume_tx_inputs); println!("Done."); } -async fn run_benchmark(rpc_url: Url, concurrency: usize) { +/// Prints a per-phase summary of how long proof generation took, broken down +/// into the executor (VM execution) and prover (STARK proving) costs, plus the +/// mean per tx for each so that runs of different sizes can be compared. 
+fn print_proving_summary( + label: &str, + num_transactions: u64, + wall: Duration, + exec_total: Duration, + prove_total: Duration, +) { + let n_u32 = u32::try_from(num_transactions).unwrap_or(u32::MAX); + let exec_mean = if num_transactions > 0 { + exec_total / n_u32 + } else { + Duration::ZERO + }; + let prove_mean = if num_transactions > 0 { + prove_total / n_u32 + } else { + Duration::ZERO + }; + let per_tx_mean = if num_transactions > 0 { + (exec_total + prove_total) / n_u32 + } else { + Duration::ZERO + }; + println!("{label} proving summary (n={num_transactions}):"); + println!(" wall time: {}", format_duration_secs(wall)); + println!( + " execute_transaction: total={} mean={}/tx", + format_duration_secs(exec_total), + format_duration_secs(exec_mean), + ); + println!( + " prover.prove: total={} mean={}/tx", + format_duration_secs(prove_total), + format_duration_secs(prove_mean), + ); + println!(" exec+prove per tx: mean={}/tx", format_duration_secs(per_tx_mean)); +} + +async fn run_benchmark(rpc_url: Url, concurrency: usize, wait_blocks: u32) { let in_dir = PathBuf::from(PROOFS_DIR); println!("Loading mint txs from {}", in_dir.join("mint_txs.bin").display()); - let mint_txs = read_proven_txs(&in_dir.join("mint_txs.bin")); - let mint_tx_inputs = read_tx_inputs(&in_dir.join("mint_tx_inputs.bin")); + let mint_txs: Vec = read_from_file(&in_dir.join("mint_txs.bin")); + let mint_tx_inputs: Vec> = read_from_file(&in_dir.join("mint_tx_inputs.bin")); assert_eq!(mint_txs.len(), mint_tx_inputs.len(), "mint tx/inputs length mismatch"); println!("Loading consume txs from {}", in_dir.join("consume_txs.bin").display()); - let consume_txs = read_proven_txs(&in_dir.join("consume_txs.bin")); - let consume_tx_inputs = read_tx_inputs(&in_dir.join("consume_tx_inputs.bin")); + let consume_txs: Vec = read_from_file(&in_dir.join("consume_txs.bin")); + let consume_tx_inputs: Vec> = read_from_file(&in_dir.join("consume_tx_inputs.bin")); assert_eq!(consume_txs.len(), 
consume_tx_inputs.len(), "consume tx/inputs length mismatch"); + // Compute the tx-id master lists up front so we can match them against + // on-chain block contents later, without having to interrogate the node. + let mint_ids: Vec = mint_txs.iter().map(ProvenTransaction::id).collect(); + let consume_ids: Vec = consume_txs.iter().map(ProvenTransaction::id).collect(); + println!("Connecting to {rpc_url}..."); let rpc_client = create_genesis_aware_rpc_client(&rpc_url, Duration::from_secs(30)) .await @@ -313,91 +390,340 @@ async fn run_benchmark(rpc_url: Url, concurrency: usize) { submits must be serialized for the mempool to chain them)...", mint_txs.len() ); - let (mint_ok, mint_err, mint_elapsed) = - submit_sequential(rpc_client.clone(), mint_txs, mint_tx_inputs).await; - println!( - " mint: ok={mint_ok} err={mint_err} in {:.1}s ({:.2} tx/s)", - mint_elapsed.as_secs_f64(), - mint_ok as f64 / mint_elapsed.as_secs_f64() - ); + let mint_stats = submit_sequential(rpc_client.clone(), mint_txs, mint_tx_inputs).await; + print_phase_progress("mint", &mint_stats); println!("Submitting {} consume txs with concurrency={concurrency}...", consume_txs.len()); - let (consume_ok, consume_err, consume_elapsed) = + let consume_stats = submit_all(rpc_client.clone(), consume_txs, consume_tx_inputs, concurrency).await; + print_phase_progress("consume", &consume_stats); + + println!("Waiting {wait_blocks} blocks for the last submissions to land..."); + let h_final = wait_for_n_blocks(rpc_client.clone(), wait_blocks).await; + + println!("Checking which submitted txs landed in blocks {}..={}", h_start + 1, h_final); + let ack_by_id = build_ack_map(&mint_ids, &mint_stats, &consume_ids, &consume_stats); + let inclusion = compute_inclusion(rpc_client.clone(), h_start + 1, h_final, ack_by_id).await; + + print_summary(h_start, h_final, &mint_stats, &consume_stats, concurrency, &inclusion); +} + +fn print_phase_progress(label: &str, stats: &PhaseStats) { + let elapsed = 
/// Computes `count / elapsed`, treating a zero-or-negative elapsed window as
/// zero. Wrapping the cast in a helper keeps the precision-loss expect tightly
/// scoped — the loss is harmless for display purposes.
#[expect(
    clippy::cast_precision_loss,
    reason = "presentational rate; precision loss past 2^52 events is irrelevant"
)]
fn rate_per_second(count: u64, elapsed: Duration) -> f64 {
    let window_secs = elapsed.as_secs_f64();
    if window_secs <= 0.0 {
        return 0.0;
    }
    count as f64 / window_secs
}

/// Computes `100 * num / den` as a percentage, returning 0 when `den == 0`.
#[expect(
    clippy::cast_precision_loss,
    reason = "presentational percentage; precision loss past 2^52 is irrelevant"
)]
fn ratio_pct(num: u64, den: u64) -> f64 {
    match den {
        0 => 0.0,
        _ => (num as f64) * 100.0 / (den as f64),
    }
}
+fn build_ack_map( + mint_ids: &[TransactionId], + mint_stats: &PhaseStats, + consume_ids: &[TransactionId], + consume_stats: &PhaseStats, +) -> HashMap { + let mut map = HashMap::new(); + for outcome in &mint_stats.outcomes { + if let Some(ack_at) = outcome.ack_at { + map.insert(mint_ids[outcome.index], ack_at); + } + } + for outcome in &consume_stats.outcomes { + if let Some(ack_at) = outcome.ack_at { + map.insert(consume_ids[outcome.index], ack_at); + } + } + map +} - let total_submitted = mint_ok + consume_ok; - let total_submission_secs = (mint_elapsed + consume_elapsed).as_secs_f64(); +fn print_summary( + h_start: u32, + h_final: u32, + mint: &PhaseStats, + consume: &PhaseStats, + concurrency: usize, + inclusion: &InclusionResult, +) { println!(); println!("=== Summary ==="); - println!("Chain height: {h_start} -> {h_final} ({} blocks)", h_final - h_start); - println!("Total successful submissions: {total_submitted}"); - println!("Total submission time: {:.1}s", total_submission_secs); - println!("Submission TPS: {:.0}", total_submitted as f64 / total_submission_secs); -} - -fn read_proven_txs(path: &std::path::Path) -> Vec { - let bytes = std::fs::read(path).unwrap_or_else(|_| { - panic!( - "failed to read {} — run `create-proofs` first", - path.display() - ) - }); - Vec::::read_from_bytes(&bytes) - .unwrap_or_else(|_| panic!("failed to deserialize {}", path.display())) + println!( + "Chain height: {h_start} -> {h_final} ({} blocks, of which {} contained at least one of our txs)", + h_final - h_start, + inclusion.blocks_with_our_txs + ); + println!(); + print_phase_summary("Mint phase (sequential)", mint); + println!(); + print_phase_summary(&format!("Consume phase (concurrent, c={concurrency})"), consume); + println!(); + print_inclusion_summary(inclusion); +} + +fn print_phase_summary(title: &str, stats: &PhaseStats) { + let ok = stats.ok_count(); + let err = stats.err_count(); + let elapsed = stats.elapsed.as_secs_f64(); + let total = stats.outcomes.len() 
as u64; + + println!("{title}:"); + println!( + " ok = {ok} / {total} err = {err} ({})", + format_err_breakdown(stats.err_by_code()), + ); + + let mut latencies = stats.submit_latencies(); + if let Some(p) = percentiles(&mut latencies) { + println!( + " submit RPC latency: mean={mean} p50={p50} p95={p95} p99={p99} max={max}", + mean = format_duration_ms(p.mean), + p50 = format_duration_ms(p.p50), + p95 = format_duration_ms(p.p95), + p99 = format_duration_ms(p.p99), + max = format_duration_ms(p.max), + ); + } else { + println!(" submit RPC latency: (no successful submissions)"); + } + + let rate = rate_per_second(stats.ok_count(), stats.elapsed); + println!(" elapsed = {elapsed:.1}s, RPC ack rate = {rate:.1} tx/s"); +} + +fn print_inclusion_summary(inclusion: &InclusionResult) { + let submitted = inclusion.submitted_count; + let included = inclusion.included_count; + let drop = submitted.saturating_sub(included); + let drop_pct = ratio_pct(drop, submitted); + + println!("Inclusion (per-tx ID match against block contents):"); + println!( + " included = {included} / {submitted} submitted ({drop} missing, {drop_pct:.1}% drop)", + ); + + if inclusion.blocks_with_our_txs == 0 { + println!(" no blocks observed containing any of our txs"); + return; + } + + let txs_per_block: Vec = inclusion.txs_per_block_when_present.clone(); + let sum_txs: u32 = txs_per_block.iter().copied().sum(); + let mean_tpb = + f64::from(sum_txs) / f64::from(u32::try_from(txs_per_block.len()).unwrap_or(u32::MAX)); + let max_tpb = txs_per_block.iter().copied().max().unwrap_or(0); + println!( + " blocks with our txs = {} (mean txs/block when present = {:.1}, max = {})", + inclusion.blocks_with_our_txs, mean_tpb, max_tpb + ); + + let span = u64::from(inclusion.last_inclusion_ts) + .saturating_sub(u64::from(inclusion.first_inclusion_ts)); + if span == 0 { + println!( + " inclusion TPS: all {included} txs landed in a single block at timestamp {} \ + (no usable timespan to divide over)", + 
inclusion.first_inclusion_ts + ); + } else { + let tps = rate_per_second(included, Duration::from_secs(span)); + println!( + " inclusion TPS = {included} included / {span}s spanning blocks {}..={} => {tps:.1} tx/s", + inclusion.first_inclusion_block, inclusion.last_inclusion_block + ); + } + + let mut lats = inclusion.inclusion_latencies.clone(); + if let Some(p) = percentiles(&mut lats) { + println!( + " inclusion latency (submit_ack -> block timestamp): mean={mean} p50={p50} p95={p95} p99={p99} max={max}", + mean = format_duration_secs(p.mean), + p50 = format_duration_secs(p.p50), + p95 = format_duration_secs(p.p95), + p99 = format_duration_secs(p.p99), + max = format_duration_secs(p.max), + ); + } } -fn read_tx_inputs(path: &std::path::Path) -> Vec> { - let bytes = std::fs::read(path).unwrap_or_else(|_| { - panic!( - "failed to read {} — run `create-proofs` first", - path.display() - ) +fn read_from_file(path: &std::path::Path) -> T { + let bytes = fs_err::read(path).unwrap_or_else(|_| { + panic!("failed to read {} — run `create-proofs` first", path.display()) }); - Vec::>::read_from_bytes(&bytes) + T::read_from_bytes(&bytes) .unwrap_or_else(|_| panic!("failed to deserialize {}", path.display())) } +fn write_to_file(path: &std::path::Path, value: &T) { + fs_err::write(path, value.to_bytes()) + .unwrap_or_else(|err| panic!("failed to write {}: {err}", path.display())); +} + +// SUBMISSION STATS +// ================================================================================================ + +/// Outcome of a single `submit_proven_transaction` RPC. +#[derive(Debug)] +struct SubmitOutcome { + /// Position of this tx in the original input vec — used to recover the + /// corresponding `TransactionId` from the caller-owned id list. + index: usize, + /// `Ok(rpc_round_trip_duration)` on success, `Err(grpc_code)` on failure. + result: Result, + /// Wall-clock timestamp at which the RPC returned `Ok`. `None` on error. 
+ /// Stored as `SystemTime` so it is directly comparable to block headers' + /// unix-second timestamps when computing inclusion latency. + ack_at: Option, +} + +/// Aggregated stats for one submission phase (mint or consume). +#[derive(Debug)] +struct PhaseStats { + /// Wall-clock duration of the entire phase. + elapsed: Duration, + /// One entry per input tx, aligned by `index`. + outcomes: Vec, +} + +impl PhaseStats { + fn ok_count(&self) -> u64 { + self.outcomes.iter().filter(|o| o.result.is_ok()).count() as u64 + } + + fn err_count(&self) -> u64 { + self.outcomes.iter().filter(|o| o.result.is_err()).count() as u64 + } + + fn submit_latencies(&self) -> Vec { + self.outcomes.iter().filter_map(|o| o.result.as_ref().ok().copied()).collect() + } + + fn err_by_code(&self) -> HashMap { + let mut map: HashMap = HashMap::new(); + for o in &self.outcomes { + if let Err(code) = o.result { + *map.entry(code).or_insert(0) += 1; + } + } + map + } +} + +fn format_err_breakdown(by_code: HashMap) -> String { + if by_code.is_empty() { + return "no errors".to_string(); + } + let mut entries: Vec<(tonic::Code, u64)> = by_code.into_iter().collect(); + entries.sort_by(|a, b| b.1.cmp(&a.1)); + let parts: Vec = entries.iter().map(|(c, n)| format!("{c:?}={n}")).collect(); + parts.join(", ") +} + +fn format_duration_ms(d: Duration) -> String { + format!("{:.1}ms", d.as_secs_f64() * 1000.0) +} + +fn format_duration_secs(d: Duration) -> String { + format!("{:.2}s", d.as_secs_f64()) +} + +#[derive(Debug, Clone, Copy)] +struct Percentiles { + mean: Duration, + p50: Duration, + p95: Duration, + p99: Duration, + max: Duration, +} + +/// Returns `None` if there are no samples. +fn percentiles(samples: &mut [Duration]) -> Option { + if samples.is_empty() { + return None; + } + samples.sort(); + let n = samples.len(); + // Integer index for percentile `num/den`. Picked over an `f64` cast to + // avoid the cast_sign_loss / cast_precision_loss footguns. 
+ let pick = |num: usize, den: usize| -> Duration { + let idx = (n * num / den).min(n - 1); + samples[idx] + }; + let sum: Duration = samples.iter().copied().sum(); + let mean = sum / u32::try_from(n).unwrap_or(u32::MAX); + Some(Percentiles { + mean, + p50: pick(50, 100), + p95: pick(95, 100), + p99: pick(99, 100), + max: *samples.last().unwrap(), + }) +} + async fn submit_all( client: RpcClient, txs: Vec, tx_inputs: Vec>, concurrency: usize, -) -> (u64, u64, Duration) { - /// How many distinct error messages to surface to the console. +) -> PhaseStats { + /// How many distinct error messages to surface to the console as they + /// happen. The full failure breakdown still appears in the summary. const MAX_ERRORS_TO_PRINT: u64 = 5; let start = Instant::now(); let semaphore = Arc::new(Semaphore::new(concurrency)); - let ok = Arc::new(AtomicU64::new(0)); - let err = Arc::new(AtomicU64::new(0)); + // Incrementing-only counter used purely to budget the live error prints. + // It is never read on the hot path, so it does not introduce any + // submit-side synchronization beyond what was already there. 
let printed = Arc::new(AtomicU64::new(0)); - let mut handles = Vec::with_capacity(txs.len()); + let total = txs.len(); + let mut set = tokio::task::JoinSet::new(); for (i, (tx, inputs)) in txs.into_iter().zip(tx_inputs.into_iter()).enumerate() { let permit = semaphore.clone().acquire_owned().await.unwrap(); let mut client = client.clone(); - let ok = ok.clone(); - let err = err.clone(); let printed = printed.clone(); - handles.push(tokio::spawn(async move { + set.spawn(async move { let request = proto::transaction::ProvenTransaction { transaction: tx.to_bytes(), transaction_inputs: Some(inputs), }; - match client.submit_proven_transaction(request).await { - Ok(_) => { - ok.fetch_add(1, Ordering::Relaxed); + let t0 = Instant::now(); + let outcome = match client.submit_proven_transaction(request).await { + Ok(_) => SubmitOutcome { + index: i, + result: Ok(t0.elapsed()), + ack_at: Some(SystemTime::now()), }, Err(status) => { - err.fetch_add(1, Ordering::Relaxed); if printed.fetch_add(1, Ordering::Relaxed) < MAX_ERRORS_TO_PRINT { eprintln!( " tx idx {i} failed: code={:?} message={}", @@ -405,16 +731,26 @@ async fn submit_all( status.message() ); } + SubmitOutcome { + index: i, + result: Err(status.code()), + ack_at: None, + } }, - } + }; drop(permit); - })); + outcome + }); } - for h in handles { - let _ = h.await; + + // Outcomes carry their original `index`, so completion order is fine — + // downstream summarizers don't depend on the vec being in spawn order. + let mut outcomes = Vec::with_capacity(total); + while let Some(res) = set.join_next().await { + outcomes.push(res.expect("submission task panicked")); } - (ok.load(Ordering::Relaxed), err.load(Ordering::Relaxed), start.elapsed()) + PhaseStats { elapsed: start.elapsed(), outcomes } } /// Submit txs one at a time, awaiting each RPC response before sending the @@ -425,14 +761,13 @@ async fn submit_all( /// `submit_proven_transaction` calls themselves, not wait for block /// inclusion in between. 
async fn submit_sequential( - client: RpcClient, + mut client: RpcClient, txs: Vec, tx_inputs: Vec>, -) -> (u64, u64, Duration) { +) -> PhaseStats { let start = Instant::now(); - let mut ok: u64 = 0; - let mut err: u64 = 0; let total = txs.len(); + let mut outcomes = Vec::with_capacity(total); for (i, (tx, inputs)) in txs.into_iter().zip(tx_inputs.into_iter()).enumerate() { let request = proto::transaction::ProvenTransaction { @@ -440,18 +775,151 @@ async fn submit_sequential( transaction_inputs: Some(inputs), }; - let mut submit_client = client.clone(); - match submit_client.submit_proven_transaction(request).await { - Ok(_) => ok += 1, - Err(e) => { - err += 1; - eprintln!(" tx {} / {total} failed: {}", i + 1, e); + let t0 = Instant::now(); + let outcome = match client.submit_proven_transaction(request).await { + Ok(_) => SubmitOutcome { + index: i, + result: Ok(t0.elapsed()), + ack_at: Some(SystemTime::now()), + }, + Err(status) => { + eprintln!(" tx {} / {total} failed: {status}", i + 1); + SubmitOutcome { + index: i, + result: Err(status.code()), + ack_at: None, + } + }, + }; + outcomes.push(outcome); + } + + PhaseStats { elapsed: start.elapsed(), outcomes } +} + +// INCLUSION CHECK +// ================================================================================================ + +#[derive(Debug)] +struct InclusionResult { + submitted_count: u64, + included_count: u64, + /// Block number of the earliest block containing any of our txs. + first_inclusion_block: u32, + /// Block number of the latest block containing any of our txs. + last_inclusion_block: u32, + /// Header timestamps (unix seconds) of those two blocks. Used to compute + /// inclusion TPS as `included_count / (last_ts - first_ts)`. + first_inclusion_ts: u32, + last_inclusion_ts: u32, + /// Number of blocks in the scanned range that contained at least one of + /// our txs (excludes empty blocks and blocks unrelated to this run). 
+ blocks_with_our_txs: u32, + /// Per-block count of our txs, recorded only for blocks where the count + /// is non-zero. Used for mean/max txs-per-block in the summary. + txs_per_block_when_present: Vec, + /// For each successfully submitted tx that landed in a block: the + /// elapsed time from RPC ack to that block's header timestamp. + inclusion_latencies: Vec, +} + +/// Walk every block from `from_block` to `to_block` inclusive, deserialize +/// it as a [`SignedBlock`], and check which of the submitted tx-ids appear +/// in each block's transaction headers. Sequential because the volumes are +/// small and the call is cheap. +async fn compute_inclusion( + mut client: RpcClient, + from_block: u32, + to_block: u32, + mut ack_by_id: HashMap, +) -> InclusionResult { + let submitted_count = ack_by_id.len() as u64; + let mut included_count: u64 = 0; + let mut first_inclusion_block: u32 = 0; + let mut last_inclusion_block: u32 = 0; + let mut first_inclusion_ts: u32 = 0; + let mut last_inclusion_ts: u32 = 0; + let mut blocks_with_our_txs: u32 = 0; + let mut txs_per_block_when_present: Vec = Vec::new(); + let mut inclusion_latencies: Vec = Vec::new(); + + if from_block > to_block { + return InclusionResult { + submitted_count, + included_count, + first_inclusion_block, + last_inclusion_block, + first_inclusion_ts, + last_inclusion_ts, + blocks_with_our_txs, + txs_per_block_when_present, + inclusion_latencies, + }; + } + + for block_num in from_block..=to_block { + let request = proto::blockchain::BlockRequest { block_num, include_proof: None }; + let response = match client.get_block_by_number(request).await { + Ok(r) => r.into_inner(), + Err(status) => { + eprintln!( + " warning: get_block_by_number({block_num}) failed: {status} \ + — skipping this block in the inclusion scan" + ); + continue; + }, + }; + let Some(bytes) = response.block else { + continue; + }; + let signed_block = match SignedBlock::read_from_bytes(&bytes) { + Ok(sb) => sb, + Err(err) => { + 
eprintln!( + " warning: failed to deserialize SignedBlock for block {block_num}: {err}" + ); + continue; }, + }; + + let block_ts = signed_block.header().timestamp(); + let block_ts_system = UNIX_EPOCH + Duration::from_secs(u64::from(block_ts)); + let mut hits_in_this_block: u32 = 0; + + for header in signed_block.body().transactions().as_slice() { + if let Some(ack_at) = ack_by_id.remove(&header.id()) { + hits_in_this_block += 1; + included_count += 1; + // Block timestamps have 1-second resolution and may round + // down past the ack instant; clamp negative deltas to zero. + let latency = block_ts_system.duration_since(ack_at).unwrap_or_default(); + inclusion_latencies.push(latency); + } + } + + if hits_in_this_block > 0 { + if blocks_with_our_txs == 0 { + first_inclusion_block = block_num; + first_inclusion_ts = block_ts; + } + last_inclusion_block = block_num; + last_inclusion_ts = block_ts; + blocks_with_our_txs += 1; + txs_per_block_when_present.push(hits_in_this_block); } } - println!(" submitted {total} (ok={ok} err={err})"); - (ok, err, start.elapsed()) + InclusionResult { + submitted_count, + included_count, + first_inclusion_block, + last_inclusion_block, + first_inclusion_ts, + last_inclusion_ts, + blocks_with_our_txs, + txs_per_block_when_present, + inclusion_latencies, + } } async fn current_block_height(mut client: RpcClient) -> u32 { @@ -595,14 +1063,17 @@ fn create_faucet() -> (Account, SecretKey) { /// Creates a new wallet account with the given public key, using `index` to vary /// the init seed so each wallet ends up with a distinct account ID. 
fn create_wallet( - public_key: miden_protocol::crypto::dsa::falcon512_poseidon2::PublicKey, + public_key: &miden_protocol::crypto::dsa::falcon512_poseidon2::PublicKey, index: u64, ) -> Account { let init_seed: Vec<_> = index.to_be_bytes().into_iter().chain([0u8; 24]).collect(); AccountBuilder::new(init_seed.try_into().unwrap()) .account_type(AccountType::RegularAccountImmutableCode) .storage_mode(AccountStorageMode::Private) - .with_auth_component(AuthSingleSig::new(public_key.into(), AuthScheme::Falcon512Poseidon2)) + .with_auth_component(AuthSingleSig::new( + public_key.clone().into(), + AuthScheme::Falcon512Poseidon2, + )) .with_component(BasicWallet) .build() .unwrap() @@ -636,10 +1107,6 @@ impl BenchmarkDataStore { self.accounts.insert(account.id(), account); } - pub fn update_account(&mut self, account: Account) { - self.add_account(account); - } - fn get_account(&self, account_id: AccountId) -> Result<&Account, DataStoreError> { self.accounts.get(&account_id).ok_or_else(|| DataStoreError::Other { error_msg: "unknown account".into(), From c2d0a59db3153ce6a169b849ab47725ebf418e3d Mon Sep 17 00:00:00 2001 From: SantiagoPittella Date: Wed, 13 May 2026 09:52:55 -0300 Subject: [PATCH 4/9] chore: make bp configurable + use block header information --- .gitignore | 6 + Cargo.toml | 8 +- bin/benchmark/README.md | 224 ++++++++++++++++++++++ bin/benchmark/src/main.rs | 213 ++++++++++++++------ bin/node/src/commands/block_producer.rs | 17 ++ crates/block-producer/src/lib.rs | 9 +- crates/block-producer/src/server/mod.rs | 7 +- crates/block-producer/src/server/tests.rs | 8 +- 8 files changed, 424 insertions(+), 68 deletions(-) diff --git a/.gitignore b/.gitignore index b17513dafd..2b71420925 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,12 @@ miden-node-stress-test-* /accounts /data +# Native benchmark runtime artifacts (data dir, logs, snapshots, proofs). 
+/node-data*
+/logs
+/snapshots
+/benchmark-proofs
+
 # Sqlite db files
 *.sqlite3
 *.sqlite3-shm
diff --git a/Cargo.toml b/Cargo.toml
index 25caa8dbd5..84556999a6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -155,6 +155,10 @@ should_panic_without_expect = "allow" # We don't care about the specific panic
 # Configure `cargo-typos`
 [workspace.metadata.typos]
 files.extend-exclude = [
-  "*.min.js", # Minified JS bundles (vendored htmx etc.).
-  "*.svg", # SVG files.
+  "*.min.js", # Minified JS bundles (vendored htmx etc.).
+  "*.svg", # SVG files.
+  "benchmark-proofs/", # miden-benchmark output.
+  "logs/", # Native benchmark process logs.
+  "node-data*/", # Native benchmark runtime data dir + any sibling clones (RocksDB LOGs etc.).
+  "snapshots/", # Snapshot tarballs used by the bench replay workflow.
 ]
diff --git a/bin/benchmark/README.md b/bin/benchmark/README.md
index e69de29bb2..6675890b61 100644
--- a/bin/benchmark/README.md
+++ b/bin/benchmark/README.md
@@ -0,0 +1,224 @@
+# Miden benchmark
+
+A binary for measuring transaction throughput on a Miden node by submitting locally-generated proven transactions over RPC and reporting submission ack rate, block inclusion rate, and end-to-end latency.
+
+## Overview
+
+End-to-end benchmarking is split into two phases because proof generation is expensive and shouldn't be on the critical path of the throughput measurement:
+
+1. **`create-proofs`**: Generates a faucet, N wallets, and `2 * N` proven
+   transactions (one mint and one consume per wallet). Each proof is produced
+   locally with `LocalTransactionProver` and is bound to the chain state of
+   the target node at the moment of generation (genesis commitment, reference
+   block, initial account commitments, input note nullifiers). The bundle is
+   written to `./benchmark-proofs/` as serialized blobs.
+2. **`run-benchmark`**: Loads the bundle from disk and submits it to the
+   node's RPC.
Mints are submitted sequentially (each mutates the shared
+   faucet, so order matters) and consumes are submitted with bounded
+   concurrency. After submission, the run waits a few blocks and scans them
+   to compute inclusion rate, inclusion TPS, and submit/inclusion latency
+   percentiles.
+
+Each proof takes seconds of real proving, so generating a bundle once and re-running submissions against it is the right way to iterate on the node's mempool / block-producer / store throughput. See [Re-using proofs](#re-using-proofs-across-runs) below.
+
+## Building
+
+```sh
+make install-benchmark
+```
+
+## Usage
+
+### Generate proven transactions
+
+```sh
+miden-benchmark create-proofs \
+    --rpc-url http://127.0.0.1:57291 \
+    --num-transactions 100
+```
+
+Writes the bundle to `./benchmark-proofs/`:
+
+- `mint_txs.bin`, `mint_tx_inputs.bin`
+- `consume_txs.bin`, `consume_tx_inputs.bin`
+
+### Submit them
+
+```sh
+miden-benchmark run-benchmark \
+    --rpc-url http://127.0.0.1:57291 \
+    --concurrency 32 \
+    --wait-blocks 3
+```
+
+Mints go in sequentially, then consumes with the requested concurrency, then the run waits `--wait-blocks` blocks before scanning for inclusion. Per-phase ack rate, RPC latency percentiles, inclusion rate, and inclusion TPS are printed at the end.
+
+## Re-using proofs across runs
+
+A `ProvenTransaction` is pinned to the chain state it was generated against:
+
+- the node's genesis commitment,
+- the reference block header,
+- the initial account commitment of the account being modified,
+- the input note nullifiers.
+
+Once a tx is included in a block, the node's state advances: nullifiers are recorded and account commitments change. Re-submitting the same proven tx is rejected because the chain has moved past the state the proof was built against.
+ +**Useful tip: clone the node's data directory before each benchmark run.** If you snapshot the data directory while the node is stopped, then *clone* the snapshot every time before bringing the node back up, the proofs in `./benchmark-proofs/` stay valid indefinitely. Each run is: + +1. Stop the node. +2. Replace the node's working data directory with a fresh copy of the + snapshot. +3. Start the node. +4. `miden-benchmark run-benchmark`. + +## Starting the node + +The benchmark needs a running Miden node with a reachable RPC endpoint. + +### Option A: docker-compose (recommended for benchmarking) + +The repo's `docker-compose.yml` wires up all node components (`store`, +`validator`, `block-producer`, `rpc`, `ntx-builder`) plus telemetry. From the +repo root: + +```sh +make docker-build # build miden-node and miden-validator images +make compose-genesis # wipe the volume, bootstrap a fresh genesis +make compose-up # start the stack (RPC at http://127.0.0.1:57291) +``` + +Stop with `make compose-down`. + +### Option B: running `miden-node` and `miden-validator` directly + +Install both binaries: + +```sh +make install-node +make install-validator +``` + +Bootstrap a fresh data directory (one-time): + +```sh +DATA=./node-data + +miden-validator bootstrap \ + --data-directory $DATA/validator \ + --genesis-block-directory $DATA/genesis \ + --accounts-directory $DATA/accounts + +miden-node store bootstrap \ + --data-directory $DATA/store \ + --genesis-block $DATA/genesis/genesis.dat +``` + +Start each component. The example below backgrounds them with `nohup` and captures logs under `./logs/`. For an interactive run, drop the `nohup` / `&` and put each command in its own terminal. 
+ +```sh +mkdir -p logs + +nohup miden-validator start \ + --listen 127.0.0.1:50101 \ + --data-directory "$DATA/validator" \ + > logs/validator.log 2>&1 & + +nohup miden-node store start \ + --rpc.listen 127.0.0.1:50001 \ + --ntx-builder.listen 127.0.0.1:50002 \ + --block-producer.listen 127.0.0.1:50003 \ + --data-directory "$DATA/store" \ + > logs/store.log 2>&1 & + +nohup miden-node block-producer start \ + --listen 127.0.0.1:50201 \ + --store.url http://127.0.0.1:50003 \ + --validator.url http://127.0.0.1:50101 \ + > logs/block-producer.log 2>&1 & + +nohup miden-node rpc start \ + --listen 127.0.0.1:57291 \ + --store.url http://127.0.0.1:50001 \ + --block-producer.url http://127.0.0.1:50201 \ + --validator.url http://127.0.0.1:50101 \ + > logs/rpc.log 2>&1 & + +nohup miden-node ntx-builder start \ + --listen 127.0.0.1:50301 \ + --store.url http://127.0.0.1:50002 \ + --block-producer.url http://127.0.0.1:50201 \ + --validator.url http://127.0.0.1:50101 \ + --data-directory "$DATA/ntx-builder" \ + > logs/ntx-builder.log 2>&1 & +``` + +#### Stopping the node + +```sh +pkill -f miden-validator +pkill -f 'miden-node store' +pkill -f 'miden-node block-producer' +pkill -f 'miden-node rpc' +pkill -f 'miden-node ntx-builder' +# Or, if no other miden binaries are running: +pkill -f 'miden-(node|validator)' +``` +## Lifting the TPS ceiling + +At default settings the block-producer caps end-to-end inclusion at **~21 tx/s**, well below the protocol's hard limit. 
+ +### The layered ceiling + +| Cap | Default | Protocol max | Knob | +| -------------------------------------------- | ------- | ------------ | ------------------------------ | +| Transactions per batch | 8 | 1024 | `--max-txs-per-batch` | +| Batches per block | 8 | 64 | `--max-batches-per-block` | +| Block interval | 3 s | n/a | `--block.interval` | +| Batch interval | 1 s | n/a | `--batch.interval` | +| Concurrent batch-builder workers | 2 | n/a | `--batch.workers` | +| Inflight mempool transactions | ~1280 | n/a | `--mempool.tx-capacity` | + +Block throughput ceiling = `max_batches_per_block x max_txs_per_batch / block.interval`. + +- Defaults: `8 x 8 / 3 s ~= 21 tx/s`. +- Protocol max with a 1 s block: `64 x 1024 / 1 s = 65 536 tx/s`. + +Protocol caps are enforced at startup (in `bin/node/src/commands/block_producer.rs`) and require a protocol-level change to lift. Everything else is operator configuration. + +### The batch-builder worker pool (`--batch.workers`) + +`--batch.workers` (env `MIDEN_NODE_BLOCK_PRODUCER_BATCH_WORKERS`) sets how many batches the block-producer keeps proving in parallel. Each worker is responsible for one in-flight batch proof — locally with the built-in prover, or remotely if `--batch-prover.url` is set. The default is **2**. Once `--max-txs-per-batch` and `--max-batches-per-block` are pushed up, this worker count is the single setting that determines how fast the block-producer can refill the mempool's batch slots; leaving it at 2 caps effective throughput well before the new block capacity becomes reachable. + +Rough sizing: + +- **With local batch proving** (no `--batch-prover.url`): raise to roughly + the number of physical CPU cores on the block-producer host. More than + that just over-subscribes the cores running the prover. +- **With a remote batch prover**: raise to whatever the remote service can + service in parallel (i.e. its own worker count). 
The block-producer + workers are now mostly waiting on I/O, so the bound is the remote + prover's capacity, not local CPU. + +### Bench-tuned recipe + +Replace the `block-producer` invocation in the [Option B](#option-b--running-miden-node-and-miden-validator-directly) +startup block with: + +```sh +nohup miden-node block-producer start \ + --listen 127.0.0.1:50201 \ + --store.url http://127.0.0.1:50003 \ + --validator.url http://127.0.0.1:50101 \ + --max-txs-per-batch 1024 `# protocol max` \ + --max-batches-per-block 64 `# protocol max` \ + --block.interval 2s `# default 3s` \ + --batch.interval 100ms `# default 1s` \ + --batch.workers 16 `# default 2` \ + --mempool.tx-capacity 1000000 `# default ~1280` \ + > logs/block-producer.log 2>&1 & +``` + +## License + +This project is [MIT licensed](../../LICENSE). diff --git a/bin/benchmark/src/main.rs b/bin/benchmark/src/main.rs index 8468fb6886..0e1502dc29 100644 --- a/bin/benchmark/src/main.rs +++ b/bin/benchmark/src/main.rs @@ -479,7 +479,7 @@ fn print_summary( println!( "Chain height: {h_start} -> {h_final} ({} blocks, of which {} contained at least one of our txs)", h_final - h_start, - inclusion.blocks_with_our_txs + inclusion.per_block_hits.len(), ); println!(); print_phase_summary("Mint phase (sequential)", mint); @@ -530,36 +530,71 @@ fn print_inclusion_summary(inclusion: &InclusionResult) { " included = {included} / {submitted} submitted ({drop} missing, {drop_pct:.1}% drop)", ); - if inclusion.blocks_with_our_txs == 0 { + let hits = &inclusion.per_block_hits; + if hits.is_empty() { println!(" no blocks observed containing any of our txs"); return; } - let txs_per_block: Vec = inclusion.txs_per_block_when_present.clone(); - let sum_txs: u32 = txs_per_block.iter().copied().sum(); - let mean_tpb = - f64::from(sum_txs) / f64::from(u32::try_from(txs_per_block.len()).unwrap_or(u32::MAX)); - let max_tpb = txs_per_block.iter().copied().max().unwrap_or(0); + // Per-block aggregates. 
+ let counts: Vec = hits.iter().map(|h| h.hit_count).collect(); + let sum_counts: u32 = counts.iter().copied().sum(); + let max_count = counts.iter().copied().max().unwrap_or(0); + let n_blocks = u32::try_from(counts.len()).unwrap_or(u32::MAX); + let mean_count = f64::from(sum_counts) / f64::from(n_blocks); + + let peak_block = hits.iter().max_by_key(|h| h.hit_count).expect("non-empty hits"); + let first_block = hits.first().expect("non-empty hits"); + let last_block = hits.last().expect("non-empty hits"); + println!( - " blocks with our txs = {} (mean txs/block when present = {:.1}, max = {})", - inclusion.blocks_with_our_txs, mean_tpb, max_tpb + " blocks with our txs = {n_blocks} \ + (block range {}..={}, mean txs/block when present = {mean_count:.1}, max = {max_count})", + first_block.block_num, last_block.block_num, ); - let span = u64::from(inclusion.last_inclusion_ts) - .saturating_sub(u64::from(inclusion.first_inclusion_ts)); - if span == 0 { + // Derive the block interval from consecutive scanned timestamps. + let Some(block_interval) = inclusion.derived_block_interval() else { println!( - " inclusion TPS: all {included} txs landed in a single block at timestamp {} \ - (no usable timespan to divide over)", - inclusion.first_inclusion_ts + " block interval: could not derive from {} scanned block(s) \ + (need >=2 blocks spanning at least one second boundary)", + inclusion.scanned_block_count, ); + println!(" throughput metrics skipped; per-block series follows."); + print_per_block_series(hits, None); + return; + }; + + println!( + " derived block interval = {} (from {} scanned blocks, span = {}s)", + format_duration_secs(block_interval), + inclusion.scanned_block_count, + inclusion.scanned_last_ts - inclusion.scanned_first_ts, + ); + + // Throughput. Each block-with-our-txs is treated as `block_interval` + // seconds of node work. 
+ let interval_secs = block_interval.as_secs_f64(); + let peak_rate = rate_per_second(u64::from(peak_block.hit_count), block_interval); + let mean_rate = if interval_secs > 0.0 { + mean_count / interval_secs } else { - let tps = rate_per_second(included, Duration::from_secs(span)); - println!( - " inclusion TPS = {included} included / {span}s spanning blocks {}..={} => {tps:.1} tx/s", - inclusion.first_inclusion_block, inclusion.last_inclusion_block - ); - } + 0.0 + }; + let window_rate = rate_per_second(included, block_interval.saturating_mul(n_blocks)); + + println!( + " peak per-block rate = {} txs in block {} => {peak_rate:.1} tx/s", + peak_block.hit_count, peak_block.block_num, + ); + println!(" mean per-block rate = {mean_count:.1} txs/block => {mean_rate:.1} tx/s"); + println!( + " window-average TPS = {included} included / ({n_blocks} blocks * {}) \ + => {window_rate:.1} tx/s", + format_duration_secs(block_interval), + ); + + print_per_block_series(hits, Some(block_interval)); let mut lats = inclusion.inclusion_latencies.clone(); if let Some(p) = percentiles(&mut lats) { @@ -574,6 +609,31 @@ fn print_inclusion_summary(inclusion: &InclusionResult) { } } +/// Print a compact per-block series so the operator can eyeball the +/// time-series shape (ramp, plateau, dip). Empty blocks in the scan range +/// are intentionally omitted. If `block_interval` is `Some`, each line also +/// shows the equivalent rate; if `None`, only the raw count. 
+fn print_per_block_series(hits: &[BlockHit], block_interval: Option) { + println!(" per-block series:"); + for hit in hits { + match block_interval { + Some(interval) => { + let rate = rate_per_second(u64::from(hit.hit_count), interval); + println!( + " block {} (ts={}): {} txs ({rate:.1} tx/s @ block_interval)", + hit.block_num, hit.block_ts, hit.hit_count, + ); + }, + None => { + println!( + " block {} (ts={}): {} txs", + hit.block_num, hit.block_ts, hit.hit_count, + ); + }, + } + } +} + fn read_from_file(path: &std::path::Path) -> T { let bytes = fs_err::read(path).unwrap_or_else(|_| { panic!("failed to read {} — run `create-proofs` first", path.display()) @@ -800,27 +860,59 @@ async fn submit_sequential( // INCLUSION CHECK // ================================================================================================ +/// One scanned block that contained at least one of our txs. Empty blocks +/// in the scan range are not represented here. +#[derive(Debug, Clone, Copy)] +struct BlockHit { + /// On-chain block number. + block_num: u32, + /// Unix-seconds timestamp from the block header. + block_ts: u32, + /// Number of our txs included in this block. + hit_count: u32, +} + #[derive(Debug)] struct InclusionResult { submitted_count: u64, included_count: u64, - /// Block number of the earliest block containing any of our txs. - first_inclusion_block: u32, - /// Block number of the latest block containing any of our txs. - last_inclusion_block: u32, - /// Header timestamps (unix seconds) of those two blocks. Used to compute - /// inclusion TPS as `included_count / (last_ts - first_ts)`. - first_inclusion_ts: u32, - last_inclusion_ts: u32, - /// Number of blocks in the scanned range that contained at least one of - /// our txs (excludes empty blocks and blocks unrelated to this run). - blocks_with_our_txs: u32, - /// Per-block count of our txs, recorded only for blocks where the count - /// is non-zero. Used for mean/max txs-per-block in the summary. 
- txs_per_block_when_present: Vec, + /// One entry per block in the scan range that included any of our txs, + /// in scan order. Throughput metrics are derived from this list plus + /// the block interval inferred from the scan span (see [`block_interval`]). + per_block_hits: Vec, /// For each successfully submitted tx that landed in a block: the /// elapsed time from RPC ack to that block's header timestamp. inclusion_latencies: Vec, + /// Number of blocks the inclusion scan successfully read headers for. + scanned_block_count: u32, + /// Header timestamps of the first and last successfully scanned blocks + /// (unix seconds). Together with `scanned_block_count`, used to derive + /// the block interval at print time. + scanned_first_ts: u32, + scanned_last_ts: u32, +} + +impl InclusionResult { + /// Derive the average block interval from the scan span. Returns `None` + /// when the scan touched fewer than two blocks or when all scanned + /// headers share the same 1-second-resolution timestamp (sub-second + /// cadence), in which case the bench cannot determine the interval + /// from headers alone. + fn derived_block_interval(&self) -> Option { + if self.scanned_block_count < 2 || self.scanned_last_ts <= self.scanned_first_ts { + return None; + } + let span_secs = u64::from(self.scanned_last_ts - self.scanned_first_ts); + let intervals = u64::from(self.scanned_block_count - 1); + // f64 keeps the fractional seconds when the cadence is finer than 1s + // *and* the scan crosses enough one-second boundaries. 
+        #[expect(
+            clippy::cast_precision_loss,
+            reason = "block counts and timestamp deltas are tiny in practice"
+        )]
+        let interval_secs = (span_secs as f64) / (intervals as f64);
+        Some(Duration::from_secs_f64(interval_secs))
+    }
 }
 
 /// Walk every block from `from_block` to `to_block` inclusive, deserialize
@@ -835,25 +927,21 @@ async fn compute_inclusion(
 ) -> InclusionResult {
     let submitted_count = ack_by_id.len() as u64;
     let mut included_count: u64 = 0;
-    let mut first_inclusion_block: u32 = 0;
-    let mut last_inclusion_block: u32 = 0;
-    let mut first_inclusion_ts: u32 = 0;
-    let mut last_inclusion_ts: u32 = 0;
-    let mut blocks_with_our_txs: u32 = 0;
-    let mut txs_per_block_when_present: Vec<u32> = Vec::new();
+    let mut per_block_hits: Vec<BlockHit> = Vec::new();
     let mut inclusion_latencies: Vec<Duration> = Vec::new();
+    let mut scanned_block_count: u32 = 0;
+    let mut scanned_first_ts: u32 = 0;
+    let mut scanned_last_ts: u32 = 0;
 
     if from_block > to_block {
         return InclusionResult {
             submitted_count,
             included_count,
-            first_inclusion_block,
-            last_inclusion_block,
-            first_inclusion_ts,
-            last_inclusion_ts,
-            blocks_with_our_txs,
-            txs_per_block_when_present,
+            per_block_hits,
             inclusion_latencies,
+            scanned_block_count,
+            scanned_first_ts,
+            scanned_last_ts,
         };
     }
 
@@ -884,6 +972,14 @@ async fn compute_inclusion(
         let block_ts = signed_block.header().timestamp();
         let block_ts_system = UNIX_EPOCH + Duration::from_secs(u64::from(block_ts));
+
+        // Track scan span so we can derive the block interval at print time.
+ if scanned_block_count == 0 { + scanned_first_ts = block_ts; + } + scanned_last_ts = block_ts; + scanned_block_count += 1; + let mut hits_in_this_block: u32 = 0; for header in signed_block.body().transactions().as_slice() { @@ -898,27 +994,22 @@ async fn compute_inclusion( } if hits_in_this_block > 0 { - if blocks_with_our_txs == 0 { - first_inclusion_block = block_num; - first_inclusion_ts = block_ts; - } - last_inclusion_block = block_num; - last_inclusion_ts = block_ts; - blocks_with_our_txs += 1; - txs_per_block_when_present.push(hits_in_this_block); + per_block_hits.push(BlockHit { + block_num, + block_ts, + hit_count: hits_in_this_block, + }); } } InclusionResult { submitted_count, included_count, - first_inclusion_block, - last_inclusion_block, - first_inclusion_ts, - last_inclusion_ts, - blocks_with_our_txs, - txs_per_block_when_present, + per_block_hits, inclusion_latencies, + scanned_block_count, + scanned_first_ts, + scanned_last_ts, } } diff --git a/bin/node/src/commands/block_producer.rs b/bin/node/src/commands/block_producer.rs index b921868231..e19223fd22 100644 --- a/bin/node/src/commands/block_producer.rs +++ b/bin/node/src/commands/block_producer.rs @@ -6,6 +6,7 @@ use anyhow::Context; use miden_node_block_producer::{ BlockProducer, DEFAULT_BATCH_INTERVAL, + DEFAULT_BATCH_WORKERS, DEFAULT_BLOCK_INTERVAL, DEFAULT_MAX_BATCHES_PER_BLOCK, DEFAULT_MAX_TXS_PER_BATCH, @@ -22,6 +23,7 @@ const ENV_MAX_TXS_PER_BATCH: &str = "MIDEN_NODE_BLOCK_PRODUCER_MAX_TXS_PER_BATCH const ENV_MAX_BATCHES_PER_BLOCK: &str = "MIDEN_NODE_BLOCK_PRODUCER_MAX_BATCHES_PER_BLOCK"; const ENV_MEMPOOL_TX_CAPACITY: &str = "MIDEN_NODE_BLOCK_PRODUCER_MEMPOOL_TX_CAPACITY"; const ENV_BATCH_PROVER_URL: &str = "MIDEN_NODE_BLOCK_PRODUCER_BATCH_PROVER_URL"; +const ENV_BATCH_WORKERS: &str = "MIDEN_NODE_BLOCK_PRODUCER_BATCH_WORKERS"; // BLOCK PRODUCER COMMAND // ================================================================================================ @@ -95,6 +97,7 @@ impl 
BlockProducerCommand { max_batches_per_block: block_producer.max_batches_per_block, grpc_options, mempool_tx_capacity: block_producer.mempool_tx_capacity, + batch_workers: block_producer.batch_workers, } .serve() .await @@ -132,6 +135,7 @@ mod tests { max_txs_per_batch: 8, max_batches_per_block: miden_protocol::MAX_BATCHES_PER_BLOCK + 1, // Invalid value mempool_tx_capacity: NonZeroUsize::new(1000).unwrap(), + batch_workers: NonZeroUsize::new(2).unwrap(), }, enable_otel: false, grpc_options: GrpcOptionsInternal::default(), @@ -157,6 +161,7 @@ mod tests { * (should fail) */ max_batches_per_block: 8, mempool_tx_capacity: NonZeroUsize::new(1000).unwrap(), + batch_workers: NonZeroUsize::new(2).unwrap(), }, enable_otel: false, grpc_options: GrpcOptionsInternal::default(), @@ -223,4 +228,16 @@ pub struct BlockProducerConfig { value_name = "NUM" )] mempool_tx_capacity: NonZeroUsize, + + /// Number of concurrent batch-builder workers. + /// + /// Each worker can prove one batch at a time, so this caps how many batch + /// proofs the block-producer keeps in flight. + #[arg( + long = "batch.workers", + env = ENV_BATCH_WORKERS, + value_name = "NUM", + default_value_t = DEFAULT_BATCH_WORKERS + )] + pub batch_workers: NonZeroUsize, } diff --git a/crates/block-producer/src/lib.rs b/crates/block-producer/src/lib.rs index 955aa23565..6fc204fccb 100644 --- a/crates/block-producer/src/lib.rs +++ b/crates/block-producer/src/lib.rs @@ -32,8 +32,13 @@ pub const DEFAULT_MAX_TXS_PER_BATCH: usize = 8; /// Maximum number of batches per block. pub const DEFAULT_MAX_BATCHES_PER_BLOCK: usize = 8; -/// Size of the batch building worker pool. -const SERVER_NUM_BATCH_BUILDERS: NonZeroUsize = NonZeroUsize::new(2).unwrap(); +/// Default size of the batch-builder worker pool. +/// +/// Each worker can prove one batch at a time. 
Raising this allows more +/// concurrent batch proofs in-flight, which is the primary lever for lifting +/// the per-block-producer TPS ceiling once `--max-txs-per-batch` and +/// `--max-batches-per-block` are pushed up. +pub const DEFAULT_BATCH_WORKERS: NonZeroUsize = NonZeroUsize::new(2).unwrap(); /// The number of blocks of committed state that the mempool retains. /// diff --git a/crates/block-producer/src/server/mod.rs b/crates/block-producer/src/server/mod.rs index f0266c98aa..a1a835d38f 100644 --- a/crates/block-producer/src/server/mod.rs +++ b/crates/block-producer/src/server/mod.rs @@ -33,7 +33,7 @@ use crate::errors::{BlockProducerError, MempoolSubmissionError, StoreError}; use crate::mempool::{BatchBudget, BlockBudget, Mempool, MempoolConfig, SharedMempool}; use crate::store::StoreClient; use crate::validator::BlockProducerValidatorClient; -use crate::{CACHED_MEMPOOL_STATS_UPDATE_INTERVAL, COMPONENT, SERVER_NUM_BATCH_BUILDERS}; +use crate::{CACHED_MEMPOOL_STATS_UPDATE_INTERVAL, COMPONENT}; #[cfg(test)] mod tests; @@ -66,6 +66,9 @@ pub struct BlockProducer { /// The maximum number of inflight transactions allowed in the mempool at once. pub mempool_tx_capacity: NonZeroUsize, + + /// The number of concurrent batch-builder workers. 
+ pub batch_workers: NonZeroUsize, } // BLOCK PRODUCER @@ -119,7 +122,7 @@ impl BlockProducer { let block_builder = BlockBuilder::new(store.clone(), validator, self.block_interval); let batch_builder = BatchBuilder::new( store.clone(), - SERVER_NUM_BATCH_BUILDERS, + self.batch_workers, self.batch_prover_url, self.batch_interval, ); diff --git a/crates/block-producer/src/server/tests.rs b/crates/block-producer/src/server/tests.rs index 053de2c7d5..b573d157cf 100644 --- a/crates/block-producer/src/server/tests.rs +++ b/crates/block-producer/src/server/tests.rs @@ -14,7 +14,12 @@ use tokio::{runtime, task}; use tonic::transport::{Channel, Endpoint}; use url::Url; -use crate::{BlockProducer, DEFAULT_MAX_BATCHES_PER_BLOCK, DEFAULT_MAX_TXS_PER_BATCH}; +use crate::{ + BlockProducer, + DEFAULT_BATCH_WORKERS, + DEFAULT_MAX_BATCHES_PER_BLOCK, + DEFAULT_MAX_TXS_PER_BATCH, +}; /// Tests that the block producer starts up correctly even when the store is not initially /// available. The block producer should retry with exponential backoff until the store becomes @@ -75,6 +80,7 @@ async fn block_producer_startup_is_robust_to_network_failures() { max_batches_per_block: DEFAULT_MAX_BATCHES_PER_BLOCK, grpc_options, mempool_tx_capacity: NonZeroUsize::new(100).unwrap(), + batch_workers: DEFAULT_BATCH_WORKERS, } .serve() .await From 1532a7960dd99c0a9388ab2abb43b03bc46d6429 Mon Sep 17 00:00:00 2001 From: SantiagoPittella Date: Wed, 13 May 2026 11:07:48 -0300 Subject: [PATCH 5/9] fix readme commands --- bin/benchmark/README.md | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/bin/benchmark/README.md b/bin/benchmark/README.md index 6675890b61..847fee0188 100644 --- a/bin/benchmark/README.md +++ b/bin/benchmark/README.md @@ -133,8 +133,14 @@ nohup miden-node store start \ nohup miden-node block-producer start \ --listen 127.0.0.1:50201 \ - --store.url http://127.0.0.1:50003 \ - --validator.url http://127.0.0.1:50101 \ + --store.url 
http://127.0.0.1:50003 \ + --validator.url http://127.0.0.1:50101 \ + --max-txs-per-batch 1024 \ + --max-batches-per-block 64 \ + --block.interval 2s \ + --batch.interval 100ms \ + --batch.workers 16 \ + --mempool.tx-capacity 1000000 \ > logs/block-producer.log 2>&1 & nohup miden-node rpc start \ @@ -142,6 +148,11 @@ nohup miden-node rpc start \ --store.url http://127.0.0.1:50001 \ --block-producer.url http://127.0.0.1:50201 \ --validator.url http://127.0.0.1:50101 \ + --grpc.timeout 24h \ + --grpc.max_connection_age 24h \ + --grpc.burst_size 100000 \ + --grpc.replenish_n_per_second 100000 \ + --grpc.max_concurrent_connections 1000000 \ > logs/rpc.log 2>&1 & nohup miden-node ntx-builder start \ @@ -200,25 +211,6 @@ Rough sizing: workers are now mostly waiting on I/O, so the bound is the remote prover's capacity, not local CPU. -### Bench-tuned recipe - -Replace the `block-producer` invocation in the [Option B](#option-b--running-miden-node-and-miden-validator-directly) -startup block with: - -```sh -nohup miden-node block-producer start \ - --listen 127.0.0.1:50201 \ - --store.url http://127.0.0.1:50003 \ - --validator.url http://127.0.0.1:50101 \ - --max-txs-per-batch 1024 `# protocol max` \ - --max-batches-per-block 64 `# protocol max` \ - --block.interval 2s `# default 3s` \ - --batch.interval 100ms `# default 1s` \ - --batch.workers 16 `# default 2` \ - --mempool.tx-capacity 1000000 `# default ~1280` \ - > logs/block-producer.log 2>&1 & -``` - ## License This project is [MIT licensed](../../LICENSE). 
From a50b423d8669baadfb752ecbb61ae03ef3277ead Mon Sep 17 00:00:00 2001 From: SantiagoPittella Date: Wed, 13 May 2026 16:57:45 -0300 Subject: [PATCH 6/9] improve code organization --- bin/benchmark/src/create_proofs.rs | 447 +++++++++++ bin/benchmark/src/inclusion.rs | 235 ++++++ bin/benchmark/src/main.rs | 1196 +--------------------------- bin/benchmark/src/rpc_state.rs | 89 +++ bin/benchmark/src/submit.rs | 267 +++++++ bin/benchmark/src/summary.rs | 314 ++++++++ 6 files changed, 1391 insertions(+), 1157 deletions(-) create mode 100644 bin/benchmark/src/create_proofs.rs create mode 100644 bin/benchmark/src/inclusion.rs create mode 100644 bin/benchmark/src/rpc_state.rs create mode 100644 bin/benchmark/src/submit.rs create mode 100644 bin/benchmark/src/summary.rs diff --git a/bin/benchmark/src/create_proofs.rs b/bin/benchmark/src/create_proofs.rs new file mode 100644 index 0000000000..7dd34e7f02 --- /dev/null +++ b/bin/benchmark/src/create_proofs.rs @@ -0,0 +1,447 @@ +//! The `create-proofs` orchestrator and everything it needs to build the +//! proven-tx bundle locally: +//! +//! - `run` orchestrates the genesis fetch + faucet/wallet construction + mint phase + consume phase +//! + final write-out to `./benchmark-proofs/`. +//! - `create_faucet` / `create_wallet` build the accounts the bench uses. +//! - `BenchmarkDataStore` is the in-memory `DataStore` impl that feeds the `TransactionExecutor` +//! while we generate proofs locally. 
+ +use std::collections::{BTreeSet, HashMap}; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use miden_protocol::account::auth::{AuthScheme, AuthSecretKey}; +use miden_protocol::account::{ + Account, + AccountBuilder, + AccountId, + AccountStorageMode, + AccountType, + PartialAccount, + StorageMapKey, +}; +use miden_protocol::asset::{Asset, AssetVaultKey, AssetWitness, FungibleAsset, TokenSymbol}; +use miden_protocol::block::{BlockHeader, BlockNumber}; +use miden_protocol::crypto::dsa::falcon512_poseidon2::SecretKey; +use miden_protocol::crypto::rand::RandomCoin; +use miden_protocol::note::{Note, NoteScript, NoteScriptRoot}; +use miden_protocol::transaction::{ + AccountInputs, + InputNote, + InputNotes, + PartialBlockchain, + ProvenTransaction, + TransactionArgs, +}; +use miden_protocol::utils::serde::Serializable; +use miden_protocol::{Felt, MastForest, Word}; +use miden_standards::account::auth::AuthSingleSig; +use miden_standards::account::faucets::BasicFungibleFaucet; +use miden_standards::account::interface::{AccountInterface, AccountInterfaceExt}; +use miden_standards::account::metadata::{FungibleTokenMetadata, TokenName}; +use miden_standards::account::policies::{ + BurnPolicyConfig, + MintPolicyConfig, + PolicyAuthority, + TokenPolicyManager, +}; +use miden_standards::account::wallets::BasicWallet; +use miden_standards::note::P2idNote; +use miden_tx::auth::BasicAuthenticator; +use miden_tx::{ + DataStore, + DataStoreError, + LocalTransactionProver, + MastForestStore, + TransactionExecutor, + TransactionMastStore, +}; +use rand::Rng; +use rayon::prelude::*; +use url::Url; + +use crate::rpc_state::{fetch_chain_tip_header, fetch_partial_blockchain}; +use crate::summary::print_proving_summary; +use crate::{ + PROOFS_DIR, + create_genesis_aware_rpc_client, + get_genesis_header_request, + write_to_file, +}; + +// ORCHESTRATOR +// ================================================================================================ 
+
+#[expect(
+    clippy::too_many_lines,
+    reason = "single linear orchestration of genesis fetch + mint phase + consume phase; \
+              splitting would just shuffle locals (faucet, data_store, authenticator) around"
+)]
+pub(crate) async fn run(rpc_url: Url, num_transactions: u64) {
+    let mut rpc_client = create_genesis_aware_rpc_client(&rpc_url, Duration::from_secs(10))
+        .await
+        .unwrap();
+
+    println!("Fetching genesis block header from {rpc_url}...");
+    let genesis_header_proto = rpc_client
+        .get_block_header_by_number(get_genesis_header_request())
+        .await
+        .unwrap()
+        .into_inner()
+        .block_header
+        .expect("RPC returned no block header");
+    let genesis_header: BlockHeader = genesis_header_proto.try_into().unwrap();
+
+    println!("Fetching chain tip header...");
+    let ref_block_header = fetch_chain_tip_header(&mut rpc_client).await;
+    let ref_block_num = ref_block_header.block_num();
+    println!(" ref block = {ref_block_num} (proofs will bind to this block's chain state)");
+
+    println!("Fetching chain MMR up to ref block...");
+    let partial_blockchain =
+        fetch_partial_blockchain(&mut rpc_client, ref_block_num.as_u32(), &genesis_header).await;
+
+    println!("Creating faucet...");
+    let (mut faucet, faucet_secret_key) = create_faucet();
+
+    let coin_seed: [u64; 4] = rand::rng().random();
+    let mut seed_rng = RandomCoin::new(coin_seed.map(Felt::new).into());
+    let wallet_secret_key = SecretKey::with_rng(&mut seed_rng);
+    let wallet_public_key = wallet_secret_key.public_key();
+
+    println!("Creating {num_transactions} wallets in parallel...");
+    let wallets: Vec<Account> = (0..num_transactions)
+        .into_par_iter()
+        .map(|index| create_wallet(&wallet_public_key, index))
+        .collect();
+
+    let mut data_store = BenchmarkDataStore::new(ref_block_header.clone(), partial_blockchain);
+    data_store.add_account(faucet.clone());
+    for wallet in &wallets {
+        data_store.add_account(wallet.clone());
+    }
+
+    let authenticator = BasicAuthenticator::new(&[
+        AuthSecretKey::Falcon512Poseidon2(faucet_secret_key),
+        AuthSecretKey::Falcon512Poseidon2(wallet_secret_key),
+    ]);
+
+    let prover = LocalTransactionProver::default();
+    let faucet_id = faucet.id();
+
+    // Mint phase — sequential because each mint mutates the faucet.
+    println!("Proving {num_transactions} mint transactions (sequential)...");
+    let mut mint_txs: Vec<ProvenTransaction> = Vec::with_capacity(num_transactions as usize);
+    let mut mint_tx_inputs: Vec<Vec<u8>> = Vec::with_capacity(num_transactions as usize);
+    let mut mint_notes: Vec<Note> = Vec::with_capacity(num_transactions as usize);
+    let mint_phase_start = Instant::now();
+    let mut mint_exec_total = Duration::ZERO;
+    let mut mint_prove_total = Duration::ZERO;
+
+    for index in 0..num_transactions {
+        let wallet_id = wallets[index as usize].id();
+        let note = {
+            let asset = Asset::Fungible(FungibleAsset::new(faucet_id, 10).unwrap());
+            P2idNote::create(
+                faucet_id,
+                wallet_id,
+                vec![asset],
+                miden_protocol::note::NoteType::Public,
+                miden_protocol::note::NoteAttachment::default(),
+                &mut seed_rng,
+            )
+            .expect("note creation failed")
+        };
+
+        let account_interface = AccountInterface::from_account(&faucet);
+        let script = account_interface
+            .build_send_notes_script(&[note.clone().into()], None)
+            .expect("failed to build mint send-notes script");
+
+        let mut tx_args = TransactionArgs::default().with_tx_script(script);
+        tx_args.add_output_note_recipient(Box::new(note.recipient().clone()));
+
+        let executor = TransactionExecutor::new(&data_store).with_authenticator(&authenticator);
+
+        let exec_t0 = Instant::now();
+        let executed_tx = Box::pin(executor.execute_transaction(
+            faucet_id,
+            ref_block_num,
+            InputNotes::default(),
+            tx_args,
+        ))
+        .await
+        .expect("failed to execute mint transaction");
+        mint_exec_total += exec_t0.elapsed();
+
+        let tx_inputs_bytes = executed_tx.tx_inputs().to_bytes();
+        let delta = executed_tx.account_delta().clone();
+
+        let prove_t0 = Instant::now();
+        let proven_tx =
+            prover.prove(executed_tx).await.expect("failed to prove mint transaction");
+        mint_prove_total += prove_t0.elapsed();
+
+        // Evolve the faucet state for the next iteration. The first mint of a
+        // never-before-seen account produces a full-state delta (because the
+        // delta carries the freshly deployed code); subsequent mints produce
+        // partial-state deltas that can be applied incrementally.
+        if delta.is_full_state() {
+            faucet = Account::try_from(&delta)
+                .expect("failed to materialize faucet from full-state delta");
+        } else {
+            faucet.apply_delta(&delta).expect("failed to apply faucet delta");
+        }
+        data_store.add_account(faucet.clone());
+
+        mint_txs.push(proven_tx);
+        mint_tx_inputs.push(tx_inputs_bytes);
+        mint_notes.push(note);
+
+        if (index + 1) % 10 == 0 || index + 1 == num_transactions {
+            println!(" proved {} / {num_transactions} mint txs", index + 1);
+        }
+    }
+    let mint_phase_elapsed = mint_phase_start.elapsed();
+    print_proving_summary(
+        "Mint",
+        num_transactions,
+        mint_phase_elapsed,
+        mint_exec_total,
+        mint_prove_total,
+    );
+
+    // Consume phase — also sequential for now (each tx is one wallet, independent
+    // wallets, so this could be parallelized later with bounded concurrency).
+    println!("Proving {num_transactions} consume transactions (sequential)...");
+    let mut consume_txs: Vec<ProvenTransaction> = Vec::with_capacity(num_transactions as usize);
+    let mut consume_tx_inputs: Vec<Vec<u8>> = Vec::with_capacity(num_transactions as usize);
+    let consume_phase_start = Instant::now();
+    let mut consume_exec_total = Duration::ZERO;
+    let mut consume_prove_total = Duration::ZERO;
+
+    for index in 0..num_transactions {
+        let wallet_id = wallets[index as usize].id();
+        let note = mint_notes[index as usize].clone();
+        let input_note = InputNote::Unauthenticated { note };
+        let input_notes =
+            InputNotes::new(vec![input_note]).expect("failed to construct input notes for consume");
+
+        let executor = TransactionExecutor::new(&data_store).with_authenticator(&authenticator);
+
+        let exec_t0 = Instant::now();
+        let executed_tx = Box::pin(executor.execute_transaction(
+            wallet_id,
+            ref_block_num,
+            input_notes,
+            TransactionArgs::default(),
+        ))
+        .await
+        .expect("failed to execute consume transaction");
+        consume_exec_total += exec_t0.elapsed();
+
+        let tx_inputs_bytes = executed_tx.tx_inputs().to_bytes();
+
+        let prove_t0 = Instant::now();
+        let proven_tx =
+            prover.prove(executed_tx).await.expect("failed to prove consume transaction");
+        consume_prove_total += prove_t0.elapsed();
+
+        consume_txs.push(proven_tx);
+        consume_tx_inputs.push(tx_inputs_bytes);
+
+        if (index + 1) % 10 == 0 || index + 1 == num_transactions {
+            println!(" proved {} / {num_transactions} consume txs", index + 1);
+        }
+    }
+    let consume_phase_elapsed = consume_phase_start.elapsed();
+    print_proving_summary(
+        "Consume",
+        num_transactions,
+        consume_phase_elapsed,
+        consume_exec_total,
+        consume_prove_total,
+    );
+
+    let out_dir = PathBuf::from(PROOFS_DIR);
+    println!("Writing proofs to {}/", out_dir.display());
+    fs_err::create_dir_all(&out_dir).unwrap();
+    write_to_file(&out_dir.join("mint_txs.bin"), &mint_txs);
+    write_to_file(&out_dir.join("mint_tx_inputs.bin"), &mint_tx_inputs);
+    write_to_file(&out_dir.join("consume_txs.bin"), &consume_txs);
+    write_to_file(&out_dir.join("consume_tx_inputs.bin"), &consume_tx_inputs);
+    println!("Done.");
+}
+
+// ACCOUNT BUILDERS
+// ================================================================================================
+
+/// Creates a new faucet account and returns it alongside its secret key.
+fn create_faucet() -> (Account, SecretKey) {
+    let coin_seed: [u64; 4] = rand::rng().random();
+    let mut rng = RandomCoin::new(coin_seed.map(Felt::new).into());
+    let key_pair = SecretKey::with_rng(&mut rng);
+    let init_seed = [0_u8; 32];
+
+    let token_symbol = TokenSymbol::new("TEST").unwrap();
+    let token_metadata = FungibleTokenMetadata::builder(
+        TokenName::new("TEST").unwrap(),
+        token_symbol,
+        2,
+        FungibleAsset::MAX_AMOUNT,
+    )
+    .build()
+    .unwrap();
+    let faucet = AccountBuilder::new(init_seed)
+        .account_type(AccountType::FungibleFaucet)
+        .storage_mode(AccountStorageMode::Private)
+        .with_component(token_metadata)
+        .with_component(BasicFungibleFaucet)
+        .with_components(TokenPolicyManager::new(
+            PolicyAuthority::AuthControlled,
+            MintPolicyConfig::AllowAll,
+            BurnPolicyConfig::AllowAll,
+        ))
+        .with_auth_component(AuthSingleSig::new(
+            key_pair.public_key().into(),
+            AuthScheme::Falcon512Poseidon2,
+        ))
+        .build()
+        .unwrap();
+    (faucet, key_pair)
+}
+
+/// Creates a new wallet account with the given public key, using `index` to vary
+/// the init seed so each wallet ends up with a distinct account ID.
+fn create_wallet(
+    public_key: &miden_protocol::crypto::dsa::falcon512_poseidon2::PublicKey,
+    index: u64,
+) -> Account {
+    let init_seed: Vec<_> = index.to_be_bytes().into_iter().chain([0u8; 24]).collect();
+    AccountBuilder::new(init_seed.try_into().unwrap())
+        .account_type(AccountType::RegularAccountImmutableCode)
+        .storage_mode(AccountStorageMode::Private)
+        .with_auth_component(AuthSingleSig::new(
+            public_key.clone().into(),
+            AuthScheme::Falcon512Poseidon2,
+        ))
+        .with_component(BasicWallet)
+        .build()
+        .unwrap()
+}
+
+// BENCHMARK DATA STORE
+// ================================================================================================
+
+/// In-memory `DataStore` impl used to feed the [`TransactionExecutor`] when
+/// generating real proofs locally. Modelled on the network-monitor's
+/// `MonitorDataStore`.
+struct BenchmarkDataStore {
+    accounts: HashMap<AccountId, Account>,
+    block_header: BlockHeader,
+    partial_block_chain: PartialBlockchain,
+    mast_store: TransactionMastStore,
+}
+
+impl BenchmarkDataStore {
+    fn new(block_header: BlockHeader, partial_block_chain: PartialBlockchain) -> Self {
+        Self {
+            accounts: HashMap::new(),
+            block_header,
+            partial_block_chain,
+            mast_store: TransactionMastStore::new(),
+        }
+    }
+
+    fn add_account(&mut self, account: Account) {
+        self.mast_store.load_account_code(account.code());
+        self.accounts.insert(account.id(), account);
+    }
+
+    fn get_account(&self, account_id: AccountId) -> Result<&Account, DataStoreError> {
+        self.accounts.get(&account_id).ok_or_else(|| DataStoreError::Other {
+            error_msg: "unknown account".into(),
+            source: None,
+        })
+    }
+}
+
+impl DataStore for BenchmarkDataStore {
+    async fn get_transaction_inputs(
+        &self,
+        account_id: AccountId,
+        _block_refs: BTreeSet<BlockNumber>,
+    ) -> Result<(PartialAccount, BlockHeader, PartialBlockchain), DataStoreError> {
+        let account = self.get_account(account_id)?;
+        let partial_account = PartialAccount::from(account);
+        Ok((partial_account, self.block_header.clone(), self.partial_block_chain.clone()))
+    }
+
+    async fn get_storage_map_witness(
+        &self,
+        account_id: AccountId,
+        map_root: Word,
+        map_key: StorageMapKey,
+    ) -> Result {
+        let account = self.get_account(account_id)?;
+        for slot in account.storage().slots() {
+            if let miden_protocol::account::StorageSlotContent::Map(map) = slot.content() {
+                if map.root() == map_root {
+                    return Ok(map.open(&map_key));
+                }
+            }
+        }
+        Err(DataStoreError::Other {
+            error_msg: format!("no storage map with the requested root in account {account_id}")
+                .into(),
+            source: None,
+        })
+    }
+
+    async fn get_foreign_account_inputs(
+        &self,
+        _foreign_account_id: AccountId,
+        _ref_block: BlockNumber,
+    ) -> Result<AccountInputs, DataStoreError> {
+        unimplemented!("foreign account inputs are not needed for the benchmark")
+    }
+
+    async fn get_vault_asset_witnesses(
+        &self,
+        account_id: AccountId,
+        vault_root: Word,
+        vault_keys: BTreeSet<AssetVaultKey>,
+    ) -> Result<Vec<AssetWitness>, DataStoreError> {
+        let account = self.get_account(account_id)?;
+
+        if account.vault().root() != vault_root {
+            return Err(DataStoreError::Other {
+                error_msg: "vault root mismatch".into(),
+                source: None,
+            });
+        }
+
+        Result::<Vec<AssetWitness>, _>::from_iter(vault_keys.into_iter().map(|vault_key| {
+            AssetWitness::new(account.vault().open(vault_key).into()).map_err(|err| {
+                DataStoreError::Other {
+                    error_msg: "failed to open vault asset tree".into(),
+                    source: Some(Box::new(err)),
+                }
+            })
+        }))
+    }
+
+    async fn get_note_script(
+        &self,
+        _script_root: NoteScriptRoot,
+    ) -> Result<Option<Arc<NoteScript>>, DataStoreError> {
+        Ok(None)
+    }
+}
+
+impl MastForestStore for BenchmarkDataStore {
+    fn get(&self, procedure_hash: &Word) -> Option<Arc<MastForest>> {
+        self.mast_store.get(procedure_hash)
+    }
+}
diff --git a/bin/benchmark/src/inclusion.rs b/bin/benchmark/src/inclusion.rs
new file mode 100644
index 0000000000..1f2f7b027d
--- /dev/null
+++ b/bin/benchmark/src/inclusion.rs
@@ -0,0 +1,235 @@
+//! Post-submission inclusion scan.
+//!
+//! `scan_with_drain` is the one-shot watcher: it polls the chain past a
+//! 
starting height, scans each new block for the txs we submitted, and
+//! exits as soon as every submitted tx has been seen on-chain — falling
+//! back to a `max_blocks` bound if some submissions never land. The result
+//! carries per-block hit counts plus the scan span used to derive the
+//! average block interval.
+
+use std::collections::HashMap;
+use std::time::{Duration, SystemTime, UNIX_EPOCH};
+
+use miden_node_proto::clients::RpcClient;
+use miden_node_proto::generated as proto;
+use miden_node_proto::generated::rpc::BlockHeaderByNumberRequest;
+use miden_protocol::block::{BlockHeader, SignedBlock};
+use miden_protocol::transaction::TransactionId;
+use miden_protocol::utils::serde::Deserializable;
+
+/// One scanned block that contained at least one of our txs. Empty blocks
+/// in the scan range are not represented here.
+#[derive(Debug, Clone, Copy)]
+pub(crate) struct BlockHit {
+    /// On-chain block number.
+    pub(crate) block_num: u32,
+    /// Unix-seconds timestamp from the block header.
+    pub(crate) block_ts: u32,
+    /// Number of our txs included in this block.
+    pub(crate) hit_count: u32,
+}
+
+#[derive(Debug)]
+pub(crate) struct InclusionResult {
+    pub(crate) submitted_count: u64,
+    pub(crate) included_count: u64,
+    /// One entry per block in the scan range that included any of our txs,
+    /// in scan order. Throughput metrics are derived from this list plus
+    /// the block interval inferred from the scan span (see
+    /// [`InclusionResult::derived_block_interval`]).
+    pub(crate) per_block_hits: Vec<BlockHit>,
+    /// For each successfully submitted tx that landed in a block: the
+    /// elapsed time from RPC ack to that block's header timestamp.
+    pub(crate) inclusion_latencies: Vec<Duration>,
+    /// Number of blocks the inclusion scan successfully read headers for.
+    pub(crate) scanned_block_count: u32,
+    /// Header timestamps of the first and last successfully scanned blocks
+    /// (unix seconds). Together with `scanned_block_count`, used to derive
+    /// the block interval at print time.
+    pub(crate) scanned_first_ts: u32,
+    pub(crate) scanned_last_ts: u32,
+}
+
+impl InclusionResult {
+    /// Derive the average block interval from the scan span. Returns `None`
+    /// when the scan touched fewer than two blocks or when all scanned
+    /// headers share the same 1-second-resolution timestamp (sub-second
+    /// cadence), in which case the bench cannot determine the interval
+    /// from headers alone.
+    pub(crate) fn derived_block_interval(&self) -> Option<Duration> {
+        if self.scanned_block_count < 2 || self.scanned_last_ts <= self.scanned_first_ts {
+            return None;
+        }
+        let span_secs = u64::from(self.scanned_last_ts - self.scanned_first_ts);
+        let intervals = u64::from(self.scanned_block_count - 1);
+        // f64 keeps the fractional seconds when the cadence is finer than 1s
+        // *and* the scan crosses enough one-second boundaries.
+        #[expect(
+            clippy::cast_precision_loss,
+            reason = "block counts and timestamp deltas are tiny in practice"
+        )]
+        let interval_secs = (span_secs as f64) / (intervals as f64);
+        Some(Duration::from_secs_f64(interval_secs))
+    }
+}
+
+/// Watch the chain advance past `start_height` and scan each new block for
+/// our submitted txs as it lands. Stops as soon as every entry in `ack_by_id`
+/// has been matched (early-exit), or after `max_blocks` blocks past
+/// `start_height` have been scanned without draining (timeout) — whichever
+/// comes first. Returns the final scanned block number alongside the
+/// inclusion stats; if early-exit fires, the returned `h_final` is the
+/// block that completed the drain.
+#[expect(
+    clippy::too_many_lines,
+    reason = "polling + per-block deserialization + tx-id matching is intentionally inline; \
+              the alternative is to thread eight pieces of mutable state through a helper, \
+              which obscures the read flow without changing the logic"
+)]
+pub(crate) async fn scan_with_drain(
+    mut client: RpcClient,
+    start_height: u32,
+    max_blocks: u32,
+    mut ack_by_id: HashMap<TransactionId, SystemTime>,
+) -> (u32, InclusionResult) {
+    let submitted_count = ack_by_id.len() as u64;
+    let mut included_count: u64 = 0;
+    let mut per_block_hits: Vec<BlockHit> = Vec::new();
+    let mut inclusion_latencies: Vec<Duration> = Vec::new();
+    let mut scanned_block_count: u32 = 0;
+    let mut scanned_first_ts: u32 = 0;
+    let mut scanned_last_ts: u32 = 0;
+
+    let max_target = start_height.saturating_add(max_blocks);
+    let mut next_block = start_height + 1;
+    let mut last_seen_height = start_height;
+    let mut h_final = start_height;
+
+    'outer: loop {
+        // Refresh the chain tip and announce changes.
+        let tip = current_block_height(client.clone()).await;
+        if tip != last_seen_height {
+            println!(" block height: {tip}");
+            last_seen_height = tip;
+        }
+
+        // Scan every unwatched block, capped at the max-bound target.
+ let scan_to = tip.min(max_target); + while next_block <= scan_to { + let request = proto::blockchain::BlockRequest { + block_num: next_block, + include_proof: None, + }; + let response = match client.get_block_by_number(request).await { + Ok(r) => r.into_inner(), + Err(status) => { + eprintln!( + " warning: get_block_by_number({next_block}) failed: {status} \ + — skipping this block in the inclusion scan" + ); + next_block += 1; + continue; + }, + }; + let Some(bytes) = response.block else { + next_block += 1; + continue; + }; + let signed_block = match SignedBlock::read_from_bytes(&bytes) { + Ok(sb) => sb, + Err(err) => { + eprintln!( + " warning: failed to deserialize SignedBlock for block {next_block}: {err}" + ); + next_block += 1; + continue; + }, + }; + + let block_ts = signed_block.header().timestamp(); + let block_ts_system = UNIX_EPOCH + Duration::from_secs(u64::from(block_ts)); + + // Track scan span so we can derive the block interval at print time. + if scanned_block_count == 0 { + scanned_first_ts = block_ts; + } + scanned_last_ts = block_ts; + scanned_block_count += 1; + + let mut hits_in_this_block: u32 = 0; + for header in signed_block.body().transactions().as_slice() { + if let Some(ack_at) = ack_by_id.remove(&header.id()) { + hits_in_this_block += 1; + included_count += 1; + // Block timestamps have 1-second resolution and may round + // down past the ack instant; clamp negative deltas to zero. + let latency = block_ts_system.duration_since(ack_at).unwrap_or_default(); + inclusion_latencies.push(latency); + } + } + + if hits_in_this_block > 0 { + per_block_hits.push(BlockHit { + block_num: next_block, + block_ts, + hit_count: hits_in_this_block, + }); + } + + h_final = next_block; + next_block += 1; + + // Early exit: pending set drained — every submitted tx is on chain. 
+ if ack_by_id.is_empty() { + println!( + " all {submitted_count} submitted tx(s) included by block {h_final}; \ + stopping scan early" + ); + break 'outer; + } + } + + // Hit the safety bound but still have pending txs. Stop and report + // what we have; the unaccounted-for txs will show as drop in the + // summary. + if next_block > max_target { + println!( + " reached max wait of {max_blocks} blocks past height {start_height}; \ + stopping with {} tx(s) still pending", + ack_by_id.len(), + ); + break; + } + + // Pace the polling. + tokio::time::sleep(Duration::from_millis(500)).await; + } + + let inclusion = InclusionResult { + submitted_count, + included_count, + per_block_hits, + inclusion_latencies, + scanned_block_count, + scanned_first_ts, + scanned_last_ts, + }; + (h_final, inclusion) +} + +pub(crate) async fn current_block_height(mut client: RpcClient) -> u32 { + let response = client + .get_block_header_by_number(BlockHeaderByNumberRequest { + block_num: None, + include_mmr_proof: None, + }) + .await + .expect("failed to fetch latest block header") + .into_inner(); + let header: BlockHeader = response + .block_header + .expect("no block header in response") + .try_into() + .expect("failed to decode block header"); + header.block_num().as_u32() +} diff --git a/bin/benchmark/src/main.rs b/bin/benchmark/src/main.rs index 0e1502dc29..f13e33df99 100644 --- a/bin/benchmark/src/main.rs +++ b/bin/benchmark/src/main.rs @@ -1,70 +1,31 @@ -//! Runs benchmarks +//! Runs benchmarks. +//! +//! Each subcommand's body lives in its own module (`create_proofs`, `submit`). +//! `main.rs` is just the clap CLI + dispatch + a few shared utilities both +//! orchestrators need (RPC client setup with genesis metadata, file I/O +//! helpers, and the proofs-bundle directory). 
-use std::collections::{BTreeSet, HashMap}; -use std::path::PathBuf; -use std::sync::Arc; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; +use std::path::Path; +use std::time::Duration; use anyhow::{Context, Result}; use clap::{Parser, Subcommand}; use miden_node_proto::clients::{Builder, RpcClient}; -use miden_node_proto::generated as proto; use miden_node_proto::generated::rpc::BlockHeaderByNumberRequest; -use miden_protocol::account::auth::{AuthScheme, AuthSecretKey}; -use miden_protocol::account::{ - Account, - AccountBuilder, - AccountId, - AccountStorageMode, - AccountType, - PartialAccount, - StorageMapKey, -}; -use miden_protocol::asset::{Asset, AssetVaultKey, AssetWitness, FungibleAsset, TokenSymbol}; -use miden_protocol::block::{BlockHeader, BlockNumber, SignedBlock}; -use miden_protocol::crypto::dsa::falcon512_poseidon2::SecretKey; -use miden_protocol::crypto::merkle::mmr::{MmrPeaks, PartialMmr}; -use miden_protocol::crypto::rand::RandomCoin; -use miden_protocol::note::{Note, NoteScript, NoteScriptRoot}; -use miden_protocol::transaction::{ - AccountInputs, - InputNote, - InputNotes, - PartialBlockchain, - ProvenTransaction, - TransactionArgs, - TransactionId, -}; +use miden_protocol::block::{BlockHeader, BlockNumber}; use miden_protocol::utils::serde::{Deserializable, Serializable}; -use miden_protocol::{Felt, MastForest, Word}; -use miden_standards::account::auth::AuthSingleSig; -use miden_standards::account::faucets::BasicFungibleFaucet; -use miden_standards::account::interface::{AccountInterface, AccountInterfaceExt}; -use miden_standards::account::metadata::{FungibleTokenMetadata, TokenName}; -use miden_standards::account::policies::{ - BurnPolicyConfig, - MintPolicyConfig, - PolicyAuthority, - TokenPolicyManager, -}; -use miden_standards::account::wallets::BasicWallet; -use miden_standards::note::P2idNote; -use miden_tx::auth::BasicAuthenticator; -use miden_tx::{ - DataStore, - 
DataStoreError, - LocalTransactionProver, - MastForestStore, - TransactionExecutor, - TransactionMastStore, -}; -use rand::Rng; -use rayon::prelude::*; -use tokio::sync::Semaphore; use url::Url; -const PROOFS_DIR: &str = "./benchmark-proofs"; +mod create_proofs; +mod inclusion; +mod rpc_state; +mod submit; +mod summary; + +// SHARED CONSTANTS +// ================================================================================================ + +pub(crate) const PROOFS_DIR: &str = "./benchmark-proofs"; // COMMANDS // ================================================================================================ @@ -96,10 +57,12 @@ pub enum Command { /// Number of concurrent submission tasks. #[arg(long, default_value_t = 32)] concurrency: usize, - /// Number of blocks to wait after the last submission RPC returns - /// before checking which of our txs have been included on-chain. - /// Larger values give the mempool more time to drain a backlog. - #[arg(long, default_value_t = 3)] + /// Maximum number of blocks past the submission point to scan + /// before giving up. The scan exits early as soon as every submitted + /// tx has been seen on-chain, so this is an upper bound on the + /// wait, not a fixed delay. Bump this when running large batches + /// that may take many blocks to fully include. 
+ #[arg(long, default_value_t = 30)] wait_blocks: u32, }, } @@ -114,947 +77,22 @@ impl Cli { async fn run(self) { match self.command { Command::CreateProofs { rpc_url, num_transactions } => { - create_proofs(rpc_url, num_transactions).await; + create_proofs::run(rpc_url, num_transactions).await; }, Command::RunBenchmark { rpc_url, concurrency, wait_blocks } => { - run_benchmark(rpc_url, concurrency, wait_blocks).await; - }, - } - } -} - -#[expect( - clippy::too_many_lines, - reason = "single linear orchestration of genesis fetch + mint phase + consume phase; \ - splitting would just shuffle locals (faucet, data_store, authenticator) around" -)] -async fn create_proofs(rpc_url: Url, num_transactions: u64) { - let mut rpc_client = create_genesis_aware_rpc_client(&rpc_url, Duration::from_secs(10)) - .await - .unwrap(); - - println!("Fetching genesis block header from {rpc_url}..."); - let genesis_header_proto = rpc_client - .get_block_header_by_number(get_genesis_header_request()) - .await - .unwrap() - .into_inner() - .block_header - .expect("RPC returned no block header"); - let genesis_header: BlockHeader = genesis_header_proto.try_into().unwrap(); - - println!("Creating faucet..."); - let (mut faucet, faucet_secret_key) = create_faucet(); - - let coin_seed: [u64; 4] = rand::rng().random(); - let mut seed_rng = RandomCoin::new(coin_seed.map(Felt::new).into()); - let wallet_secret_key = SecretKey::with_rng(&mut seed_rng); - let wallet_public_key = wallet_secret_key.public_key(); - - println!("Creating {num_transactions} wallets in parallel..."); - let wallets: Vec = (0..num_transactions) - .into_par_iter() - .map(|index| create_wallet(&wallet_public_key, index)) - .collect(); - - let genesis_chain_mmr = - PartialBlockchain::new(PartialMmr::from_peaks(MmrPeaks::default()), Vec::new()) - .expect("failed to create empty chain MMR"); - - let mut data_store = BenchmarkDataStore::new(genesis_header.clone(), genesis_chain_mmr); - data_store.add_account(faucet.clone()); - 
for wallet in &wallets { - data_store.add_account(wallet.clone()); - } - - let authenticator = BasicAuthenticator::new(&[ - AuthSecretKey::Falcon512Poseidon2(faucet_secret_key), - AuthSecretKey::Falcon512Poseidon2(wallet_secret_key), - ]); - - let prover = LocalTransactionProver::default(); - let faucet_id = faucet.id(); - - // Mint phase — sequential because each mint mutates the faucet. - println!("Proving {num_transactions} mint transactions (sequential)..."); - let mut mint_txs: Vec = Vec::with_capacity(num_transactions as usize); - let mut mint_tx_inputs: Vec> = Vec::with_capacity(num_transactions as usize); - let mut mint_notes: Vec = Vec::with_capacity(num_transactions as usize); - let mint_phase_start = Instant::now(); - let mut mint_exec_total = Duration::ZERO; - let mut mint_prove_total = Duration::ZERO; - - for index in 0..num_transactions { - let wallet_id = wallets[index as usize].id(); - let note = { - let asset = Asset::Fungible(FungibleAsset::new(faucet_id, 10).unwrap()); - P2idNote::create( - faucet_id, - wallet_id, - vec![asset], - miden_protocol::note::NoteType::Public, - miden_protocol::note::NoteAttachment::default(), - &mut seed_rng, - ) - .expect("note creation failed") - }; - - let account_interface = AccountInterface::from_account(&faucet); - let script = account_interface - .build_send_notes_script(&[note.clone().into()], None) - .expect("failed to build mint send-notes script"); - - let mut tx_args = TransactionArgs::default().with_tx_script(script); - tx_args.add_output_note_recipient(Box::new(note.recipient().clone())); - - let executor = TransactionExecutor::new(&data_store).with_authenticator(&authenticator); - - let exec_t0 = Instant::now(); - let executed_tx = Box::pin(executor.execute_transaction( - faucet_id, - genesis_header.block_num(), - InputNotes::default(), - tx_args, - )) - .await - .expect("failed to execute mint transaction"); - mint_exec_total += exec_t0.elapsed(); - - let tx_inputs_bytes = 
executed_tx.tx_inputs().to_bytes(); - let delta = executed_tx.account_delta().clone(); - - let prove_t0 = Instant::now(); - let proven_tx = prover.prove(executed_tx).await.expect("failed to prove mint transaction"); - mint_prove_total += prove_t0.elapsed(); - - // Evolve the faucet state for the next iteration. The first mint of a - // never-before-seen account produces a full-state delta (because the - // delta carries the freshly deployed code); subsequent mints produce - // partial-state deltas that can be applied incrementally. - if delta.is_full_state() { - faucet = Account::try_from(&delta) - .expect("failed to materialize faucet from full-state delta"); - } else { - faucet.apply_delta(&delta).expect("failed to apply faucet delta"); - } - data_store.add_account(faucet.clone()); - - mint_txs.push(proven_tx); - mint_tx_inputs.push(tx_inputs_bytes); - mint_notes.push(note); - - if (index + 1) % 10 == 0 || index + 1 == num_transactions { - println!(" proved {} / {num_transactions} mint txs", index + 1); - } - } - let mint_phase_elapsed = mint_phase_start.elapsed(); - print_proving_summary( - "Mint", - num_transactions, - mint_phase_elapsed, - mint_exec_total, - mint_prove_total, - ); - - // Consume phase — also sequential for now (each tx is one wallet, independent - // wallets, so this could be parallelized later with bounded concurrency). 
- println!("Proving {num_transactions} consume transactions (sequential)..."); - let mut consume_txs: Vec = Vec::with_capacity(num_transactions as usize); - let mut consume_tx_inputs: Vec> = Vec::with_capacity(num_transactions as usize); - let consume_phase_start = Instant::now(); - let mut consume_exec_total = Duration::ZERO; - let mut consume_prove_total = Duration::ZERO; - - for index in 0..num_transactions { - let wallet_id = wallets[index as usize].id(); - let note = mint_notes[index as usize].clone(); - let input_note = InputNote::Unauthenticated { note }; - let input_notes = - InputNotes::new(vec![input_note]).expect("failed to construct input notes for consume"); - - let executor = TransactionExecutor::new(&data_store).with_authenticator(&authenticator); - - let exec_t0 = Instant::now(); - let executed_tx = Box::pin(executor.execute_transaction( - wallet_id, - genesis_header.block_num(), - input_notes, - TransactionArgs::default(), - )) - .await - .expect("failed to execute consume transaction"); - consume_exec_total += exec_t0.elapsed(); - - let tx_inputs_bytes = executed_tx.tx_inputs().to_bytes(); - - let prove_t0 = Instant::now(); - let proven_tx = - prover.prove(executed_tx).await.expect("failed to prove consume transaction"); - consume_prove_total += prove_t0.elapsed(); - - consume_txs.push(proven_tx); - consume_tx_inputs.push(tx_inputs_bytes); - - if (index + 1) % 10 == 0 || index + 1 == num_transactions { - println!(" proved {} / {num_transactions} consume txs", index + 1); - } - } - let consume_phase_elapsed = consume_phase_start.elapsed(); - print_proving_summary( - "Consume", - num_transactions, - consume_phase_elapsed, - consume_exec_total, - consume_prove_total, - ); - - let out_dir = PathBuf::from(PROOFS_DIR); - println!("Writing proofs to {}/", out_dir.display()); - fs_err::create_dir_all(&out_dir).unwrap(); - write_to_file(&out_dir.join("mint_txs.bin"), &mint_txs); - write_to_file(&out_dir.join("mint_tx_inputs.bin"), &mint_tx_inputs); - 
write_to_file(&out_dir.join("consume_txs.bin"), &consume_txs); - write_to_file(&out_dir.join("consume_tx_inputs.bin"), &consume_tx_inputs); - println!("Done."); -} - -/// Prints a per-phase summary of how long proof generation took, broken down -/// into the executor (VM execution) and prover (STARK proving) costs, plus the -/// mean per tx for each so that runs of different sizes can be compared. -fn print_proving_summary( - label: &str, - num_transactions: u64, - wall: Duration, - exec_total: Duration, - prove_total: Duration, -) { - let n_u32 = u32::try_from(num_transactions).unwrap_or(u32::MAX); - let exec_mean = if num_transactions > 0 { - exec_total / n_u32 - } else { - Duration::ZERO - }; - let prove_mean = if num_transactions > 0 { - prove_total / n_u32 - } else { - Duration::ZERO - }; - let per_tx_mean = if num_transactions > 0 { - (exec_total + prove_total) / n_u32 - } else { - Duration::ZERO - }; - println!("{label} proving summary (n={num_transactions}):"); - println!(" wall time: {}", format_duration_secs(wall)); - println!( - " execute_transaction: total={} mean={}/tx", - format_duration_secs(exec_total), - format_duration_secs(exec_mean), - ); - println!( - " prover.prove: total={} mean={}/tx", - format_duration_secs(prove_total), - format_duration_secs(prove_mean), - ); - println!(" exec+prove per tx: mean={}/tx", format_duration_secs(per_tx_mean)); -} - -async fn run_benchmark(rpc_url: Url, concurrency: usize, wait_blocks: u32) { - let in_dir = PathBuf::from(PROOFS_DIR); - - println!("Loading mint txs from {}", in_dir.join("mint_txs.bin").display()); - let mint_txs: Vec = read_from_file(&in_dir.join("mint_txs.bin")); - let mint_tx_inputs: Vec> = read_from_file(&in_dir.join("mint_tx_inputs.bin")); - assert_eq!(mint_txs.len(), mint_tx_inputs.len(), "mint tx/inputs length mismatch"); - - println!("Loading consume txs from {}", in_dir.join("consume_txs.bin").display()); - let consume_txs: Vec = read_from_file(&in_dir.join("consume_txs.bin")); - let 
consume_tx_inputs: Vec> = read_from_file(&in_dir.join("consume_tx_inputs.bin")); - assert_eq!(consume_txs.len(), consume_tx_inputs.len(), "consume tx/inputs length mismatch"); - - // Compute the tx-id master lists up front so we can match them against - // on-chain block contents later, without having to interrogate the node. - let mint_ids: Vec = mint_txs.iter().map(ProvenTransaction::id).collect(); - let consume_ids: Vec = consume_txs.iter().map(ProvenTransaction::id).collect(); - - println!("Connecting to {rpc_url}..."); - let rpc_client = create_genesis_aware_rpc_client(&rpc_url, Duration::from_secs(30)) - .await - .expect("failed to create RPC client"); - - let h_start = current_block_height(rpc_client.clone()).await; - println!("Chain height at start: {h_start}"); - - println!( - "Submitting {} mint txs sequentially (each one mutates the shared faucet, so the \ - submits must be serialized for the mempool to chain them)...", - mint_txs.len() - ); - let mint_stats = submit_sequential(rpc_client.clone(), mint_txs, mint_tx_inputs).await; - print_phase_progress("mint", &mint_stats); - - println!("Submitting {} consume txs with concurrency={concurrency}...", consume_txs.len()); - let consume_stats = - submit_all(rpc_client.clone(), consume_txs, consume_tx_inputs, concurrency).await; - print_phase_progress("consume", &consume_stats); - - println!("Waiting {wait_blocks} blocks for the last submissions to land..."); - let h_final = wait_for_n_blocks(rpc_client.clone(), wait_blocks).await; - - println!("Checking which submitted txs landed in blocks {}..={}", h_start + 1, h_final); - let ack_by_id = build_ack_map(&mint_ids, &mint_stats, &consume_ids, &consume_stats); - let inclusion = compute_inclusion(rpc_client.clone(), h_start + 1, h_final, ack_by_id).await; - - print_summary(h_start, h_final, &mint_stats, &consume_stats, concurrency, &inclusion); -} - -fn print_phase_progress(label: &str, stats: &PhaseStats) { - let elapsed = stats.elapsed.as_secs_f64(); - let rate 
= rate_per_second(stats.ok_count(), stats.elapsed); - println!( - " {label}: ok={ok} err={err} in {elapsed:.1}s ({rate:.1} tx/s ack rate)", - ok = stats.ok_count(), - err = stats.err_count(), - ); -} - -/// Computes `count / elapsed`, treating a zero-or-negative elapsed window as -/// zero. Wrapping the cast in a helper keeps the precision-loss expect tightly -/// scoped — the loss is harmless for display purposes. -#[expect( - clippy::cast_precision_loss, - reason = "presentational rate; precision loss past 2^52 events is irrelevant" -)] -fn rate_per_second(count: u64, elapsed: Duration) -> f64 { - let secs = elapsed.as_secs_f64(); - if secs > 0.0 { (count as f64) / secs } else { 0.0 } -} - -/// Computes `100 * num / den` as a percentage, returning 0 when `den == 0`. -#[expect( - clippy::cast_precision_loss, - reason = "presentational percentage; precision loss past 2^52 is irrelevant" -)] -fn ratio_pct(num: u64, den: u64) -> f64 { - if den == 0 { - 0.0 - } else { - (num as f64) * 100.0 / (den as f64) - } -} - -/// Build a lookup from the on-chain `TransactionId` of every successfully -/// submitted tx to the `SystemTime` at which the node `ACKed` its submission. -/// Used by [`compute_inclusion`] to compute per-tx inclusion latency. 
-fn build_ack_map( - mint_ids: &[TransactionId], - mint_stats: &PhaseStats, - consume_ids: &[TransactionId], - consume_stats: &PhaseStats, -) -> HashMap { - let mut map = HashMap::new(); - for outcome in &mint_stats.outcomes { - if let Some(ack_at) = outcome.ack_at { - map.insert(mint_ids[outcome.index], ack_at); - } - } - for outcome in &consume_stats.outcomes { - if let Some(ack_at) = outcome.ack_at { - map.insert(consume_ids[outcome.index], ack_at); - } - } - map -} - -fn print_summary( - h_start: u32, - h_final: u32, - mint: &PhaseStats, - consume: &PhaseStats, - concurrency: usize, - inclusion: &InclusionResult, -) { - println!(); - println!("=== Summary ==="); - println!( - "Chain height: {h_start} -> {h_final} ({} blocks, of which {} contained at least one of our txs)", - h_final - h_start, - inclusion.per_block_hits.len(), - ); - println!(); - print_phase_summary("Mint phase (sequential)", mint); - println!(); - print_phase_summary(&format!("Consume phase (concurrent, c={concurrency})"), consume); - println!(); - print_inclusion_summary(inclusion); -} - -fn print_phase_summary(title: &str, stats: &PhaseStats) { - let ok = stats.ok_count(); - let err = stats.err_count(); - let elapsed = stats.elapsed.as_secs_f64(); - let total = stats.outcomes.len() as u64; - - println!("{title}:"); - println!( - " ok = {ok} / {total} err = {err} ({})", - format_err_breakdown(stats.err_by_code()), - ); - - let mut latencies = stats.submit_latencies(); - if let Some(p) = percentiles(&mut latencies) { - println!( - " submit RPC latency: mean={mean} p50={p50} p95={p95} p99={p99} max={max}", - mean = format_duration_ms(p.mean), - p50 = format_duration_ms(p.p50), - p95 = format_duration_ms(p.p95), - p99 = format_duration_ms(p.p99), - max = format_duration_ms(p.max), - ); - } else { - println!(" submit RPC latency: (no successful submissions)"); - } - - let rate = rate_per_second(stats.ok_count(), stats.elapsed); - println!(" elapsed = {elapsed:.1}s, RPC ack rate = {rate:.1} 
tx/s"); -} - -fn print_inclusion_summary(inclusion: &InclusionResult) { - let submitted = inclusion.submitted_count; - let included = inclusion.included_count; - let drop = submitted.saturating_sub(included); - let drop_pct = ratio_pct(drop, submitted); - - println!("Inclusion (per-tx ID match against block contents):"); - println!( - " included = {included} / {submitted} submitted ({drop} missing, {drop_pct:.1}% drop)", - ); - - let hits = &inclusion.per_block_hits; - if hits.is_empty() { - println!(" no blocks observed containing any of our txs"); - return; - } - - // Per-block aggregates. - let counts: Vec = hits.iter().map(|h| h.hit_count).collect(); - let sum_counts: u32 = counts.iter().copied().sum(); - let max_count = counts.iter().copied().max().unwrap_or(0); - let n_blocks = u32::try_from(counts.len()).unwrap_or(u32::MAX); - let mean_count = f64::from(sum_counts) / f64::from(n_blocks); - - let peak_block = hits.iter().max_by_key(|h| h.hit_count).expect("non-empty hits"); - let first_block = hits.first().expect("non-empty hits"); - let last_block = hits.last().expect("non-empty hits"); - - println!( - " blocks with our txs = {n_blocks} \ - (block range {}..={}, mean txs/block when present = {mean_count:.1}, max = {max_count})", - first_block.block_num, last_block.block_num, - ); - - // Derive the block interval from consecutive scanned timestamps. - let Some(block_interval) = inclusion.derived_block_interval() else { - println!( - " block interval: could not derive from {} scanned block(s) \ - (need >=2 blocks spanning at least one second boundary)", - inclusion.scanned_block_count, - ); - println!(" throughput metrics skipped; per-block series follows."); - print_per_block_series(hits, None); - return; - }; - - println!( - " derived block interval = {} (from {} scanned blocks, span = {}s)", - format_duration_secs(block_interval), - inclusion.scanned_block_count, - inclusion.scanned_last_ts - inclusion.scanned_first_ts, - ); - - // Throughput. 
Each block-with-our-txs is treated as `block_interval` - // seconds of node work. - let interval_secs = block_interval.as_secs_f64(); - let peak_rate = rate_per_second(u64::from(peak_block.hit_count), block_interval); - let mean_rate = if interval_secs > 0.0 { - mean_count / interval_secs - } else { - 0.0 - }; - let window_rate = rate_per_second(included, block_interval.saturating_mul(n_blocks)); - - println!( - " peak per-block rate = {} txs in block {} => {peak_rate:.1} tx/s", - peak_block.hit_count, peak_block.block_num, - ); - println!(" mean per-block rate = {mean_count:.1} txs/block => {mean_rate:.1} tx/s"); - println!( - " window-average TPS = {included} included / ({n_blocks} blocks * {}) \ - => {window_rate:.1} tx/s", - format_duration_secs(block_interval), - ); - - print_per_block_series(hits, Some(block_interval)); - - let mut lats = inclusion.inclusion_latencies.clone(); - if let Some(p) = percentiles(&mut lats) { - println!( - " inclusion latency (submit_ack -> block timestamp): mean={mean} p50={p50} p95={p95} p99={p99} max={max}", - mean = format_duration_secs(p.mean), - p50 = format_duration_secs(p.p50), - p95 = format_duration_secs(p.p95), - p99 = format_duration_secs(p.p99), - max = format_duration_secs(p.max), - ); - } -} - -/// Print a compact per-block series so the operator can eyeball the -/// time-series shape (ramp, plateau, dip). Empty blocks in the scan range -/// are intentionally omitted. If `block_interval` is `Some`, each line also -/// shows the equivalent rate; if `None`, only the raw count. 
-fn print_per_block_series(hits: &[BlockHit], block_interval: Option) { - println!(" per-block series:"); - for hit in hits { - match block_interval { - Some(interval) => { - let rate = rate_per_second(u64::from(hit.hit_count), interval); - println!( - " block {} (ts={}): {} txs ({rate:.1} tx/s @ block_interval)", - hit.block_num, hit.block_ts, hit.hit_count, - ); - }, - None => { - println!( - " block {} (ts={}): {} txs", - hit.block_num, hit.block_ts, hit.hit_count, - ); + submit::run(rpc_url, concurrency, wait_blocks).await; }, } } } -fn read_from_file(path: &std::path::Path) -> T { - let bytes = fs_err::read(path).unwrap_or_else(|_| { - panic!("failed to read {} — run `create-proofs` first", path.display()) - }); - T::read_from_bytes(&bytes) - .unwrap_or_else(|_| panic!("failed to deserialize {}", path.display())) -} - -fn write_to_file(path: &std::path::Path, value: &T) { - fs_err::write(path, value.to_bytes()) - .unwrap_or_else(|err| panic!("failed to write {}: {err}", path.display())); -} - -// SUBMISSION STATS +// SHARED INFRA // ================================================================================================ -/// Outcome of a single `submit_proven_transaction` RPC. -#[derive(Debug)] -struct SubmitOutcome { - /// Position of this tx in the original input vec — used to recover the - /// corresponding `TransactionId` from the caller-owned id list. - index: usize, - /// `Ok(rpc_round_trip_duration)` on success, `Err(grpc_code)` on failure. - result: Result, - /// Wall-clock timestamp at which the RPC returned `Ok`. `None` on error. - /// Stored as `SystemTime` so it is directly comparable to block headers' - /// unix-second timestamps when computing inclusion latency. - ack_at: Option, -} - -/// Aggregated stats for one submission phase (mint or consume). -#[derive(Debug)] -struct PhaseStats { - /// Wall-clock duration of the entire phase. - elapsed: Duration, - /// One entry per input tx, aligned by `index`. 
- outcomes: Vec, -} - -impl PhaseStats { - fn ok_count(&self) -> u64 { - self.outcomes.iter().filter(|o| o.result.is_ok()).count() as u64 - } - - fn err_count(&self) -> u64 { - self.outcomes.iter().filter(|o| o.result.is_err()).count() as u64 - } - - fn submit_latencies(&self) -> Vec { - self.outcomes.iter().filter_map(|o| o.result.as_ref().ok().copied()).collect() - } - - fn err_by_code(&self) -> HashMap { - let mut map: HashMap = HashMap::new(); - for o in &self.outcomes { - if let Err(code) = o.result { - *map.entry(code).or_insert(0) += 1; - } - } - map - } -} - -fn format_err_breakdown(by_code: HashMap) -> String { - if by_code.is_empty() { - return "no errors".to_string(); - } - let mut entries: Vec<(tonic::Code, u64)> = by_code.into_iter().collect(); - entries.sort_by(|a, b| b.1.cmp(&a.1)); - let parts: Vec = entries.iter().map(|(c, n)| format!("{c:?}={n}")).collect(); - parts.join(", ") -} - -fn format_duration_ms(d: Duration) -> String { - format!("{:.1}ms", d.as_secs_f64() * 1000.0) -} - -fn format_duration_secs(d: Duration) -> String { - format!("{:.2}s", d.as_secs_f64()) -} - -#[derive(Debug, Clone, Copy)] -struct Percentiles { - mean: Duration, - p50: Duration, - p95: Duration, - p99: Duration, - max: Duration, -} - -/// Returns `None` if there are no samples. -fn percentiles(samples: &mut [Duration]) -> Option { - if samples.is_empty() { - return None; - } - samples.sort(); - let n = samples.len(); - // Integer index for percentile `num/den`. Picked over an `f64` cast to - // avoid the cast_sign_loss / cast_precision_loss footguns. 
- let pick = |num: usize, den: usize| -> Duration { - let idx = (n * num / den).min(n - 1); - samples[idx] - }; - let sum: Duration = samples.iter().copied().sum(); - let mean = sum / u32::try_from(n).unwrap_or(u32::MAX); - Some(Percentiles { - mean, - p50: pick(50, 100), - p95: pick(95, 100), - p99: pick(99, 100), - max: *samples.last().unwrap(), - }) -} - -async fn submit_all( - client: RpcClient, - txs: Vec, - tx_inputs: Vec>, - concurrency: usize, -) -> PhaseStats { - /// How many distinct error messages to surface to the console as they - /// happen. The full failure breakdown still appears in the summary. - const MAX_ERRORS_TO_PRINT: u64 = 5; - - let start = Instant::now(); - let semaphore = Arc::new(Semaphore::new(concurrency)); - // Incrementing-only counter used purely to budget the live error prints. - // It is never read on the hot path, so it does not introduce any - // submit-side synchronization beyond what was already there. - let printed = Arc::new(AtomicU64::new(0)); - - let total = txs.len(); - let mut set = tokio::task::JoinSet::new(); - for (i, (tx, inputs)) in txs.into_iter().zip(tx_inputs.into_iter()).enumerate() { - let permit = semaphore.clone().acquire_owned().await.unwrap(); - let mut client = client.clone(); - let printed = printed.clone(); - set.spawn(async move { - let request = proto::transaction::ProvenTransaction { - transaction: tx.to_bytes(), - transaction_inputs: Some(inputs), - }; - let t0 = Instant::now(); - let outcome = match client.submit_proven_transaction(request).await { - Ok(_) => SubmitOutcome { - index: i, - result: Ok(t0.elapsed()), - ack_at: Some(SystemTime::now()), - }, - Err(status) => { - if printed.fetch_add(1, Ordering::Relaxed) < MAX_ERRORS_TO_PRINT { - eprintln!( - " tx idx {i} failed: code={:?} message={}", - status.code(), - status.message() - ); - } - SubmitOutcome { - index: i, - result: Err(status.code()), - ack_at: None, - } - }, - }; - drop(permit); - outcome - }); - } - - // Outcomes carry their 
original `index`, so completion order is fine — - // downstream summarizers don't depend on the vec being in spawn order. - let mut outcomes = Vec::with_capacity(total); - while let Some(res) = set.join_next().await { - outcomes.push(res.expect("submission task panicked")); - } - - PhaseStats { elapsed: start.elapsed(), outcomes } -} - -/// Submit txs one at a time, awaiting each RPC response before sending the -/// next. Used for the mint phase, where every tx mutates the shared faucet -/// and therefore must arrive at the mempool in order — the block-producer's -/// mempool will reject out-of-order submissions but happily chains in-order -/// ones against its own pending state, so we only need to serialize the -/// `submit_proven_transaction` calls themselves, not wait for block -/// inclusion in between. -async fn submit_sequential( - mut client: RpcClient, - txs: Vec, - tx_inputs: Vec>, -) -> PhaseStats { - let start = Instant::now(); - let total = txs.len(); - let mut outcomes = Vec::with_capacity(total); - - for (i, (tx, inputs)) in txs.into_iter().zip(tx_inputs.into_iter()).enumerate() { - let request = proto::transaction::ProvenTransaction { - transaction: tx.to_bytes(), - transaction_inputs: Some(inputs), - }; - - let t0 = Instant::now(); - let outcome = match client.submit_proven_transaction(request).await { - Ok(_) => SubmitOutcome { - index: i, - result: Ok(t0.elapsed()), - ack_at: Some(SystemTime::now()), - }, - Err(status) => { - eprintln!(" tx {} / {total} failed: {status}", i + 1); - SubmitOutcome { - index: i, - result: Err(status.code()), - ack_at: None, - } - }, - }; - outcomes.push(outcome); - } - - PhaseStats { elapsed: start.elapsed(), outcomes } -} - -// INCLUSION CHECK -// ================================================================================================ - -/// One scanned block that contained at least one of our txs. Empty blocks -/// in the scan range are not represented here. 
-#[derive(Debug, Clone, Copy)] -struct BlockHit { - /// On-chain block number. - block_num: u32, - /// Unix-seconds timestamp from the block header. - block_ts: u32, - /// Number of our txs included in this block. - hit_count: u32, -} - -#[derive(Debug)] -struct InclusionResult { - submitted_count: u64, - included_count: u64, - /// One entry per block in the scan range that included any of our txs, - /// in scan order. Throughput metrics are derived from this list plus - /// the block interval inferred from the scan span (see [`block_interval`]). - per_block_hits: Vec, - /// For each successfully submitted tx that landed in a block: the - /// elapsed time from RPC ack to that block's header timestamp. - inclusion_latencies: Vec, - /// Number of blocks the inclusion scan successfully read headers for. - scanned_block_count: u32, - /// Header timestamps of the first and last successfully scanned blocks - /// (unix seconds). Together with `scanned_block_count`, used to derive - /// the block interval at print time. - scanned_first_ts: u32, - scanned_last_ts: u32, -} - -impl InclusionResult { - /// Derive the average block interval from the scan span. Returns `None` - /// when the scan touched fewer than two blocks or when all scanned - /// headers share the same 1-second-resolution timestamp (sub-second - /// cadence), in which case the bench cannot determine the interval - /// from headers alone. - fn derived_block_interval(&self) -> Option { - if self.scanned_block_count < 2 || self.scanned_last_ts <= self.scanned_first_ts { - return None; - } - let span_secs = u64::from(self.scanned_last_ts - self.scanned_first_ts); - let intervals = u64::from(self.scanned_block_count - 1); - // f64 keeps the fractional seconds when the cadence is finer than 1s - // *and* the scan crosses enough one-second boundaries. 
- #[expect( - clippy::cast_precision_loss, - reason = "block counts and timestamp deltas are tiny in practice" - )] - let interval_secs = (span_secs as f64) / (intervals as f64); - Some(Duration::from_secs_f64(interval_secs)) - } -} - -/// Walk every block from `from_block` to `to_block` inclusive, deserialize -/// it as a [`SignedBlock`], and check which of the submitted tx-ids appear -/// in each block's transaction headers. Sequential because the volumes are -/// small and the call is cheap. -async fn compute_inclusion( - mut client: RpcClient, - from_block: u32, - to_block: u32, - mut ack_by_id: HashMap, -) -> InclusionResult { - let submitted_count = ack_by_id.len() as u64; - let mut included_count: u64 = 0; - let mut per_block_hits: Vec = Vec::new(); - let mut inclusion_latencies: Vec = Vec::new(); - let mut scanned_block_count: u32 = 0; - let mut scanned_first_ts: u32 = 0; - let mut scanned_last_ts: u32 = 0; - - if from_block > to_block { - return InclusionResult { - submitted_count, - included_count, - per_block_hits, - inclusion_latencies, - scanned_block_count, - scanned_first_ts, - scanned_last_ts, - }; - } - - for block_num in from_block..=to_block { - let request = proto::blockchain::BlockRequest { block_num, include_proof: None }; - let response = match client.get_block_by_number(request).await { - Ok(r) => r.into_inner(), - Err(status) => { - eprintln!( - " warning: get_block_by_number({block_num}) failed: {status} \ - — skipping this block in the inclusion scan" - ); - continue; - }, - }; - let Some(bytes) = response.block else { - continue; - }; - let signed_block = match SignedBlock::read_from_bytes(&bytes) { - Ok(sb) => sb, - Err(err) => { - eprintln!( - " warning: failed to deserialize SignedBlock for block {block_num}: {err}" - ); - continue; - }, - }; - - let block_ts = signed_block.header().timestamp(); - let block_ts_system = UNIX_EPOCH + Duration::from_secs(u64::from(block_ts)); - - // Track scan span so we can derive the block interval at 
print time. - if scanned_block_count == 0 { - scanned_first_ts = block_ts; - } - scanned_last_ts = block_ts; - scanned_block_count += 1; - - let mut hits_in_this_block: u32 = 0; - - for header in signed_block.body().transactions().as_slice() { - if let Some(ack_at) = ack_by_id.remove(&header.id()) { - hits_in_this_block += 1; - included_count += 1; - // Block timestamps have 1-second resolution and may round - // down past the ack instant; clamp negative deltas to zero. - let latency = block_ts_system.duration_since(ack_at).unwrap_or_default(); - inclusion_latencies.push(latency); - } - } - - if hits_in_this_block > 0 { - per_block_hits.push(BlockHit { - block_num, - block_ts, - hit_count: hits_in_this_block, - }); - } - } - - InclusionResult { - submitted_count, - included_count, - per_block_hits, - inclusion_latencies, - scanned_block_count, - scanned_first_ts, - scanned_last_ts, - } -} - -async fn current_block_height(mut client: RpcClient) -> u32 { - let response = client - .get_block_header_by_number(BlockHeaderByNumberRequest { - block_num: None, - include_mmr_proof: None, - }) - .await - .expect("failed to fetch latest block header") - .into_inner(); - let header: BlockHeader = response - .block_header - .expect("no block header in response") - .try_into() - .expect("failed to decode block header"); - header.block_num().as_u32() -} - -/// Wait until the chain has advanced by `n` blocks past whatever the current -/// height is, then return. Used to give the block-producer time to include -/// in-flight submissions without falsely waiting forever (the node produces -/// empty blocks at a steady interval, so "no height change" never fires). 
-async fn wait_for_n_blocks(client: RpcClient, n: u32) -> u32 { - let start_height = current_block_height(client.clone()).await; - let target = start_height + n; - let mut last = start_height; - loop { - tokio::time::sleep(Duration::from_millis(500)).await; - let h = current_block_height(client.clone()).await; - if h != last { - println!(" block height: {h}"); - last = h; - } - if h >= target { - return h; - } - } -} - /// Create an RPC client configured with the correct genesis metadata in the /// `Accept` header so that write RPCs such as `SubmitProvenTransaction` are /// accepted by the node. -pub async fn create_genesis_aware_rpc_client( +pub(crate) async fn create_genesis_aware_rpc_client( rpc_url: &Url, timeout: Duration, ) -> Result { @@ -1109,178 +147,22 @@ pub async fn create_genesis_aware_rpc_client( Ok(rpc_client) } -fn get_genesis_header_request() -> BlockHeaderByNumberRequest { +pub(crate) fn get_genesis_header_request() -> BlockHeaderByNumberRequest { BlockHeaderByNumberRequest { block_num: Some(BlockNumber::GENESIS.as_u32()), include_mmr_proof: None, } } -/// Creates a new faucet account and returns it alongside its secret key. 
-fn create_faucet() -> (Account, SecretKey) { - let coin_seed: [u64; 4] = rand::rng().random(); - let mut rng = RandomCoin::new(coin_seed.map(Felt::new).into()); - let key_pair = SecretKey::with_rng(&mut rng); - let init_seed = [0_u8; 32]; - - let token_symbol = TokenSymbol::new("TEST").unwrap(); - let token_metadata = FungibleTokenMetadata::builder( - TokenName::new("TEST").unwrap(), - token_symbol, - 2, - FungibleAsset::MAX_AMOUNT, - ) - .build() - .unwrap(); - let faucet = AccountBuilder::new(init_seed) - .account_type(AccountType::FungibleFaucet) - .storage_mode(AccountStorageMode::Private) - .with_component(token_metadata) - .with_component(BasicFungibleFaucet) - .with_components(TokenPolicyManager::new( - PolicyAuthority::AuthControlled, - MintPolicyConfig::AllowAll, - BurnPolicyConfig::AllowAll, - )) - .with_auth_component(AuthSingleSig::new( - key_pair.public_key().into(), - AuthScheme::Falcon512Poseidon2, - )) - .build() - .unwrap(); - (faucet, key_pair) -} - -/// Creates a new wallet account with the given public key, using `index` to vary -/// the init seed so each wallet ends up with a distinct account ID. -fn create_wallet( - public_key: &miden_protocol::crypto::dsa::falcon512_poseidon2::PublicKey, - index: u64, -) -> Account { - let init_seed: Vec<_> = index.to_be_bytes().into_iter().chain([0u8; 24]).collect(); - AccountBuilder::new(init_seed.try_into().unwrap()) - .account_type(AccountType::RegularAccountImmutableCode) - .storage_mode(AccountStorageMode::Private) - .with_auth_component(AuthSingleSig::new( - public_key.clone().into(), - AuthScheme::Falcon512Poseidon2, - )) - .with_component(BasicWallet) - .build() - .unwrap() -} - -// BENCHMARK DATA STORE -// ================================================================================================ - -/// In-memory `DataStore` impl used to feed the [`TransactionExecutor`] when -/// generating real proofs locally. Modelled on the network-monitor's -/// `MonitorDataStore`. 
-pub struct BenchmarkDataStore { - accounts: HashMap, - block_header: BlockHeader, - partial_block_chain: PartialBlockchain, - mast_store: TransactionMastStore, -} - -impl BenchmarkDataStore { - pub fn new(block_header: BlockHeader, partial_block_chain: PartialBlockchain) -> Self { - Self { - accounts: HashMap::new(), - block_header, - partial_block_chain, - mast_store: TransactionMastStore::new(), - } - } - - pub fn add_account(&mut self, account: Account) { - self.mast_store.load_account_code(account.code()); - self.accounts.insert(account.id(), account); - } - - fn get_account(&self, account_id: AccountId) -> Result<&Account, DataStoreError> { - self.accounts.get(&account_id).ok_or_else(|| DataStoreError::Other { - error_msg: "unknown account".into(), - source: None, - }) - } -} - -impl DataStore for BenchmarkDataStore { - async fn get_transaction_inputs( - &self, - account_id: AccountId, - _block_refs: BTreeSet, - ) -> Result<(PartialAccount, BlockHeader, PartialBlockchain), DataStoreError> { - let account = self.get_account(account_id)?; - let partial_account = PartialAccount::from(account); - Ok((partial_account, self.block_header.clone(), self.partial_block_chain.clone())) - } - - async fn get_storage_map_witness( - &self, - account_id: AccountId, - map_root: Word, - map_key: StorageMapKey, - ) -> Result { - let account = self.get_account(account_id)?; - for slot in account.storage().slots() { - if let miden_protocol::account::StorageSlotContent::Map(map) = slot.content() { - if map.root() == map_root { - return Ok(map.open(&map_key)); - } - } - } - Err(DataStoreError::Other { - error_msg: format!("no storage map with the requested root in account {account_id}") - .into(), - source: None, - }) - } - - async fn get_foreign_account_inputs( - &self, - _foreign_account_id: AccountId, - _ref_block: BlockNumber, - ) -> Result { - unimplemented!("foreign account inputs are not needed for the benchmark") - } - - async fn get_vault_asset_witnesses( - &self, - 
account_id: AccountId, - vault_root: Word, - vault_keys: BTreeSet, - ) -> Result, DataStoreError> { - let account = self.get_account(account_id)?; - - if account.vault().root() != vault_root { - return Err(DataStoreError::Other { - error_msg: "vault root mismatch".into(), - source: None, - }); - } - - Result::, _>::from_iter(vault_keys.into_iter().map(|vault_key| { - AssetWitness::new(account.vault().open(vault_key).into()).map_err(|err| { - DataStoreError::Other { - error_msg: "failed to open vault asset tree".into(), - source: Some(Box::new(err)), - } - }) - })) - } - - async fn get_note_script( - &self, - _script_root: NoteScriptRoot, - ) -> Result, DataStoreError> { - Ok(None) - } +pub(crate) fn read_from_file(path: &Path) -> T { + let bytes = fs_err::read(path).unwrap_or_else(|_| { + panic!("failed to read {} — run `create-proofs` first", path.display()) + }); + T::read_from_bytes(&bytes) + .unwrap_or_else(|_| panic!("failed to deserialize {}", path.display())) } -impl MastForestStore for BenchmarkDataStore { - fn get(&self, procedure_hash: &Word) -> Option> { - self.mast_store.get(procedure_hash) - } +pub(crate) fn write_to_file(path: &Path, value: &T) { + fs_err::write(path, value.to_bytes()) + .unwrap_or_else(|err| panic!("failed to write {}: {err}", path.display())); } diff --git a/bin/benchmark/src/rpc_state.rs b/bin/benchmark/src/rpc_state.rs new file mode 100644 index 0000000000..f585c08238 --- /dev/null +++ b/bin/benchmark/src/rpc_state.rs @@ -0,0 +1,89 @@ +//! Thin-client state-fetch helpers used by `create_proofs::run`. +//! +//! These let the bench bind its proofs to the target node's actual chain +//! state (chain MMR at the tip) instead of fabricating an empty +//! `PartialBlockchain`. Without this, runs against any chain whose genesis +//! state isn't minimal (testnet, devnet, any local node restored from +//! a snapshot) fail with `AdviceError::MapKeyNotFound` during proof +//! generation. 
+ +use miden_node_proto::clients::RpcClient; +use miden_node_proto::generated::rpc::sync_chain_mmr_request::UpperBound; +use miden_node_proto::generated::rpc::{BlockHeaderByNumberRequest, SyncChainMmrRequest}; +use miden_protocol::block::BlockHeader; +use miden_protocol::crypto::merkle::mmr::{MmrDelta, MmrPeaks, PartialMmr}; +use miden_protocol::transaction::PartialBlockchain; + +/// Fetch the header of the latest committed block from the target node. +/// +/// `get_block_header_by_number(block_num=None)` returns the chain tip per +/// the server's documented contract. +pub(crate) async fn fetch_chain_tip_header(client: &mut RpcClient) -> BlockHeader { + let response = client + .get_block_header_by_number(BlockHeaderByNumberRequest { + block_num: None, + include_mmr_proof: None, + }) + .await + .expect("failed to fetch chain tip header") + .into_inner(); + + response + .block_header + .expect("chain tip response missing block_header") + .try_into() + .expect("failed to decode chain tip block header") +} + +/// Build a [`PartialBlockchain`] whose chain MMR matches the tip block's +/// `chain_commitment`. +/// +/// Construction is: +/// +/// - `tip_block_num == 0` → empty MMR (chain at genesis has no prior blocks committed). No RPC +/// calls. +/// - `tip_block_num >= 1` → MMR starts empty, then the genesis block's commitment is added as leaf +/// 0 (this brings the local MMR's forest to 1, matching what the server expects as the caller's +/// pre-state for `block_from = 0`). +/// - `tip_block_num >= 2` → `sync_chain_mmr(block_from = 0, upper_bound = BlockNum(tip_block_num))` +/// is called and the returned `MmrDelta` is applied, bringing the MMR's forest from 1 up to +/// `tip_block_num`. +/// +/// After this function returns, `partial_mmr.peaks().hash_peaks()` matches +/// the tip block's `chain_commitment()`. 
+pub(crate) async fn fetch_partial_blockchain( + client: &mut RpcClient, + tip_block_num: u32, + genesis_header: &BlockHeader, +) -> PartialBlockchain { + let mut partial_mmr = PartialMmr::from_peaks(MmrPeaks::default()); + + if tip_block_num == 0 { + return PartialBlockchain::new(partial_mmr, Vec::new()) + .expect("empty PartialBlockchain construction"); + } + + // Genesis is always leaf 0; this brings forest from 0 to 1. + partial_mmr.add(genesis_header.commitment(), false); + + if tip_block_num >= 2 { + let request = SyncChainMmrRequest { + block_from: 0, + upper_bound: Some(UpperBound::BlockNum(tip_block_num)), + }; + let response = client + .sync_chain_mmr(request) + .await + .expect("failed to call sync_chain_mmr") + .into_inner(); + let mmr_delta_proto = + response.mmr_delta.expect("sync_chain_mmr response missing mmr_delta"); + let mmr_delta: MmrDelta = mmr_delta_proto + .try_into() + .expect("failed to decode MmrDelta from sync_chain_mmr response"); + partial_mmr.apply(mmr_delta).expect("failed to apply chain MMR delta"); + } + + PartialBlockchain::new(partial_mmr, Vec::new()) + .expect("PartialBlockchain construction from fetched chain MMR") +} diff --git a/bin/benchmark/src/submit.rs b/bin/benchmark/src/submit.rs new file mode 100644 index 0000000000..1f956ca955 --- /dev/null +++ b/bin/benchmark/src/submit.rs @@ -0,0 +1,267 @@ +//! The `run-benchmark` orchestrator and the submission RPC primitives. +//! +//! `run` is the top-level entry point invoked from `main::Cli::run` for the +//! `RunBenchmark` subcommand. It owns the dance of loading the proven-tx +//! bundle, submitting mints sequentially and consumes concurrently, waiting +//! for the chain to advance, and handing off to [`crate::inclusion`] + +//! [`crate::summary`] for the inclusion scan and the human-readable summary. +//! +//! The submission primitives ([`submit_all`], [`submit_sequential`]) and the +//! aggregate types ([`SubmitOutcome`], [`PhaseStats`]) live here too because +//! 
they're not used anywhere else — only `summary` reads `PhaseStats` and only +//! by `&` reference. + +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{Duration, Instant, SystemTime}; + +use miden_node_proto::clients::RpcClient; +use miden_node_proto::generated as proto; +use miden_protocol::transaction::{ProvenTransaction, TransactionId}; +use miden_protocol::utils::serde::Serializable; +use tokio::sync::Semaphore; +use url::Url; + +use crate::inclusion::{current_block_height, scan_with_drain}; +use crate::summary::{print_phase_progress, print_summary}; +use crate::{PROOFS_DIR, create_genesis_aware_rpc_client, read_from_file}; + +// ORCHESTRATOR +// ================================================================================================ + +pub(crate) async fn run(rpc_url: Url, concurrency: usize, wait_blocks: u32) { + let in_dir = PathBuf::from(PROOFS_DIR); + + println!("Loading mint txs from {}", in_dir.join("mint_txs.bin").display()); + let mint_txs: Vec = read_from_file(&in_dir.join("mint_txs.bin")); + let mint_tx_inputs: Vec> = read_from_file(&in_dir.join("mint_tx_inputs.bin")); + assert_eq!(mint_txs.len(), mint_tx_inputs.len(), "mint tx/inputs length mismatch"); + + println!("Loading consume txs from {}", in_dir.join("consume_txs.bin").display()); + let consume_txs: Vec = read_from_file(&in_dir.join("consume_txs.bin")); + let consume_tx_inputs: Vec> = read_from_file(&in_dir.join("consume_tx_inputs.bin")); + assert_eq!(consume_txs.len(), consume_tx_inputs.len(), "consume tx/inputs length mismatch"); + + // Compute the tx-id master lists up front so we can match them against + // on-chain block contents later, without having to interrogate the node. 
+ let mint_ids: Vec = mint_txs.iter().map(ProvenTransaction::id).collect(); + let consume_ids: Vec = consume_txs.iter().map(ProvenTransaction::id).collect(); + + println!("Connecting to {rpc_url}..."); + let rpc_client = create_genesis_aware_rpc_client(&rpc_url, Duration::from_secs(30)) + .await + .expect("failed to create RPC client"); + + let h_start = current_block_height(rpc_client.clone()).await; + println!("Chain height at start: {h_start}"); + + println!( + "Submitting {} mint txs sequentially (each one mutates the shared faucet, so the \ + submits must be serialized for the mempool to chain them)...", + mint_txs.len() + ); + let mint_stats = submit_sequential(rpc_client.clone(), mint_txs, mint_tx_inputs).await; + print_phase_progress("mint", &mint_stats); + + println!("Submitting {} consume txs with concurrency={concurrency}...", consume_txs.len()); + let consume_stats = + submit_all(rpc_client.clone(), consume_txs, consume_tx_inputs, concurrency).await; + print_phase_progress("consume", &consume_stats); + + let ack_by_id = build_ack_map(&mint_ids, &mint_stats, &consume_ids, &consume_stats); + println!( + "Watching for inclusion of {} acked tx(s) (max {wait_blocks} blocks past height {h_start})...", + ack_by_id.len(), + ); + let (h_final, inclusion) = + scan_with_drain(rpc_client.clone(), h_start, wait_blocks, ack_by_id).await; + + print_summary(h_start, h_final, &mint_stats, &consume_stats, concurrency, &inclusion); +} + +// SUBMISSION STATS +// ================================================================================================ + +/// Outcome of a single `submit_proven_transaction` RPC. +#[derive(Debug)] +pub(crate) struct SubmitOutcome { + /// Position of this tx in the original input vec — used to recover the + /// corresponding `TransactionId` from the caller-owned id list. + pub(crate) index: usize, + /// `Ok(rpc_round_trip_duration)` on success, `Err(grpc_code)` on failure. 
+ pub(crate) result: Result, + /// Wall-clock timestamp at which the RPC returned `Ok`. `None` on error. + /// Stored as `SystemTime` so it is directly comparable to block headers' + /// unix-second timestamps when computing inclusion latency. + pub(crate) ack_at: Option, +} + +/// Aggregated stats for one submission phase (mint or consume). +#[derive(Debug)] +pub(crate) struct PhaseStats { + /// Wall-clock duration of the entire phase. + pub(crate) elapsed: Duration, + /// One entry per input tx, aligned by `index`. + pub(crate) outcomes: Vec, +} + +impl PhaseStats { + pub(crate) fn ok_count(&self) -> u64 { + self.outcomes.iter().filter(|o| o.result.is_ok()).count() as u64 + } + + pub(crate) fn err_count(&self) -> u64 { + self.outcomes.iter().filter(|o| o.result.is_err()).count() as u64 + } + + pub(crate) fn submit_latencies(&self) -> Vec { + self.outcomes.iter().filter_map(|o| o.result.as_ref().ok().copied()).collect() + } + + pub(crate) fn err_by_code(&self) -> HashMap { + let mut map: HashMap = HashMap::new(); + for o in &self.outcomes { + if let Err(code) = o.result { + *map.entry(code).or_insert(0) += 1; + } + } + map + } +} + +// SUBMISSION PRIMITIVES +// ================================================================================================ + +async fn submit_all( + client: RpcClient, + txs: Vec, + tx_inputs: Vec>, + concurrency: usize, +) -> PhaseStats { + /// How many distinct error messages to surface to the console as they + /// happen. The full failure breakdown still appears in the summary. + const MAX_ERRORS_TO_PRINT: u64 = 5; + + let start = Instant::now(); + let semaphore = Arc::new(Semaphore::new(concurrency)); + // Incrementing-only counter used purely to budget the live error prints. + // It is never read on the hot path, so it does not introduce any + // submit-side synchronization beyond what was already there. 
+ let printed = Arc::new(AtomicU64::new(0)); + + let total = txs.len(); + let mut set = tokio::task::JoinSet::new(); + for (i, (tx, inputs)) in txs.into_iter().zip(tx_inputs.into_iter()).enumerate() { + let permit = semaphore.clone().acquire_owned().await.unwrap(); + let mut client = client.clone(); + let printed = printed.clone(); + set.spawn(async move { + let request = proto::transaction::ProvenTransaction { + transaction: tx.to_bytes(), + transaction_inputs: Some(inputs), + }; + let t0 = Instant::now(); + let outcome = match client.submit_proven_transaction(request).await { + Ok(_) => SubmitOutcome { + index: i, + result: Ok(t0.elapsed()), + ack_at: Some(SystemTime::now()), + }, + Err(status) => { + if printed.fetch_add(1, Ordering::Relaxed) < MAX_ERRORS_TO_PRINT { + eprintln!( + " tx idx {i} failed: code={:?} message={}", + status.code(), + status.message() + ); + } + SubmitOutcome { + index: i, + result: Err(status.code()), + ack_at: None, + } + }, + }; + drop(permit); + outcome + }); + } + + // Outcomes carry their original `index`, so completion order is fine — + // downstream summarizers don't depend on the vec being in spawn order. + let mut outcomes = Vec::with_capacity(total); + while let Some(res) = set.join_next().await { + outcomes.push(res.expect("submission task panicked")); + } + + PhaseStats { elapsed: start.elapsed(), outcomes } +} + +/// Submit txs one at a time, awaiting each RPC response before sending the +/// next. Used for the mint phase, where every tx mutates the shared faucet +/// and therefore must arrive at the mempool in order — the block-producer's +/// mempool will reject out-of-order submissions but happily chains in-order +/// ones against its own pending state, so we only need to serialize the +/// `submit_proven_transaction` calls themselves, not wait for block +/// inclusion in between. 
+async fn submit_sequential( + mut client: RpcClient, + txs: Vec, + tx_inputs: Vec>, +) -> PhaseStats { + let start = Instant::now(); + let total = txs.len(); + let mut outcomes = Vec::with_capacity(total); + + for (i, (tx, inputs)) in txs.into_iter().zip(tx_inputs.into_iter()).enumerate() { + let request = proto::transaction::ProvenTransaction { + transaction: tx.to_bytes(), + transaction_inputs: Some(inputs), + }; + + let t0 = Instant::now(); + let outcome = match client.submit_proven_transaction(request).await { + Ok(_) => SubmitOutcome { + index: i, + result: Ok(t0.elapsed()), + ack_at: Some(SystemTime::now()), + }, + Err(status) => { + eprintln!(" tx {} / {total} failed: {status}", i + 1); + SubmitOutcome { + index: i, + result: Err(status.code()), + ack_at: None, + } + }, + }; + outcomes.push(outcome); + } + + PhaseStats { elapsed: start.elapsed(), outcomes } +} + +/// Build a lookup from the on-chain `TransactionId` of every successfully +/// submitted tx to the `SystemTime` at which the node ack'd its submission. +/// Used by [`compute_inclusion`] to compute per-tx inclusion latency. +fn build_ack_map( + mint_ids: &[TransactionId], + mint_stats: &PhaseStats, + consume_ids: &[TransactionId], + consume_stats: &PhaseStats, +) -> HashMap { + let mut map = HashMap::new(); + for outcome in &mint_stats.outcomes { + if let Some(ack_at) = outcome.ack_at { + map.insert(mint_ids[outcome.index], ack_at); + } + } + for outcome in &consume_stats.outcomes { + if let Some(ack_at) = outcome.ack_at { + map.insert(consume_ids[outcome.index], ack_at); + } + } + map +} diff --git a/bin/benchmark/src/summary.rs b/bin/benchmark/src/summary.rs new file mode 100644 index 0000000000..078280a3b5 --- /dev/null +++ b/bin/benchmark/src/summary.rs @@ -0,0 +1,314 @@ +//! Every line the bench prints to stdout, plus the formatting and metric +//! helpers that only matter for output (percentiles, rate/percentage casts, +//! duration formatters). 
Other modules pass `PhaseStats` and `InclusionResult` +//! references in; this module never mutates or owns them. + +use std::collections::HashMap; +use std::time::Duration; + +use crate::inclusion::{BlockHit, InclusionResult}; +use crate::submit::PhaseStats; + +// PROOF-GENERATION SUMMARY +// ================================================================================================ + +/// Prints a per-phase summary of how long proof generation took, broken down +/// into the executor (VM execution) and prover (STARK proving) costs, plus the +/// mean per tx for each so that runs of different sizes can be compared. +pub(crate) fn print_proving_summary( + label: &str, + num_transactions: u64, + wall: Duration, + exec_total: Duration, + prove_total: Duration, +) { + let n_u32 = u32::try_from(num_transactions).unwrap_or(u32::MAX); + let exec_mean = if num_transactions > 0 { + exec_total / n_u32 + } else { + Duration::ZERO + }; + let prove_mean = if num_transactions > 0 { + prove_total / n_u32 + } else { + Duration::ZERO + }; + let per_tx_mean = if num_transactions > 0 { + (exec_total + prove_total) / n_u32 + } else { + Duration::ZERO + }; + println!("{label} proving summary (n={num_transactions}):"); + println!(" wall time: {}", format_duration_secs(wall)); + println!( + " execute_transaction: total={} mean={}/tx", + format_duration_secs(exec_total), + format_duration_secs(exec_mean), + ); + println!( + " prover.prove: total={} mean={}/tx", + format_duration_secs(prove_total), + format_duration_secs(prove_mean), + ); + println!(" exec+prove per tx: mean={}/tx", format_duration_secs(per_tx_mean)); +} + +// SUBMISSION SUMMARIES +// ================================================================================================ + +pub(crate) fn print_phase_progress(label: &str, stats: &PhaseStats) { + let elapsed = stats.elapsed.as_secs_f64(); + let rate = rate_per_second(stats.ok_count(), stats.elapsed); + println!( + " {label}: ok={ok} err={err} in {elapsed:.1}s 
({rate:.1} tx/s ack rate)", + ok = stats.ok_count(), + err = stats.err_count(), + ); +} + +pub(crate) fn print_summary( + h_start: u32, + h_final: u32, + mint: &PhaseStats, + consume: &PhaseStats, + concurrency: usize, + inclusion: &InclusionResult, +) { + println!(); + println!("=== Summary ==="); + println!( + "Chain height: {h_start} -> {h_final} ({} blocks, of which {} contained at least one of our txs)", + h_final - h_start, + inclusion.per_block_hits.len(), + ); + println!(); + print_phase_summary("Mint phase (sequential)", mint); + println!(); + print_phase_summary(&format!("Consume phase (concurrent, c={concurrency})"), consume); + println!(); + print_inclusion_summary(inclusion); +} + +fn print_phase_summary(title: &str, stats: &PhaseStats) { + let ok = stats.ok_count(); + let err = stats.err_count(); + let elapsed = stats.elapsed.as_secs_f64(); + let total = stats.outcomes.len() as u64; + + println!("{title}:"); + println!( + " ok = {ok} / {total} err = {err} ({})", + format_err_breakdown(stats.err_by_code()), + ); + + let mut latencies = stats.submit_latencies(); + if let Some(p) = percentiles(&mut latencies) { + println!( + " submit RPC latency: mean={mean} p50={p50} p95={p95} p99={p99} max={max}", + mean = format_duration_ms(p.mean), + p50 = format_duration_ms(p.p50), + p95 = format_duration_ms(p.p95), + p99 = format_duration_ms(p.p99), + max = format_duration_ms(p.max), + ); + } else { + println!(" submit RPC latency: (no successful submissions)"); + } + + let rate = rate_per_second(stats.ok_count(), stats.elapsed); + println!(" elapsed = {elapsed:.1}s, RPC ack rate = {rate:.1} tx/s"); +} + +fn print_inclusion_summary(inclusion: &InclusionResult) { + let submitted = inclusion.submitted_count; + let included = inclusion.included_count; + let drop = submitted.saturating_sub(included); + let drop_pct = ratio_pct(drop, submitted); + + println!("Inclusion (per-tx ID match against block contents):"); + println!( + " included = {included} / {submitted} 
submitted ({drop} missing, {drop_pct:.1}% drop)", + ); + + let hits = &inclusion.per_block_hits; + if hits.is_empty() { + println!(" no blocks observed containing any of our txs"); + return; + } + + // Per-block aggregates. + let counts: Vec = hits.iter().map(|h| h.hit_count).collect(); + let sum_counts: u32 = counts.iter().copied().sum(); + let max_count = counts.iter().copied().max().unwrap_or(0); + let n_blocks = u32::try_from(counts.len()).unwrap_or(u32::MAX); + let mean_count = f64::from(sum_counts) / f64::from(n_blocks); + + let peak_block = hits.iter().max_by_key(|h| h.hit_count).expect("non-empty hits"); + let first_block = hits.first().expect("non-empty hits"); + let last_block = hits.last().expect("non-empty hits"); + + println!( + " blocks with our txs = {n_blocks} \ + (block range {}..={}, mean txs/block when present = {mean_count:.1}, max = {max_count})", + first_block.block_num, last_block.block_num, + ); + + // Derive the block interval from consecutive scanned timestamps. + let Some(block_interval) = inclusion.derived_block_interval() else { + println!( + " block interval: could not derive from {} scanned block(s) \ + (need >=2 blocks spanning at least one second boundary)", + inclusion.scanned_block_count, + ); + println!(" throughput metrics skipped; per-block series follows."); + print_per_block_series(hits, None); + return; + }; + + println!( + " derived block interval = {} (from {} scanned blocks, span = {}s)", + format_duration_secs(block_interval), + inclusion.scanned_block_count, + inclusion.scanned_last_ts - inclusion.scanned_first_ts, + ); + + // Throughput. Each block-with-our-txs is treated as `block_interval` + // seconds of node work. 
+ let interval_secs = block_interval.as_secs_f64(); + let peak_rate = rate_per_second(u64::from(peak_block.hit_count), block_interval); + let mean_rate = if interval_secs > 0.0 { + mean_count / interval_secs + } else { + 0.0 + }; + let window_rate = rate_per_second(included, block_interval.saturating_mul(n_blocks)); + + println!( + " peak per-block rate = {} txs in block {} => {peak_rate:.1} tx/s", + peak_block.hit_count, peak_block.block_num, + ); + println!(" mean per-block rate = {mean_count:.1} txs/block => {mean_rate:.1} tx/s"); + println!( + " window-average TPS = {included} included / ({n_blocks} blocks * {}) \ + => {window_rate:.1} tx/s", + format_duration_secs(block_interval), + ); + + print_per_block_series(hits, Some(block_interval)); + + let mut lats = inclusion.inclusion_latencies.clone(); + if let Some(p) = percentiles(&mut lats) { + println!( + " inclusion latency (submit_ack -> block timestamp): mean={mean} p50={p50} p95={p95} p99={p99} max={max}", + mean = format_duration_secs(p.mean), + p50 = format_duration_secs(p.p50), + p95 = format_duration_secs(p.p95), + p99 = format_duration_secs(p.p99), + max = format_duration_secs(p.max), + ); + } +} + +/// Print a compact per-block series so the operator can eyeball the +/// time-series shape (ramp, plateau, dip). Empty blocks in the scan range +/// are intentionally omitted. If `block_interval` is `Some`, each line also +/// shows the equivalent rate; if `None`, only the raw count. 
+fn print_per_block_series(hits: &[BlockHit], block_interval: Option) { + println!(" per-block series:"); + for hit in hits { + match block_interval { + Some(interval) => { + let rate = rate_per_second(u64::from(hit.hit_count), interval); + println!( + " block {} (ts={}): {} txs ({rate:.1} tx/s @ block_interval)", + hit.block_num, hit.block_ts, hit.hit_count, + ); + }, + None => { + println!( + " block {} (ts={}): {} txs", + hit.block_num, hit.block_ts, hit.hit_count, + ); + }, + } + } +} + +// METRIC HELPERS +// ================================================================================================ + +/// Computes `count / elapsed`, treating a zero-or-negative elapsed window as +/// zero. Wrapping the cast in a helper keeps the precision-loss expect tightly +/// scoped — the loss is harmless for display purposes. +#[expect( + clippy::cast_precision_loss, + reason = "presentational rate; precision loss past 2^52 events is irrelevant" +)] +fn rate_per_second(count: u64, elapsed: Duration) -> f64 { + let secs = elapsed.as_secs_f64(); + if secs > 0.0 { (count as f64) / secs } else { 0.0 } +} + +/// Computes `100 * num / den` as a percentage, returning 0 when `den == 0`. 
+#[expect( + clippy::cast_precision_loss, + reason = "presentational percentage; precision loss past 2^52 is irrelevant" +)] +fn ratio_pct(num: u64, den: u64) -> f64 { + if den == 0 { + 0.0 + } else { + (num as f64) * 100.0 / (den as f64) + } +} + +fn format_err_breakdown(by_code: HashMap) -> String { + if by_code.is_empty() { + return "no errors".to_string(); + } + let mut entries: Vec<(tonic::Code, u64)> = by_code.into_iter().collect(); + entries.sort_by(|a, b| b.1.cmp(&a.1)); + let parts: Vec = entries.iter().map(|(c, n)| format!("{c:?}={n}")).collect(); + parts.join(", ") +} + +fn format_duration_ms(d: Duration) -> String { + format!("{:.1}ms", d.as_secs_f64() * 1000.0) +} + +fn format_duration_secs(d: Duration) -> String { + format!("{:.2}s", d.as_secs_f64()) +} + +#[derive(Debug, Clone, Copy)] +struct Percentiles { + mean: Duration, + p50: Duration, + p95: Duration, + p99: Duration, + max: Duration, +} + +/// Returns `None` if there are no samples. +fn percentiles(samples: &mut [Duration]) -> Option { + if samples.is_empty() { + return None; + } + samples.sort(); + let n = samples.len(); + // Integer index for percentile `num/den`. Picked over an `f64` cast to + // avoid the cast_sign_loss / cast_precision_loss footguns. 
+ let pick = |num: usize, den: usize| -> Duration { + let idx = (n * num / den).min(n - 1); + samples[idx] + }; + let sum: Duration = samples.iter().copied().sum(); + let mean = sum / u32::try_from(n).unwrap_or(u32::MAX); + Some(Percentiles { + mean, + p50: pick(50, 100), + p95: pick(95, 100), + p99: pick(99, 100), + max: *samples.last().unwrap(), + }) +} From fcad87ceea1cac81fe4d68eeaf3117e1e251628c Mon Sep 17 00:00:00 2001 From: SantiagoPittella Date: Wed, 13 May 2026 17:01:20 -0300 Subject: [PATCH 7/9] docs: add changelog entry --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a53aa2aa4..10557e1b85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,8 @@ - [BREAKING] Removed `miden-node ntx-builder` subcommand and created a separate `miden-ntx-builder` binary ([#2067](https://github.com/0xMiden/node/pull/2067)). - [BREAKING] Reworked note proto types for multi-attachment support: `NoteMetadata` now carries `attachment_schemes` (repeated) and `attachments_commitment` instead of a single `attachment`. `Note` and `NetworkNote` gained an `attachments` field. `NoteSyncRecord` now embeds full `NoteMetadata` instead of `NoteMetadataHeader`. Removed `NoteAttachmentKind` enum and `NoteMetadataHeader` message ([#2078](https://github.com/0xMiden/node/pull/2078)). - [BREAKING] Changed `SyncChainMmr` endpoint: the upper end of the block range we're syncing is now the chain tip with the requested finality level. Validator signature is also returned ([#2075](https://github.com/0xMiden/node/pull/2075)). +- Added `miden-benchmark` binary for end-to-end TPS measurements. `create-proofs` generates locally-proven mint/consume transaction pairs bound to the target node's chain tip; `run-benchmark` submits the bundle and reports peak/mean/window-average TPS plus inclusion latency, all derived from block-header data ([#2073](https://github.com/0xMiden/node/pull/2073)). 
+- Added `--batch.workers` flag (env `MIDEN_NODE_BLOCK_PRODUCER_BATCH_WORKERS`) to the block-producer to make the previously-hardcoded batch-builder worker pool size configurable; default remains 2 ([#2073](https://github.com/0xMiden/node/pull/2073)). ## v0.14.10 (2026-05-29) From 2f3425dca5ceee9292cd2a13cddaaad525996eb2 Mon Sep 17 00:00:00 2001 From: SantiagoPittella Date: Fri, 15 May 2026 15:23:58 -0300 Subject: [PATCH 8/9] add remote prover support --- Cargo.lock | 1 + bin/benchmark/Cargo.toml | 25 +-- bin/benchmark/README.md | 44 ++++- bin/benchmark/src/create_proofs.rs | 109 +++++++++--- bin/benchmark/src/main.rs | 17 +- bin/benchmark/src/prover.rs | 274 +++++++++++++++++++++++++++++ 6 files changed, 422 insertions(+), 48 deletions(-) create mode 100644 bin/benchmark/src/prover.rs diff --git a/Cargo.lock b/Cargo.lock index 2b10a15d1a..b28a6c7d75 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2977,6 +2977,7 @@ dependencies = [ "fs-err", "miden-node-proto", "miden-protocol", + "miden-remote-prover-client", "miden-standards", "miden-tx", "rand 0.9.2", diff --git a/bin/benchmark/Cargo.toml b/bin/benchmark/Cargo.toml index c8cc1f04c5..85fd0c49d8 100644 --- a/bin/benchmark/Cargo.toml +++ b/bin/benchmark/Cargo.toml @@ -17,15 +17,16 @@ version.workspace = true workspace = true [dependencies] -anyhow = { workspace = true } -clap = { features = ["env", "string"], workspace = true } -fs-err = { workspace = true } -miden-node-proto = { workspace = true } -miden-protocol = { features = ["std", "testing"], workspace = true } -miden-standards = { workspace = true } -miden-tx = { features = ["concurrent", "std"], workspace = true } -rand = { workspace = true } -rayon = { workspace = true } -tokio = { features = ["full"], workspace = true } -tonic = { workspace = true } -url = { features = ["serde"], workspace = true } +anyhow = { workspace = true } +clap = { features = ["env", "string"], workspace = true } +fs-err = { workspace = true } +miden-node-proto = { workspace = true } 
+miden-protocol = { features = ["std", "testing"], workspace = true } +miden-remote-prover-client = { features = ["tx-prover"], workspace = true } +miden-standards = { workspace = true } +miden-tx = { features = ["concurrent", "std"], workspace = true } +rand = { workspace = true } +rayon = { workspace = true } +tokio = { features = ["full"], workspace = true } +tonic = { workspace = true } +url = { features = ["serde"], workspace = true } diff --git a/bin/benchmark/README.md b/bin/benchmark/README.md index 847fee0188..9f4fc07c32 100644 --- a/bin/benchmark/README.md +++ b/bin/benchmark/README.md @@ -7,11 +7,13 @@ A binary for measuring transaction throughput on a Miden node by submitting loca End-to-end benchmarking is split into two phases because of proof generation is expensive and shouldn't be on the critical path of the throughput measurement: 1. **`create-proofs`**: Generates a faucet, N wallets, and `2 * N` proven - transactions (one mint and one consume per wallet). Each proof is produced - locally with `LocalTransactionProver` and is bound to the chain state of - the target node at the moment of generation (genesis commitment, reference - block, initial account commitments, input note nullifiers). The bundle is - written to `./benchmark-proofs/` as serialized blobs. + transactions (one mint and one consume per wallet). By default each proof + is produced locally with `LocalTransactionProver`; pass + `--remote-prover-url` to offload proving to a remote prover (see [Using a + remote prover](#using-a-remote-prover)). Each proof is bound to the chain + state of the target node at the moment of generation (genesis commitment, + reference block, initial account commitments, input note nullifiers). The + bundle is written to `./benchmark-proofs/` as serialized blobs. 2. **`run-benchmark`**: Loads the bundle from disk and submits it to the node's RPC. 
Mints are submitted sequentially (each mutates the shared faucet, so order matters) and consumes are submitted with bounded @@ -53,6 +55,38 @@ miden-benchmark run-benchmark \ Mints go in sequentially, then consumes with the requested concurrency, then the run waits `--wait-blocks` blocks before scanning for inclusion. Per-phase ack rate, RPC latency percentiles, inclusion rate, and inclusion TPS are printed at the end. +### Using a remote prover + +Pass `--remote-prover-url` to `create-proofs` to offload STARK proving to a +remote prover instead of producing proofs locally: + +```sh +miden-benchmark create-proofs \ + --rpc-url http://127.0.0.1:57291 \ + --num-transactions 100 \ + --remote-prover-url http://prover.example.com:50051 +``` + +The benchmark paces proving requests so that an autoscaling prover fleet has +time to spin up additional workers before being saturated: + +- Dispatch starts at **1 req/s** and bumps by 1 req/s every **3 minutes**, up + to **10 req/s**, and then holds at 10 req/s for the rest of the run. +- Up to **64** proving requests may be in flight at once (independent of the + rate cap). +- A retryable gRPC error from the prover (`ResourceExhausted`, + `Unavailable`, `DeadlineExceeded`, or any transport-level failure) **freezes + the ramp** at the current step for the rest of the run, and the failing + request is retried with exponential backoff (500ms x 2**n, capped at 30s, up + to 10 attempts). +- If the prover URL is unreachable or a non-retryable error is returned, + `create-proofs` exits with a non-zero status after the retry budget is + exhausted. + +Mint executions remain sequential (each mint mutates the shared faucet, so +ordering matters), but proving runs concurrently under the rate limiter. +Consume executions are also serial today, with concurrent proving. 
+ ## Re-using proofs across runs A `ProvenTransaction` is pinned to the chain state it was generated against: diff --git a/bin/benchmark/src/create_proofs.rs b/bin/benchmark/src/create_proofs.rs index 7dd34e7f02..25e93363a2 100644 --- a/bin/benchmark/src/create_proofs.rs +++ b/bin/benchmark/src/create_proofs.rs @@ -53,7 +53,6 @@ use miden_tx::auth::BasicAuthenticator; use miden_tx::{ DataStore, DataStoreError, - LocalTransactionProver, MastForestStore, TransactionExecutor, TransactionMastStore, @@ -62,6 +61,7 @@ use rand::Rng; use rayon::prelude::*; use url::Url; +use crate::prover::BenchmarkProver; use crate::rpc_state::{fetch_chain_tip_header, fetch_partial_blockchain}; use crate::summary::print_proving_summary; use crate::{ @@ -71,6 +71,42 @@ use crate::{ write_to_file, }; +// PROVING TASK HELPERS +// ================================================================================================ + +/// Result of a single spawned proving task: the proof attempt and the wall +/// time that task spent (which, for the remote path, includes rate-limit and +/// retry waits). +type ProveOutcome = (anyhow::Result, Duration); + +/// Await every spawned proving task in spawn order, returning the proofs in +/// that same order plus the summed per-task wall time. If any task fails (or +/// panics) we print the error and exit with a non-zero status. Proven txs +/// later in the bundle reference earlier ones, so a single failure means the +/// bundle is unusable anyway. 
+async fn collect_proofs( + label: &str, + tasks: Vec>, +) -> (Vec, Duration) { + let mut proofs = Vec::with_capacity(tasks.len()); + let mut total = Duration::ZERO; + for (i, handle) in tasks.into_iter().enumerate() { + let (result, elapsed) = handle.await.unwrap_or_else(|err| { + eprintln!("{label} proving task {i} panicked: {err}"); + std::process::exit(1); + }); + total += elapsed; + match result { + Ok(tx) => proofs.push(tx), + Err(err) => { + eprintln!("{label} proving failed for tx {i}: {err:#}"); + std::process::exit(1); + }, + } + } + (proofs, total) +} + // ORCHESTRATOR // ================================================================================================ @@ -79,7 +115,7 @@ use crate::{ reason = "single linear orchestration of genesis fetch + mint phase + consume phase; \ splitting would just shuffle locals (faucet, data_store, authenticator) around" )] -pub(crate) async fn run(rpc_url: Url, num_transactions: u64) { +pub(crate) async fn run(rpc_url: Url, num_transactions: u64, remote_prover_url: Option) { let mut rpc_client = create_genesis_aware_rpc_client(&rpc_url, Duration::from_secs(10)) .await .unwrap(); @@ -128,17 +164,25 @@ pub(crate) async fn run(rpc_url: Url, num_transactions: u64) { AuthSecretKey::Falcon512Poseidon2(wallet_secret_key), ]); - let prover = LocalTransactionProver::default(); + let prover = Arc::new(match remote_prover_url { + Some(url) => { + println!("Using remote prover at {url} (rate-limited ramp from 1 to 10 req/s)."); + BenchmarkProver::remote(url) + }, + None => BenchmarkProver::local(), + }); let faucet_id = faucet.id(); - // Mint phase — sequential because each mint mutates the faucet. - println!("Proving {num_transactions} mint transactions (sequential)..."); - let mut mint_txs: Vec = Vec::with_capacity(num_transactions as usize); + // Mint phase: executions are sequential (each mutates the shared faucet), + // but proving runs concurrently on the prover (under the rate limiter when + // remote). 
+ println!("Executing {num_transactions} mint transactions (sequential)..."); + let mut mint_tasks: Vec> = + Vec::with_capacity(num_transactions as usize); let mut mint_tx_inputs: Vec> = Vec::with_capacity(num_transactions as usize); let mut mint_notes: Vec = Vec::with_capacity(num_transactions as usize); let mint_phase_start = Instant::now(); let mut mint_exec_total = Duration::ZERO; - let mut mint_prove_total = Duration::ZERO; for index in 0..num_transactions { let wallet_id = wallets[index as usize].id(); @@ -179,14 +223,11 @@ pub(crate) async fn run(rpc_url: Url, num_transactions: u64) { let tx_inputs_bytes = executed_tx.tx_inputs().to_bytes(); let delta = executed_tx.account_delta().clone(); - let prove_t0 = Instant::now(); - let proven_tx = prover.prove(executed_tx).await.expect("failed to prove mint transaction"); - mint_prove_total += prove_t0.elapsed(); - - // Evolve the faucet state for the next iteration. The first mint of a - // never-before-seen account produces a full-state delta (because the - // delta carries the freshly deployed code); subsequent mints produce - // partial-state deltas that can be applied incrementally. + // Evolve the faucet state for the next iteration before we hand the + // executed tx off for proving. The first mint of a never-before-seen + // account produces a full-state delta (because the delta carries the + // freshly deployed code); subsequent mints produce partial-state + // deltas that can be applied incrementally. 
if delta.is_full_state() { faucet = Account::try_from(&delta) .expect("failed to materialize faucet from full-state delta"); @@ -195,14 +236,22 @@ pub(crate) async fn run(rpc_url: Url, num_transactions: u64) { } data_store.add_account(faucet.clone()); - mint_txs.push(proven_tx); + let prover = Arc::clone(&prover); + mint_tasks.push(tokio::spawn(async move { + let prove_t0 = Instant::now(); + let result = prover.prove(executed_tx).await; + (result, prove_t0.elapsed()) + })); mint_tx_inputs.push(tx_inputs_bytes); mint_notes.push(note); if (index + 1) % 10 == 0 || index + 1 == num_transactions { - println!(" proved {} / {num_transactions} mint txs", index + 1); + println!(" executed {} / {num_transactions} mint txs", index + 1); } } + + println!("Awaiting {num_transactions} mint proofs..."); + let (mint_txs, mint_prove_total) = collect_proofs("mint", mint_tasks).await; let mint_phase_elapsed = mint_phase_start.elapsed(); print_proving_summary( "Mint", @@ -212,14 +261,13 @@ pub(crate) async fn run(rpc_url: Url, num_transactions: u64) { mint_prove_total, ); - // Consume phase — also sequential for now (each tx is one wallet, independent - // wallets, so this could be parallelized later with bounded concurrency). - println!("Proving {num_transactions} consume transactions (sequential)..."); - let mut consume_txs: Vec = Vec::with_capacity(num_transactions as usize); + // Consume phase — same shape: sequential executions, concurrent proving. 
+ println!("Executing {num_transactions} consume transactions (sequential)..."); + let mut consume_tasks: Vec> = + Vec::with_capacity(num_transactions as usize); let mut consume_tx_inputs: Vec> = Vec::with_capacity(num_transactions as usize); let consume_phase_start = Instant::now(); let mut consume_exec_total = Duration::ZERO; - let mut consume_prove_total = Duration::ZERO; for index in 0..num_transactions { let wallet_id = wallets[index as usize].id(); @@ -243,18 +291,21 @@ pub(crate) async fn run(rpc_url: Url, num_transactions: u64) { let tx_inputs_bytes = executed_tx.tx_inputs().to_bytes(); - let prove_t0 = Instant::now(); - let proven_tx = - prover.prove(executed_tx).await.expect("failed to prove consume transaction"); - consume_prove_total += prove_t0.elapsed(); - - consume_txs.push(proven_tx); + let prover = Arc::clone(&prover); + consume_tasks.push(tokio::spawn(async move { + let prove_t0 = Instant::now(); + let result = prover.prove(executed_tx).await; + (result, prove_t0.elapsed()) + })); consume_tx_inputs.push(tx_inputs_bytes); if (index + 1) % 10 == 0 || index + 1 == num_transactions { - println!(" proved {} / {num_transactions} consume txs", index + 1); + println!(" executed {} / {num_transactions} consume txs", index + 1); } } + + println!("Awaiting {num_transactions} consume proofs..."); + let (consume_txs, consume_prove_total) = collect_proofs("consume", consume_tasks).await; let consume_phase_elapsed = consume_phase_start.elapsed(); print_proving_summary( "Consume", diff --git a/bin/benchmark/src/main.rs b/bin/benchmark/src/main.rs index f13e33df99..d2cad3b7e8 100644 --- a/bin/benchmark/src/main.rs +++ b/bin/benchmark/src/main.rs @@ -18,6 +18,7 @@ use url::Url; mod create_proofs; mod inclusion; +mod prover; mod rpc_state; mod submit; mod summary; @@ -49,6 +50,14 @@ pub enum Command { /// pair takes seconds of real STARK proving, so start small. 
#[arg(long, default_value_t = 10)] num_transactions: u64, + /// If set, proofs are produced by the remote prover at this URL + /// instead of locally. Dispatch is rate-limited: starts at 1 req/s, + /// bumps by 1 req/s every 3 minutes up to 10 req/s, and freezes at + /// the current step if the prover returns a retryable error + /// (resource-exhausted, unavailable, or deadline-exceeded). If unset, + /// proving runs locally with `LocalTransactionProver`. + #[arg(long)] + remote_prover_url: Option, }, RunBenchmark { /// RPC endpoint of the target miden node. @@ -76,8 +85,12 @@ async fn main() { impl Cli { async fn run(self) { match self.command { - Command::CreateProofs { rpc_url, num_transactions } => { - create_proofs::run(rpc_url, num_transactions).await; + Command::CreateProofs { + rpc_url, + num_transactions, + remote_prover_url, + } => { + create_proofs::run(rpc_url, num_transactions, remote_prover_url).await; }, Command::RunBenchmark { rpc_url, concurrency, wait_blocks } => { submit::run(rpc_url, concurrency, wait_blocks).await; diff --git a/bin/benchmark/src/prover.rs b/bin/benchmark/src/prover.rs new file mode 100644 index 0000000000..de258b4f3a --- /dev/null +++ b/bin/benchmark/src/prover.rs @@ -0,0 +1,274 @@ +//! Pluggable prover for the `create-proofs` orchestrator. +//! +//! - [`BenchmarkProver::Local`] keeps the current `LocalTransactionProver` path (the default when +//! `--remote-prover-url` is not set). +//! - [`BenchmarkProver::Remote`] talks to a deployed remote prover. To avoid slamming an +//! autoscaling fleet at t=0, requests are paced by a [`RampingRateLimiter`] that starts at +//! [`START_RATE`] rps and bumps by 1 rps every [`STEP_DURATION`] until it hits [`MAX_RATE`]. +//! Retryable gRPC errors (resource-exhausted, unavailable, deadline-exceeded, or any +//! transport-level failure) freeze the ramp at the current step for the rest of the run. 
+ +use std::error::Error as _; +use std::sync::Arc; +use std::sync::atomic::{AtomicU32, Ordering}; +use std::time::{Duration, Instant}; + +use anyhow::Result; +use miden_protocol::transaction::{ExecutedTransaction, ProvenTransaction, TransactionInputs}; +use miden_remote_prover_client::RemoteTransactionProver; +use miden_tx::{LocalTransactionProver, TransactionProverError}; +use tokio::sync::{Mutex, Semaphore}; + +// SCHEDULE CONSTANTS +// ================================================================================================ + +const START_RATE: u32 = 1; +const MAX_RATE: u32 = 10; +const STEP_DURATION: Duration = Duration::from_secs(180); + +/// Per-request gRPC deadline. STARK transaction proofs routinely exceed the +/// default 10s, so we override it. Anything past this is treated as a +/// retryable error. +const PROVE_TIMEOUT: Duration = Duration::from_secs(120); + +/// Cap on the number of proving requests in flight at once, independent of the +/// rate. At [`MAX_RATE`] with ~30s proof latency we'd otherwise stack hundreds +/// of in-flight requests. 
+const MAX_IN_FLIGHT: usize = 64; + +// RETRY CONSTANTS +// ================================================================================================ + +const RETRY_BASE: Duration = Duration::from_millis(500); +const RETRY_MAX_BACKOFF: Duration = Duration::from_secs(30); +const RETRY_MAX_ATTEMPTS: u32 = 10; +const RETRY_BACKOFF_SHIFT_CAP: u32 = 6; + +// BENCHMARK PROVER +// ================================================================================================ + +pub(crate) enum BenchmarkProver { + Local(LocalTransactionProver), + Remote { + prover: RemoteTransactionProver, + limiter: Arc, + permits: Arc, + }, +} + +impl BenchmarkProver { + pub(crate) fn local() -> Self { + Self::Local(LocalTransactionProver::default()) + } + + pub(crate) fn remote(endpoint: String) -> Self { + let prover = RemoteTransactionProver::new(endpoint).with_timeout(PROVE_TIMEOUT); + Self::Remote { + prover, + limiter: Arc::new(RampingRateLimiter::new()), + permits: Arc::new(Semaphore::new(MAX_IN_FLIGHT)), + } + } + + /// Prove the given executed transaction. The remote path paces dispatch + /// through the rate limiter and retries retryable errors with exponential + /// backoff; the local path runs the in-process prover directly. + pub(crate) async fn prove( + &self, + executed_tx: ExecutedTransaction, + ) -> Result { + match self { + Self::Local(prover) => prover + .prove(executed_tx) + .await + .map_err(|err| anyhow::anyhow!("local proving failed: {err}")), + Self::Remote { prover, limiter, permits } => { + let tx_inputs: TransactionInputs = executed_tx.into(); + prove_remote_with_retry(prover, limiter, permits, &tx_inputs).await + }, + } + } +} + +async fn prove_remote_with_retry( + prover: &RemoteTransactionProver, + limiter: &Arc, + permits: &Arc, + tx_inputs: &TransactionInputs, +) -> Result { + // Hold one in-flight permit across every retry so the concurrency cap + // accounts for slow-but-still-progressing requests. 
+ let _permit = permits + .clone() + .acquire_owned() + .await + .expect("in-flight semaphore is never closed"); + + let mut attempt: u32 = 0; + loop { + limiter.acquire().await; + match prover.prove(tx_inputs).await { + Ok(tx) => return Ok(tx), + Err(err) => { + if !is_retryable(&err) { + return Err(anyhow::anyhow!("remote proving failed: {err}")); + } + limiter.freeze(); + attempt += 1; + if attempt > RETRY_MAX_ATTEMPTS { + return Err(anyhow::anyhow!( + "remote proving failed after {RETRY_MAX_ATTEMPTS} retries: {err}" + )); + } + let shift = attempt.min(RETRY_BACKOFF_SHIFT_CAP); + let backoff = (RETRY_BASE.saturating_mul(1 << shift)).min(RETRY_MAX_BACKOFF); + eprintln!( + "remote prover returned retryable error (attempt {attempt}/{RETRY_MAX_ATTEMPTS}, backoff {backoff:?}): {err}" + ); + tokio::time::sleep(backoff).await; + }, + } + } +} + +/// Walk the error source chain looking for a tonic status or transport error. +/// We classify resource-exhausted, unavailable, deadline-exceeded, and any +/// transport-level failure (e.g. broken pipe, connect refused) as retryable. +fn is_retryable(err: &TransactionProverError) -> bool { + let mut src: Option<&(dyn std::error::Error + 'static)> = err.source(); + while let Some(e) = src { + if let Some(status) = e.downcast_ref::() { + return matches!( + status.code(), + tonic::Code::ResourceExhausted + | tonic::Code::Unavailable + | tonic::Code::DeadlineExceeded + ); + } + if e.downcast_ref::().is_some() { + return true; + } + src = e.source(); + } + false +} + +// RAMPING RATE LIMITER +// ================================================================================================ + +/// A wall-clock-anchored rate limiter that ramps from [`START_RATE`] to +/// [`MAX_RATE`] requests/sec, bumping by 1 rps every [`STEP_DURATION`]. +/// +/// [`freeze`](Self::freeze) caps the rate at its current value for the rest of +/// the run; once frozen, the ramp never resumes. 
+pub(crate) struct RampingRateLimiter { + start: Instant, + inner: Mutex, + /// Last rate we logged a step transition for. Used purely for logging. + reported_rate: AtomicU32, +} + +struct Inner { + /// Earliest instant at which the next `acquire()` may return. + next_release: Instant, + /// If `Some(rate)`, the rate is capped at `rate` for the rest of the run. + frozen_at: Option, +} + +impl RampingRateLimiter { + fn new() -> Self { + let now = Instant::now(); + Self { + start: now, + inner: Mutex::new(Inner { next_release: now, frozen_at: None }), + reported_rate: AtomicU32::new(0), + } + } + + /// Block until this caller is allowed to dispatch one request under the + /// current rate schedule. + async fn acquire(&self) { + let sleep_until = { + let mut inner = self.inner.lock().await; + let rate = compute_rate(self.start, inner.frozen_at); + let now = Instant::now(); + let earliest = inner.next_release.max(now); + let slot = earliest + slot_interval(rate); + inner.next_release = slot; + + let prev = self.reported_rate.swap(rate, Ordering::Relaxed); + if prev != rate { + println!(" rate limiter: now dispatching at {rate} req/s"); + } + earliest + }; + tokio::time::sleep_until(sleep_until.into()).await; + } + + /// Freeze the rate at the current value. Idempotent — first freeze wins. + fn freeze(&self) { + // Best-effort lock; if contended, the other caller will set it. 
+ if let Ok(mut inner) = self.inner.try_lock() { + if inner.frozen_at.is_none() { + let rate = compute_rate(self.start, None); + inner.frozen_at = Some(rate); + println!( + " rate limiter: freezing ramp at {rate} req/s after retryable prover error" + ); + } + } + } +} + +fn compute_rate(start: Instant, frozen_at: Option) -> u32 { + let elapsed = start.elapsed(); + let step = u32::try_from(elapsed.as_secs() / STEP_DURATION.as_secs()).unwrap_or(u32::MAX); + let target = START_RATE.saturating_add(step).min(MAX_RATE); + frozen_at.map_or(target, |cap| target.min(cap)) +} + +fn slot_interval(rate: u32) -> Duration { + Duration::from_micros(1_000_000 / u64::from(rate.max(1))) +} + +// TESTS +// ================================================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn rate_starts_at_start_rate() { + let now = Instant::now(); + assert_eq!(compute_rate(now, None), START_RATE); + } + + #[test] + fn rate_is_capped_by_freeze() { + let now = Instant::now(); + // `frozen_at` is a cap, not a target — at t=0 the natural rate is + // `START_RATE`, which is already below caps of 3 or higher. + assert_eq!(compute_rate(now, Some(3)), START_RATE); + assert_eq!(compute_rate(now, Some(MAX_RATE)), START_RATE); + // A cap below the natural rate clamps the result down. + assert_eq!(compute_rate(now, Some(0)), 0); + } + + #[test] + fn natural_rate_is_capped_at_max() { + // Simulate "elapsed > MAX_RATE * STEP_DURATION" by constructing a + // start instant far in the past. 
+ let long_ago = Instant::now() + .checked_sub(STEP_DURATION * (MAX_RATE + 5)) + .expect("test environment supports backdated Instants"); + assert_eq!(compute_rate(long_ago, None), MAX_RATE); + assert_eq!(compute_rate(long_ago, Some(4)), 4); + } + + #[test] + fn slot_interval_matches_rate() { + assert_eq!(slot_interval(1), Duration::from_secs(1)); + assert_eq!(slot_interval(10), Duration::from_millis(100)); + } +} From d75ee70cfe5344923821b4b210e9b2b011339b07 Mon Sep 17 00:00:00 2001 From: SantiagoPittella Date: Fri, 15 May 2026 16:27:29 -0300 Subject: [PATCH 9/9] update with latest changes in protocol --- bin/benchmark/README.md | 2 ++ bin/benchmark/src/create_proofs.rs | 38 +++++++++++++++++------------- bin/benchmark/src/rpc_state.rs | 12 ++++++---- 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/bin/benchmark/README.md b/bin/benchmark/README.md index 9f4fc07c32..95cba1680a 100644 --- a/bin/benchmark/README.md +++ b/bin/benchmark/README.md @@ -153,6 +153,8 @@ Start each component. 
The example below backgrounds them with `nohup` and captur ```sh mkdir -p logs +DATA=./node-data + nohup miden-validator start \ --listen 127.0.0.1:50101 \ --data-directory "$DATA/validator" \ diff --git a/bin/benchmark/src/create_proofs.rs b/bin/benchmark/src/create_proofs.rs index 25e93363a2..7bfa9e8a2d 100644 --- a/bin/benchmark/src/create_proofs.rs +++ b/bin/benchmark/src/create_proofs.rs @@ -16,17 +16,25 @@ use miden_protocol::account::auth::{AuthScheme, AuthSecretKey}; use miden_protocol::account::{ Account, AccountBuilder, + AccountComponent, AccountId, AccountStorageMode, AccountType, PartialAccount, StorageMapKey, }; -use miden_protocol::asset::{Asset, AssetVaultKey, AssetWitness, FungibleAsset, TokenSymbol}; +use miden_protocol::asset::{ + Asset, + AssetAmount, + AssetVaultKey, + AssetWitness, + FungibleAsset, + TokenSymbol, +}; use miden_protocol::block::{BlockHeader, BlockNumber}; use miden_protocol::crypto::dsa::falcon512_poseidon2::SecretKey; use miden_protocol::crypto::rand::RandomCoin; -use miden_protocol::note::{Note, NoteScript, NoteScriptRoot}; +use miden_protocol::note::{Note, NoteAttachments, NoteScript, NoteScriptRoot}; use miden_protocol::transaction::{ AccountInputs, InputNote, @@ -38,9 +46,8 @@ use miden_protocol::transaction::{ use miden_protocol::utils::serde::Serializable; use miden_protocol::{Felt, MastForest, Word}; use miden_standards::account::auth::AuthSingleSig; -use miden_standards::account::faucets::BasicFungibleFaucet; +use miden_standards::account::faucets::{FungibleFaucet, TokenName}; use miden_standards::account::interface::{AccountInterface, AccountInterfaceExt}; -use miden_standards::account::metadata::{FungibleTokenMetadata, TokenName}; use miden_standards::account::policies::{ BurnPolicyConfig, MintPolicyConfig, @@ -193,7 +200,7 @@ pub(crate) async fn run(rpc_url: Url, num_transactions: u64, remote_prover_url: wallet_id, vec![asset], miden_protocol::note::NoteType::Public, - 
miden_protocol::note::NoteAttachment::default(), + NoteAttachments::empty(), &mut seed_rng, ) .expect("note creation failed") @@ -335,20 +342,19 @@ fn create_faucet() -> (Account, SecretKey) { let key_pair = SecretKey::with_rng(&mut rng); let init_seed = [0_u8; 32]; - let token_symbol = TokenSymbol::new("TEST").unwrap(); - let token_metadata = FungibleTokenMetadata::builder( - TokenName::new("TEST").unwrap(), - token_symbol, - 2, - FungibleAsset::MAX_AMOUNT, - ) - .build() - .unwrap(); + let fungible_faucet: AccountComponent = FungibleFaucet::builder() + .name(TokenName::new("BENCHMARK").unwrap()) + .symbol(TokenSymbol::new("BCM").unwrap()) + .decimals(2) + .max_supply(AssetAmount::new(FungibleAsset::MAX_AMOUNT).unwrap()) + .build() + .unwrap() + .into(); + let faucet = AccountBuilder::new(init_seed) .account_type(AccountType::FungibleFaucet) .storage_mode(AccountStorageMode::Private) - .with_component(token_metadata) - .with_component(BasicFungibleFaucet) + .with_component(fungible_faucet) .with_components(TokenPolicyManager::new( PolicyAuthority::AuthControlled, MintPolicyConfig::AllowAll, diff --git a/bin/benchmark/src/rpc_state.rs b/bin/benchmark/src/rpc_state.rs index f585c08238..4fe4e1cc97 100644 --- a/bin/benchmark/src/rpc_state.rs +++ b/bin/benchmark/src/rpc_state.rs @@ -8,8 +8,11 @@ //! generation. 
use miden_node_proto::clients::RpcClient; -use miden_node_proto::generated::rpc::sync_chain_mmr_request::UpperBound; -use miden_node_proto::generated::rpc::{BlockHeaderByNumberRequest, SyncChainMmrRequest}; +use miden_node_proto::generated::rpc::{ + BlockHeaderByNumberRequest, + FinalityLevel, + SyncChainMmrRequest, +}; use miden_protocol::block::BlockHeader; use miden_protocol::crypto::merkle::mmr::{MmrDelta, MmrPeaks, PartialMmr}; use miden_protocol::transaction::PartialBlockchain; @@ -68,9 +71,10 @@ pub(crate) async fn fetch_partial_blockchain( if tip_block_num >= 2 { let request = SyncChainMmrRequest { - block_from: 0, - upper_bound: Some(UpperBound::BlockNum(tip_block_num)), + current_client_block_height: 0, + finality_level: FinalityLevel::Committed.into(), }; + let response = client .sync_chain_mmr(request) .await