From 097424069bc76e3cfc32c90bfffa919c68e8dbe3 Mon Sep 17 00:00:00 2001
From: Connor Tsui <connor.tsui20@gmail.com>
Date: Fri, 12 Dec 2025 12:10:45 -0500
Subject: [PATCH] Add --check mode to query_bench and a script to diff VORTEX_OPERATORS output

Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
---
 bench-vortex/scripts/diff_vortex_operators.sh | 22 +++++
 bench-vortex/src/benchmark_driver.rs          | 95 +++++++++++++++++++
 bench-vortex/src/bin/query_bench.rs           | 11 +++
 3 files changed, 128 insertions(+)
 create mode 100755 bench-vortex/scripts/diff_vortex_operators.sh

diff --git a/bench-vortex/scripts/diff_vortex_operators.sh b/bench-vortex/scripts/diff_vortex_operators.sh
new file mode 100755
index 00000000000..078ad0f63d2
--- /dev/null
+++ b/bench-vortex/scripts/diff_vortex_operators.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+# Runs query_bench with VORTEX_OPERATORS=true, then =false (all arguments are forwarded),
+# and diffs the stdout captured from each run. A non-empty diff does not fail the script.
+# Usage:
+#   ./diff_vortex_operators.sh statpopgen --scale-factor 1 --targets duckdb:vortex -q 7 --check
+
+set -euo pipefail
+
+log_level="${RUST_LOG:-error}"
+bench=(cargo run --release -p bench-vortex --bin query_bench --)
+
+echo "Running with VORTEX_OPERATORS=true..."
+with_operators=$(VORTEX_OPERATORS=true RUST_LOG="$log_level" "${bench[@]}" "$@")
+
+echo "Running with VORTEX_OPERATORS=false..."
+without_operators=$(VORTEX_OPERATORS=false RUST_LOG="$log_level" "${bench[@]}" "$@")
+
+printf '\n%s\n' "=== Diff (VORTEX_OPERATORS=true vs false) ==="
+diff <(echo "$with_operators") <(echo "$without_operators") || true
diff --git a/bench-vortex/src/benchmark_driver.rs b/bench-vortex/src/benchmark_driver.rs
index e3116b6a097..728d1a3d5c5 100644
--- a/bench-vortex/src/benchmark_driver.rs
+++ b/bench-vortex/src/benchmark_driver.rs
@@ -7,6 +7,7 @@ use std::fmt;
 use std::path::PathBuf;
 
 use anyhow::Result;
+use datafusion::arrow::util::pretty::pretty_format_batches;
 use indicatif::ProgressBar;
 use tracing::warn;
 use vortex::error::VortexExpect;
@@ -69,10 +70,16 @@ pub struct DriverConfig {
     pub skip_generate: bool,
     pub explain: bool,
     pub explain_analyze: bool,
+    pub check: bool,
 }
 
 /// Run a benchmark using the provided implementation and configuration
 pub fn run_benchmark<B: Benchmark>(benchmark: B, config: DriverConfig) -> Result<()> {
+    // If check mode is enabled, run a single query and print results.
+    if config.check {
+        return run_check_query(benchmark, config);
+    }
+
     // If explain-analyze mode is enabled, run explain analyze
     if config.explain_analyze {
         return run_explain_query(benchmark, config, ExplainMode::Analyze);
@@ -425,3 +432,91 @@ fn run_explain_query<B: Benchmark>(
 
     Ok(())
 }
+
+/// Run a single query once and print its result set for correctness checking.
+///
+/// Fails unless exactly one target and one existing, non-excluded query are selected;
+/// setup, execution, and formatting errors are propagated rather than only logged.
+fn run_check_query<B: Benchmark>(benchmark: B, config: DriverConfig) -> Result<()> {
+    // Validate exactly one target.
+    anyhow::ensure!(
+        config.targets.len() == 1,
+        "--check requires exactly 1 target, but {} were provided",
+        config.targets.len()
+    );
+
+    // Validate exactly one query is selected.
+    let Some(ref queries) = config.queries else {
+        anyhow::bail!("--check requires exactly 1 query to be specified via -q");
+    };
+    anyhow::ensure!(
+        queries.len() == 1,
+        "--check requires exactly 1 query, but {} were specified",
+        queries.len()
+    );
+
+    let target = &config.targets[0];
+
+    // Generate data unless the caller asked to skip it.
+    if !config.skip_generate {
+        benchmark.generate_data(target)?;
+    }
+
+    let filtered_queries = filter_queries(
+        benchmark.queries()?,
+        config.queries.as_ref(),
+        config.exclude_queries.as_ref(),
+    );
+
+    // An unknown or excluded index would otherwise skip the loop and report success.
+    anyhow::ensure!(
+        !filtered_queries.is_empty(),
+        "--check selected no query: the requested query does not exist or is excluded"
+    );
+
+    let tokio_runtime = new_tokio_runtime(config.threads)?;
+
+    let engine_ctx = benchmark.setup_engine_context(
+        target,
+        config.disable_datafusion_cache,
+        config.emit_plan,
+        config.delete_duckdb_database,
+        config.threads,
+    )?;
+
+    tokio_runtime.block_on(benchmark.register_tables(&engine_ctx, target.format()))?;
+
+    for &(query_idx, ref query_string) in filtered_queries.iter() {
+        println!("Query {}", query_idx);
+        println!("SQL: {}\n", query_string);
+
+        match &engine_ctx {
+            EngineCtx::DataFusion(ctx) => {
+                let (batches, _plan) = tokio_runtime
+                    .block_on(ctx.execute_query(query_string))
+                    .map_err(|err| anyhow::anyhow!("Error running query {}: {}", query_idx, err))?;
+                let row_count: usize = batches.iter().map(|b| b.num_rows()).sum();
+                let formatted = pretty_format_batches(&batches)
+                    .map_err(|err| anyhow::anyhow!("Error formatting results: {}", err))?;
+                println!("{}", formatted);
+                println!("\n({} rows)", row_count);
+            }
+            EngineCtx::DuckDB(ctx) => {
+                let result = ctx.connection.query(query_string).map_err(|err| {
+                    anyhow::anyhow!("Error running query {}: {}", query_idx, err)
+                })?;
+                let mut row_count = 0u64;
+                for chunk in result {
+                    row_count += chunk.len();
+                    let output = String::try_from(&chunk).map_err(|err| {
+                        anyhow::anyhow!("Error converting chunk to string: {}", err)
+                    })?;
+                    println!("{}", output);
+                }
+                println!("\n({} rows)", row_count);
+            }
+        }
+    }
+
+    Ok(())
+}
diff --git a/bench-vortex/src/bin/query_bench.rs b/bench-vortex/src/bin/query_bench.rs
index 09d9aa5076a..b1574c19165 100644
--- a/bench-vortex/src/bin/query_bench.rs
+++ b/bench-vortex/src/bin/query_bench.rs
@@ -113,6 +113,11 @@ struct CommonArgs {
 
     #[arg(long, default_value_t = false)]
     explain_analyze: bool,
+
+    /// Run a single query once and print the results for correctness checking.
+    /// Requires exactly 1 target and 1 query (via -q).
+    #[arg(long, default_value_t = false)]
+    check: bool,
 }
 
 #[derive(Parser, Debug)]
@@ -322,6 +327,7 @@ fn run_clickbench(args: ClickBenchArgs) -> anyhow::Result<()> {
         skip_generate: args.common.skip_generate,
         explain: args.common.explain,
         explain_analyze: args.common.explain_analyze,
+        check: args.common.check,
     };
 
     // Determine data URL
@@ -354,6 +360,7 @@ fn run_tpch(args: TpcHArgs) -> anyhow::Result<()> {
         skip_generate: args.common.skip_generate,
         explain: args.common.explain,
         explain_analyze: args.common.explain_analyze,
+        check: args.common.check,
     };
 
     // Run benchmark using the trait system
@@ -387,6 +394,7 @@ fn run_tpcds(args: TpcDSArgs) -> anyhow::Result<()> {
         skip_generate: args.common.skip_generate,
         explain: args.common.explain,
         explain_analyze: args.common.explain_analyze,
+        check: args.common.check,
     };
 
     // Run benchmark using the trait system
@@ -422,6 +430,7 @@ fn run_statpopgen(args: StatPopGenArgs) -> anyhow::Result<()> {
         skip_generate: args.common.skip_generate,
         explain: args.common.explain,
         explain_analyze: args.common.explain_analyze,
+        check: args.common.check,
     };
 
     // Run benchmark using the trait system
@@ -451,6 +460,7 @@ fn run_fineweb(args: FinewebArgs) -> anyhow::Result<()> {
         skip_generate: args.common.skip_generate,
         explain: args.common.explain,
         explain_analyze: args.common.explain_analyze,
+        check: args.common.check,
     };
 
     run_benchmark(benchmark, config)
@@ -479,6 +489,7 @@ fn run_gharchive(args: GhArchiveArgs) -> anyhow::Result<()> {
         skip_generate: args.common.skip_generate,
         explain: args.common.explain,
         explain_analyze: args.common.explain_analyze,
+        check: args.common.check,
     };
 
     run_benchmark(benchmark, config)