From 097424069bc76e3cfc32c90bfffa919c68e8dbe3 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Fri, 12 Dec 2025 12:10:45 -0500 Subject: [PATCH] check benchmark Signed-off-by: Connor Tsui --- bench-vortex/scripts/diff_vortex_operators.sh | 22 +++++ bench-vortex/src/benchmark_driver.rs | 95 +++++++++++++++++++ bench-vortex/src/bin/query_bench.rs | 11 +++ 3 files changed, 128 insertions(+) create mode 100755 bench-vortex/scripts/diff_vortex_operators.sh diff --git a/bench-vortex/scripts/diff_vortex_operators.sh b/bench-vortex/scripts/diff_vortex_operators.sh new file mode 100755 index 00000000000..078ad0f63d2 --- /dev/null +++ b/bench-vortex/scripts/diff_vortex_operators.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright the Vortex contributors + +# Run query_bench --check with VORTEX_OPERATORS=true and =false, then diff the output. +# +# Usage: +# ./diff_vortex_operators.sh statpopgen --scale-factor 1 --targets duckdb:vortex -q 7 --check + +set -euo pipefail + +RUST_LOG="${RUST_LOG:-error}" + +echo "Running with VORTEX_OPERATORS=true..." +output_true=$(VORTEX_OPERATORS=true RUST_LOG="$RUST_LOG" cargo run --release -p bench-vortex --bin query_bench -- "$@") + +echo "Running with VORTEX_OPERATORS=false..." +output_false=$(VORTEX_OPERATORS=false RUST_LOG="$RUST_LOG" cargo run --release -p bench-vortex --bin query_bench -- "$@") + +echo "" +echo "=== Diff (VORTEX_OPERATORS=true vs false) ===" +diff <(echo "$output_true") <(echo "$output_false") || true diff --git a/bench-vortex/src/benchmark_driver.rs b/bench-vortex/src/benchmark_driver.rs index e3116b6a097..728d1a3d5c5 100644 --- a/bench-vortex/src/benchmark_driver.rs +++ b/bench-vortex/src/benchmark_driver.rs @@ -7,6 +7,7 @@ use std::fmt; use std::path::PathBuf; use anyhow::Result; +use datafusion::arrow::util::pretty::pretty_format_batches; use indicatif::ProgressBar; use tracing::warn; use vortex::error::VortexExpect; @@ -69,10 +70,16 @@ pub struct DriverConfig { pub skip_generate: bool, pub explain: bool, pub explain_analyze: bool, + pub check: bool, } /// Run a benchmark using the provided implementation and configuration pub fn run_benchmark(benchmark: B, config: DriverConfig) -> Result<()> { + // If check mode is enabled, run a single query and print results. + if config.check { + return run_check_query(benchmark, config); + } + // If explain-analyze mode is enabled, run explain analyze if config.explain_analyze { return run_explain_query(benchmark, config, ExplainMode::Analyze); @@ -425,3 +432,91 @@ fn run_explain_query( Ok(()) } + +/// Run a single query and print the results for correctness checking. +fn run_check_query(benchmark: B, config: DriverConfig) -> Result<()> { + // Validate exactly one target. + anyhow::ensure!( + config.targets.len() == 1, + "--check requires exactly 1 target, but {} were provided", + config.targets.len() + ); + + // Validate exactly one query is selected. + let Some(ref queries) = config.queries else { + anyhow::bail!("--check requires exactly 1 query to be specified via -q"); + }; + anyhow::ensure!( + queries.len() == 1, + "--check requires exactly 1 query, but {} were specified", + queries.len() + ); + + let target = &config.targets[0]; + + // Generate data (idempotent). + if !config.skip_generate { + benchmark.generate_data(target)?; + } + + let filtered_queries = filter_queries( + benchmark.queries()?, + config.queries.as_ref(), + config.exclude_queries.as_ref(), + ); + + let tokio_runtime = new_tokio_runtime(config.threads)?; + + let engine_ctx = benchmark.setup_engine_context( + target, + config.disable_datafusion_cache, + config.emit_plan, + config.delete_duckdb_database, + config.threads, + )?; + + tokio_runtime.block_on(benchmark.register_tables(&engine_ctx, target.format()))?; + + for &(query_idx, ref query_string) in filtered_queries.iter() { + println!("Query {}", query_idx); + println!("SQL: {}\n", query_string); + + match &engine_ctx { + EngineCtx::DataFusion(ctx) => { + match tokio_runtime.block_on(ctx.execute_query(query_string)) { + Ok((batches, _plan)) => { + let row_count: usize = batches.iter().map(|b| b.num_rows()).sum(); + match pretty_format_batches(&batches) { + Ok(formatted) => println!("{}", formatted), + Err(err) => eprintln!("Error formatting results: {}", err), + } + println!("\n({} rows)", row_count); + } + Err(err) => { + eprintln!("Error running query {}: {}", query_idx, err); + } + } + } + EngineCtx::DuckDB(ctx) => match ctx.connection.query(query_string) { + Ok(result) => { + let mut row_count = 0u64; + for chunk in result { + row_count += chunk.len(); + match String::try_from(&chunk) { + Ok(output) => println!("{}", output), + Err(err) => { + eprintln!("Error converting chunk to string: {}", err) + } + } + } + println!("\n({} rows)", row_count); + } + Err(err) => { + eprintln!("Error running query {}: {}", query_idx, err); + } + }, + } + } + + Ok(()) +} diff --git a/bench-vortex/src/bin/query_bench.rs b/bench-vortex/src/bin/query_bench.rs index 09d9aa5076a..b1574c19165 100644 --- a/bench-vortex/src/bin/query_bench.rs +++ b/bench-vortex/src/bin/query_bench.rs @@ -113,6 +113,11 @@ struct CommonArgs { #[arg(long, default_value_t = false)] explain_analyze: bool, + + /// Run a single query once and print the results for correctness checking. + /// Requires exactly 1 target and 1 query (via -q). + #[arg(long, default_value_t = false)] + check: bool, } #[derive(Parser, Debug)] @@ -322,6 +327,7 @@ fn run_clickbench(args: ClickBenchArgs) -> anyhow::Result<()> { skip_generate: args.common.skip_generate, explain: args.common.explain, explain_analyze: args.common.explain_analyze, + check: args.common.check, }; // Determine data URL @@ -354,6 +360,7 @@ fn run_tpch(args: TpcHArgs) -> anyhow::Result<()> { skip_generate: args.common.skip_generate, explain: args.common.explain, explain_analyze: args.common.explain_analyze, + check: args.common.check, }; // Run benchmark using the trait system @@ -387,6 +394,7 @@ fn run_tpcds(args: TpcDSArgs) -> anyhow::Result<()> { skip_generate: args.common.skip_generate, explain: args.common.explain, explain_analyze: args.common.explain_analyze, + check: args.common.check, }; // Run benchmark using the trait system @@ -422,6 +430,7 @@ fn run_statpopgen(args: StatPopGenArgs) -> anyhow::Result<()> { skip_generate: args.common.skip_generate, explain: args.common.explain, explain_analyze: args.common.explain_analyze, + check: args.common.check, }; // Run benchmark using the trait system @@ -451,6 +460,7 @@ fn run_fineweb(args: FinewebArgs) -> anyhow::Result<()> { skip_generate: args.common.skip_generate, explain: args.common.explain, explain_analyze: args.common.explain_analyze, + check: args.common.check, }; run_benchmark(benchmark, config) @@ -479,6 +489,7 @@ fn run_gharchive(args: GhArchiveArgs) -> anyhow::Result<()> { skip_generate: args.common.skip_generate, explain: args.common.explain, explain_analyze: args.common.explain_analyze, + check: args.common.check, }; run_benchmark(benchmark, config)