Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions bench-vortex/scripts/diff_vortex_operators.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright the Vortex contributors

# Run query_bench --check with VORTEX_OPERATORS=true and =false, then diff the output.
#
# Usage:
# ./diff_vortex_operators.sh statpopgen --scale-factor 1 --targets duckdb:vortex -q 7 --check

set -euo pipefail

RUST_LOG="${RUST_LOG:-error}"

echo "Running with VORTEX_OPERATORS=true..."
output_true=$(VORTEX_OPERATORS=true RUST_LOG="$RUST_LOG" cargo run --release -p bench-vortex --bin query_bench -- "$@")

echo "Running with VORTEX_OPERATORS=false..."
output_false=$(VORTEX_OPERATORS=false RUST_LOG="$RUST_LOG" cargo run --release -p bench-vortex --bin query_bench -- "$@")

echo ""
echo "=== Diff (VORTEX_OPERATORS=true vs false) ==="
diff <(echo "$output_true") <(echo "$output_false") || true
95 changes: 95 additions & 0 deletions bench-vortex/src/benchmark_driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::fmt;
use std::path::PathBuf;

use anyhow::Result;
use datafusion::arrow::util::pretty::pretty_format_batches;
use indicatif::ProgressBar;
use tracing::warn;
use vortex::error::VortexExpect;
Expand Down Expand Up @@ -69,10 +70,16 @@ pub struct DriverConfig {
pub skip_generate: bool,
pub explain: bool,
pub explain_analyze: bool,
pub check: bool,
}

/// Run a benchmark using the provided implementation and configuration
pub fn run_benchmark<B: Benchmark>(benchmark: B, config: DriverConfig) -> Result<()> {
// If check mode is enabled, run a single query and print results.
if config.check {
return run_check_query(benchmark, config);
}

// If explain-analyze mode is enabled, run explain analyze
if config.explain_analyze {
return run_explain_query(benchmark, config, ExplainMode::Analyze);
Expand Down Expand Up @@ -425,3 +432,91 @@ fn run_explain_query<B: Benchmark>(

Ok(())
}

/// Run a single query and print the results for correctness checking.
fn run_check_query<B: Benchmark>(benchmark: B, config: DriverConfig) -> Result<()> {
// Validate exactly one target.
anyhow::ensure!(
config.targets.len() == 1,
"--check requires exactly 1 target, but {} were provided",
config.targets.len()
);

// Validate exactly one query is selected.
let Some(ref queries) = config.queries else {
anyhow::bail!("--check requires exactly 1 query to be specified via -q");
};
anyhow::ensure!(
queries.len() == 1,
"--check requires exactly 1 query, but {} were specified",
queries.len()
);

let target = &config.targets[0];

// Generate data (idempotent).
if !config.skip_generate {
benchmark.generate_data(target)?;
}

let filtered_queries = filter_queries(
benchmark.queries()?,
config.queries.as_ref(),
config.exclude_queries.as_ref(),
);

let tokio_runtime = new_tokio_runtime(config.threads)?;

let engine_ctx = benchmark.setup_engine_context(
target,
config.disable_datafusion_cache,
config.emit_plan,
config.delete_duckdb_database,
config.threads,
)?;

tokio_runtime.block_on(benchmark.register_tables(&engine_ctx, target.format()))?;

for &(query_idx, ref query_string) in filtered_queries.iter() {
println!("Query {}", query_idx);
println!("SQL: {}\n", query_string);

match &engine_ctx {
EngineCtx::DataFusion(ctx) => {
match tokio_runtime.block_on(ctx.execute_query(query_string)) {
Ok((batches, _plan)) => {
let row_count: usize = batches.iter().map(|b| b.num_rows()).sum();
match pretty_format_batches(&batches) {
Ok(formatted) => println!("{}", formatted),
Err(err) => eprintln!("Error formatting results: {}", err),
}
println!("\n({} rows)", row_count);
}
Err(err) => {
eprintln!("Error running query {}: {}", query_idx, err);
}
}
}
EngineCtx::DuckDB(ctx) => match ctx.connection.query(query_string) {
Ok(result) => {
let mut row_count = 0u64;
for chunk in result {
row_count += chunk.len();
match String::try_from(&chunk) {
Ok(output) => println!("{}", output),
Err(err) => {
eprintln!("Error converting chunk to string: {}", err)
}
}
}
println!("\n({} rows)", row_count);
}
Err(err) => {
eprintln!("Error running query {}: {}", query_idx, err);
}
},
}
}

Ok(())
}
11 changes: 11 additions & 0 deletions bench-vortex/src/bin/query_bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,11 @@ struct CommonArgs {

#[arg(long, default_value_t = false)]
explain_analyze: bool,

/// Run a single query once and print the results for correctness checking.
/// Requires exactly 1 target and 1 query (via -q).
#[arg(long, default_value_t = false)]
check: bool,
}

#[derive(Parser, Debug)]
Expand Down Expand Up @@ -322,6 +327,7 @@ fn run_clickbench(args: ClickBenchArgs) -> anyhow::Result<()> {
skip_generate: args.common.skip_generate,
explain: args.common.explain,
explain_analyze: args.common.explain_analyze,
check: args.common.check,
};

// Determine data URL
Expand Down Expand Up @@ -354,6 +360,7 @@ fn run_tpch(args: TpcHArgs) -> anyhow::Result<()> {
skip_generate: args.common.skip_generate,
explain: args.common.explain,
explain_analyze: args.common.explain_analyze,
check: args.common.check,
};

// Run benchmark using the trait system
Expand Down Expand Up @@ -387,6 +394,7 @@ fn run_tpcds(args: TpcDSArgs) -> anyhow::Result<()> {
skip_generate: args.common.skip_generate,
explain: args.common.explain,
explain_analyze: args.common.explain_analyze,
check: args.common.check,
};

// Run benchmark using the trait system
Expand Down Expand Up @@ -422,6 +430,7 @@ fn run_statpopgen(args: StatPopGenArgs) -> anyhow::Result<()> {
skip_generate: args.common.skip_generate,
explain: args.common.explain,
explain_analyze: args.common.explain_analyze,
check: args.common.check,
};

// Run benchmark using the trait system
Expand Down Expand Up @@ -451,6 +460,7 @@ fn run_fineweb(args: FinewebArgs) -> anyhow::Result<()> {
skip_generate: args.common.skip_generate,
explain: args.common.explain,
explain_analyze: args.common.explain_analyze,
check: args.common.check,
};

run_benchmark(benchmark, config)
Expand Down Expand Up @@ -479,6 +489,7 @@ fn run_gharchive(args: GhArchiveArgs) -> anyhow::Result<()> {
skip_generate: args.common.skip_generate,
explain: args.common.explain,
explain_analyze: args.common.explain_analyze,
check: args.common.check,
};

run_benchmark(benchmark, config)
Expand Down
Loading