diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc
index 65e1afced..1a63ea173 100644
--- a/src/benchmark_register.cc
+++ b/src/benchmark_register.cc
@@ -133,6 +133,12 @@ bool BenchmarkFamilies::FindBenchmarks(
   // Special list of thread counts to use when none are specified
   const std::vector<int> one_thread = {1};
 
+  // Optimization: a filter spec without regex metacharacters is a plain
+  // substring match, so string search on the family name can stand in for
+  // the regex when deciding whether a whole family is excluded.
+  const bool is_literal =
+      spec.find_first_of("^$.*+?[]{}()|\\") == std::string::npos;
+
   int next_family_index = 0;
 
   MutexLock l(mutex_);
@@ -166,6 +172,18 @@ bool BenchmarkFamilies::FindBenchmarks(
       benchmarks->reserve(benchmarks->size() + family_size);
     }
 
+    // Optimization: all instances have names starting with the family name,
+    // so a literal negative filter that occurs in the family name matches,
+    // and thereby excludes, every instance. Skip such families outright.
+    // Positive literal filters cannot use the family name this way: a
+    // literal like "BM_Foo/8" can match the argument suffix of an instance
+    // name while being absent from the family name, so those families must
+    // still be expanded and matched instance by instance.
+    if (is_literal && is_negative_filter &&
+        family->name_.find(spec) != std::string::npos) {
+      continue;
+    }
+
     for (auto const& args : family->args_) {
       for (int num_threads : *thread_counts) {
         BenchmarkInstance instance(family.get(), family_index,
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 8a1a1a968..bcee2ffbf 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -128,6 +128,9 @@ add_filter_test(filter_regex_begin2_negative "-^N" 4)
 add_filter_test(filter_regex_end ".*Ba$" 1)
 add_filter_test(filter_regex_end_negative "-.*Ba$" 4)
 
+compile_benchmark_test(filter_optimization_test)
+benchmark_add_test(NAME filter_optimization_benefit COMMAND filter_optimization_test)
+
 compile_benchmark_test(options_test)
 benchmark_add_test(NAME options_benchmarks COMMAND options_test --benchmark_min_time=0.01s)
 
diff --git a/test/filter_optimization_test.cc b/test/filter_optimization_test.cc
new file mode 100644
index 000000000..c2166db29
--- /dev/null
+++ b/test/filter_optimization_test.cc
@@ -0,0 +1,109 @@
+#include <cstddef>
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+
+namespace {
+
+// Each family is registered once per argument in [0, kArgsPerFamily).
+constexpr int kArgsPerFamily = 100;
+constexpr size_t kFamilyCount = 16;
+
+// The suffix-based expectations in main() are written for arguments 0..99.
+static_assert(kArgsPerFamily == 100, "update the expected counts in main()");
+
+// Macro to create a benchmark family with one instance per argument
+#define CREATE_BENCHMARK_FAMILY(name)  \
+  void name(benchmark::State& state) { \
+    for (auto _ : state) {             \
+    }                                  \
+  }                                    \
+  BENCHMARK(name)->DenseRange(0, kArgsPerFamily - 1, 1);
+
+// Families whose names do not contain "TargetBenchmark"
+CREATE_BENCHMARK_FAMILY(BM_Alpha)
+CREATE_BENCHMARK_FAMILY(BM_Beta)
+CREATE_BENCHMARK_FAMILY(BM_Gamma)
+CREATE_BENCHMARK_FAMILY(BM_Delta)
+CREATE_BENCHMARK_FAMILY(BM_Epsilon)
+CREATE_BENCHMARK_FAMILY(BM_Zeta)
+CREATE_BENCHMARK_FAMILY(BM_Eta)
+CREATE_BENCHMARK_FAMILY(BM_Theta)
+CREATE_BENCHMARK_FAMILY(BM_Iota)
+CREATE_BENCHMARK_FAMILY(BM_Kappa)
+CREATE_BENCHMARK_FAMILY(BM_Lambda)
+CREATE_BENCHMARK_FAMILY(BM_Mu)
+CREATE_BENCHMARK_FAMILY(BM_Nu)
+CREATE_BENCHMARK_FAMILY(BM_Xi)
+CREATE_BENCHMARK_FAMILY(BM_Omicron)
+
+// The family that the name-based filters are aimed at
+CREATE_BENCHMARK_FAMILY(BM_TargetBenchmark)
+
+// Reporter that ignores everything; only the returned match count is checked.
+class NullReporter : public benchmark::BenchmarkReporter {
+ public:
+  bool ReportContext(const Context&) override { return true; }
+  void ReportRuns(const std::vector<Run>&) override {}
+  void Finalize() override {}
+};
+
+// A filter spec together with the number of instances it must select.
+struct FilterCase {
+  const char* filter;
+  size_t expected_matches;
+};
+
+}  // namespace
+
+int main(int argc, char** argv) {
+  benchmark::MaybeReenterWithoutASLR(argc, argv);
+
+  // List the selected benchmarks instead of running them.
+  int list_argc = 2;
+  const char* list_argv[] = {"filter_optimization_test",
+                             "--benchmark_list_tests", nullptr};
+  benchmark::Initialize(&list_argc, const_cast<char**>(list_argv));
+
+  // Send the listing to a scratch buffer so the log only holds the verdicts.
+  std::ostringstream listing;
+  NullReporter null_reporter;
+  null_reporter.SetOutputStream(&listing);
+
+  const size_t per_family = static_cast<size_t>(kArgsPerFamily);
+  const size_t total = kFamilyCount * per_family;
+  const FilterCase cases[] = {
+      // A literal and its equivalent regex must select the same instances.
+      {"TargetBenchmark", per_family},
+      {".*TargetBenchmark.*", per_family},
+      {"-TargetBenchmark", total - per_family},
+      {"-.*TargetBenchmark.*", total - per_family},
+      // Literals that occur only in the argument suffix of instance names;
+      // no family name contains them, so families must not be skipped.
+      {"/99", kFamilyCount},
+      {"-/99", total - kFamilyCount},
+      {"BM_TargetBenchmark/99", 1},
+  };
+
+  bool all_passed = true;
+  for (const FilterCase& filter_case : cases) {
+    listing.str(std::string());
+    const size_t matched = benchmark::RunSpecifiedBenchmarks(
+        &null_reporter, std::string(filter_case.filter));
+    if (matched != filter_case.expected_matches) {
+      std::cerr << "ERROR: filter \"" << filter_case.filter << "\" expected "
+                << filter_case.expected_matches << " matches, got " << matched
+                << "\n";
+      all_passed = false;
+    } else {
+      std::cout << "OK: filter \"" << filter_case.filter << "\" matched "
+                << matched << " instances\n";
+    }
+  }
+
+  return all_passed ? EXIT_SUCCESS : EXIT_FAILURE;
+}