Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions src/benchmark_register.cc
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ bool BenchmarkFamilies::FindBenchmarks(
// Special list of thread counts to use when none are specified
const std::vector<int> one_thread = {1};

// Optimization: Check if spec is a simple literal string (no regex metacharacters)
// If so, we can use faster string matching to skip families early
const std::string regex_meta = "^$.*+?[]{}()|\\";
bool is_literal = (spec.find_first_of(regex_meta) == std::string::npos);

int next_family_index = 0;

MutexLock l(mutex_);
Expand Down Expand Up @@ -166,6 +171,23 @@ bool BenchmarkFamilies::FindBenchmarks(
benchmarks->reserve(benchmarks->size() + family_size);
}

// Optimization: For literal string filters (no regex metacharacters),
// we can skip entire families if the family name doesn't contain the literal.
// This is safe because all instances will have names starting with the family name.
// For positive filters: skip if literal not found in family name
// For negative filters: skip if literal IS found (all instances will match the negative filter)
if (is_literal && !family->name_.empty()) {
bool family_contains_literal = family->name_.find(spec) != std::string::npos;
if (!is_negative_filter && !family_contains_literal) {
// Positive filter: family name doesn't contain literal, skip family
continue;
}
Comment on lines +181 to +184
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't this erroneously skip BM_Family/arg1/arg2 given the filter BM_Family/arg?

if (is_negative_filter && family_contains_literal) {
// Negative filter: family name contains literal, all instances will be excluded
continue;
}
}

for (auto const& args : family->args_) {
for (int num_threads : *thread_counts) {
BenchmarkInstance instance(family.get(), family_index,
Expand Down
3 changes: 3 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ add_filter_test(filter_regex_begin2_negative "-^N" 4)
add_filter_test(filter_regex_end ".*Ba$" 1)
add_filter_test(filter_regex_end_negative "-.*Ba$" 4)

compile_benchmark_test(filter_optimization_test)
benchmark_add_test(NAME filter_optimization_benefit COMMAND filter_optimization_test)

compile_benchmark_test(options_test)
benchmark_add_test(NAME options_benchmarks COMMAND options_test --benchmark_min_time=0.01s)

Expand Down
115 changes: 115 additions & 0 deletions test/filter_optimization_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#include <chrono>
#include <iostream>
#include <string>

#include "benchmark/benchmark.h"

namespace {

// Defines an empty-bodied benchmark function called `bm_name` and registers
// it with DenseRange(0, 999, 1). The expansion already ends with a semicolon,
// so invocations are written without one.
#define CREATE_BENCHMARK_FAMILY(bm_name)      \
  void bm_name(benchmark::State& state) {     \
    for (auto _ : state) {                    \
    }                                         \
  }                                           \
  BENCHMARK(bm_name)->DenseRange(0, 999, 1);

// Create many benchmark families, each with 1000 instances (args 0-999)
CREATE_BENCHMARK_FAMILY(BM_Alpha)
CREATE_BENCHMARK_FAMILY(BM_Beta)
CREATE_BENCHMARK_FAMILY(BM_Gamma)
CREATE_BENCHMARK_FAMILY(BM_Delta)
CREATE_BENCHMARK_FAMILY(BM_Epsilon)
CREATE_BENCHMARK_FAMILY(BM_Zeta)
CREATE_BENCHMARK_FAMILY(BM_Eta)
CREATE_BENCHMARK_FAMILY(BM_Theta)
CREATE_BENCHMARK_FAMILY(BM_Iota)
CREATE_BENCHMARK_FAMILY(BM_Kappa)
CREATE_BENCHMARK_FAMILY(BM_Lambda)
CREATE_BENCHMARK_FAMILY(BM_Mu)
CREATE_BENCHMARK_FAMILY(BM_Nu)
CREATE_BENCHMARK_FAMILY(BM_Xi)
CREATE_BENCHMARK_FAMILY(BM_Omicron)

// The target benchmark we're looking for (also with 1000 instances)
CREATE_BENCHMARK_FAMILY(BM_TargetBenchmark)

// Reporter whose callbacks do nothing: it is handed to
// RunSpecifiedBenchmarks() by main() so that no run results are printed.
class NullReporter : public benchmark::BenchmarkReporter {
 public:
  // Ignores the context and always returns true.
  bool ReportContext(const Context& /*context*/) override {
    return true;
  }

  // Discards the reported runs.
  void ReportRuns(const std::vector<Run>& /*runs*/) override {
  }

  // Nothing to flush or summarize.
  void Finalize() override {
  }
};

} // namespace

// Runs benchmark selection twice with --benchmark_list_tests: once with a
// literal filter and once with an equivalent regex filter. The test fails only
// if either run does not select exactly 1000 benchmarks (all instances of
// BM_TargetBenchmark). Elapsed times are printed for information but never
// decide the result: wall-clock ratios depend on host load and hardware, so
// asserting on them would make the test flaky.
int main(int argc, char** argv) {
  benchmark::MaybeReenterWithoutASLR(argc, argv);

  std::cout << "\n=== Filter Optimization Performance Test ===\n";
  std::cout << "Total families: 16 (15 non-matching + 1 target)\n";
  std::cout << "Total instances: 16000 (16 families × 1000 args each)\n\n";

  NullReporter null_reporter;

  // Literal filter: no regex metacharacters in the spec.
  std::cout << "Testing literal filter \"TargetBenchmark\"...\n";
  int argc1 = 3;
  const char* argv1[] = {"test", "--benchmark_filter=TargetBenchmark", "--benchmark_list_tests"};
  benchmark::Initialize(&argc1, const_cast<char**>(argv1));
  // steady_clock is monotonic, so the measured interval cannot go backwards
  // if the system clock is adjusted while the run is in progress.
  const auto start_literal = std::chrono::steady_clock::now();
  const size_t count_literal = benchmark::RunSpecifiedBenchmarks(&null_reporter);
  const auto end_literal = std::chrono::steady_clock::now();
  const auto duration_literal =
      std::chrono::duration_cast<std::chrono::microseconds>(end_literal - start_literal);

  // Regex filter selecting the same benchmarks, but containing metacharacters.
  std::cout << "Testing regex filter \".*TargetBenchmark.*\"...\n";
  int argc2 = 3;
  const char* argv2[] = {"test", "--benchmark_filter=.*TargetBenchmark.*", "--benchmark_list_tests"};
  benchmark::Initialize(&argc2, const_cast<char**>(argv2));
  const auto start_regex = std::chrono::steady_clock::now();
  const size_t count_regex = benchmark::RunSpecifiedBenchmarks(&null_reporter);
  const auto end_regex = std::chrono::steady_clock::now();
  const auto duration_regex =
      std::chrono::duration_cast<std::chrono::microseconds>(end_regex - start_regex);

  // Correctness is the only pass/fail criterion: both filters must select
  // exactly the 1000 instances of BM_TargetBenchmark.
  if (count_literal != 1000) {
    std::cerr << "ERROR: Literal filter expected 1000 matches, got " << count_literal << "\n";
    return 1;
  }
  if (count_regex != 1000) {
    std::cerr << "ERROR: Regex filter expected 1000 matches, got " << count_regex << "\n";
    return 1;
  }

  std::cout << "\n=== RESULTS ===\n";
  std::cout << "Literal filter \"TargetBenchmark\": " << duration_literal.count() << " μs\n";
  std::cout << "Regex filter \".*TargetBenchmark.*\": " << duration_regex.count() << " μs\n\n";

  // Informational only. A sub-microsecond literal run is not an error; it just
  // means the ratio cannot be computed without dividing by zero.
  if (duration_literal.count() > 0) {
    const double speedup =
        static_cast<double>(duration_regex.count()) / static_cast<double>(duration_literal.count());
    std::cout << "Speedup with optimization: " << speedup << "x faster\n\n";
  } else {
    std::cout << "Speedup with optimization: not measurable (literal filter took < 1 μs)\n\n";
  }
Comment on lines +91 to +100
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not a good idea to make tests depend on host machine performance.


std::cout << "=== WHY THE DIFFERENCE? ===\n";
std::cout << "Literal filter (\"TargetBenchmark\"):\n";
std::cout << " - Detects no regex metacharacters\n";
std::cout << " - Uses family->name_.find(\"TargetBenchmark\")\n";
std::cout << " - Skips 15 families immediately (15000 instances not generated)\n";
std::cout << " - Only processes BM_TargetBenchmark family (1000 instances generated)\n\n";

std::cout << "Regex filter (\".*TargetBenchmark.*\"):\n";
std::cout << " - Detects metacharacters (. and *)\n";
std::cout << " - Must process ALL 16 families\n";
std::cout << " - Generates all 16000 instances and regex-matches each name\n\n";

return 0;
}
Loading