Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ SQLTraceBench is a tool for benchmarking database performance using trace-based
- **Multi-Database Support**: A plugin-based architecture supports multiple databases (e.g., MySQL, ClickHouse, StarRocks).
- **Statistical Modeling**: Uses statistical models to synthesize realistic parameter values.
- **Extensible**: Easily add new database dialects and workload patterns.
- **Visual Reports**: Generates comprehensive HTML validation reports with interactive charts.

## Getting Started

Expand All @@ -19,6 +20,16 @@ It covers:
* Running conversion, generation, and benchmarking commands
* Using the automated `examples/quickstart.sh` script

## Validation Reports

SQLTraceBench generates detailed validation reports to compare your benchmark results against a baseline.

![Validation Report](docs/images/report_preview.png)

*Example of an HTML validation report showing QPS deviation and latency distribution.*

See [Report Interpretation Guide](docs/user_guide/report_interpretation.md) for details on how to read the reports.

## Development

1. **Build**: `make build`
Expand Down
13 changes: 13 additions & 0 deletions chart.min.js

Large diffs are not rendered by default.

41 changes: 40 additions & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package cmd

import (
"bufio"
"fmt"
"os"
"strings"

"github.com/spf13/cobra"
"github.com/turtacn/SQLTraceBench/internal/app/conversion"
Expand All @@ -10,6 +13,7 @@ import (
"github.com/turtacn/SQLTraceBench/internal/app/validation"
"github.com/turtacn/SQLTraceBench/internal/app/workflow"
"github.com/turtacn/SQLTraceBench/internal/infrastructure/parsers"
"github.com/turtacn/SQLTraceBench/internal/utils/terminal"
"github.com/turtacn/SQLTraceBench/pkg/config"
"github.com/turtacn/SQLTraceBench/pkg/types"
"github.com/turtacn/SQLTraceBench/pkg/utils"
Expand All @@ -21,12 +25,20 @@ var (
Version = types.Version
cfgFile string
pluginDir string
noColor bool
verbose bool
autoYes bool // For workflow run
cfg *types.Config
rootCmd = &cobra.Command{
Use: "sqltracebench",
Short: "SQL trace-based workload benchmark CLI",
Version: Version,
PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
// Handle flags
if noColor {
terminal.ColorEnabled = false
}

// Load the configuration.
var err error
cfg, err = config.Load(cfgFile)
Expand All @@ -35,7 +47,12 @@ var (
}

// Initialize the logger.
logger := utils.NewLogger(cfg.Log.Level, cfg.Log.Format, nil)
// If verbose is on, maybe force Debug level?
logLevel := cfg.Log.Level
if verbose {
logLevel = "debug"
}
logger := utils.NewLogger(logLevel, cfg.Log.Format, nil)
utils.SetGlobalLogger(logger)

// Load plugins
Expand Down Expand Up @@ -71,6 +88,25 @@ var workflowRunCmd = &cobra.Command{
return err
}

// Confirmation Step
if !autoYes && terminal.IsTerminal() {
fmt.Println(terminal.Info("Workflow Plan:"))
fmt.Printf(" Target Plugin: %s\n", pipelineCfg.TargetPlugin)
fmt.Printf(" Input Traces: %s\n", pipelineCfg.InputTracePath)
fmt.Printf(" Generation Count: %d\n", pipelineCfg.Generation.Count)
fmt.Printf(" Concurrency: %d\n", pipelineCfg.Execution.Concurrency)
fmt.Printf(" Output Dir: %s\n", pipelineCfg.OutputDir)

fmt.Print("\nDo you want to proceed? [y/N]: ")
reader := bufio.NewReader(os.Stdin)
input, _ := reader.ReadString('\n')
input = strings.TrimSpace(strings.ToLower(input))
if input != "y" && input != "yes" {
fmt.Println(terminal.Warning("Workflow cancelled by user."))
return nil
}
}

// Initialize Services
parser := parsers.NewAntlrParser()
registry := plugin_registry.GlobalRegistry
Expand All @@ -91,8 +127,11 @@ var workflowRunCmd = &cobra.Command{
func init() {
rootCmd.PersistentFlags().StringVar(&cfgFile, "config", types.DefaultConfigPath, "config file")
rootCmd.PersistentFlags().StringVar(&pluginDir, "plugin-dir", "./bin", "Directory where plugins are located")
rootCmd.PersistentFlags().BoolVar(&noColor, "no-color", false, "Disable color output")
rootCmd.PersistentFlags().BoolVarP(&verbose, "verbose", "v", false, "Enable verbose output")

workflowRunCmd.Flags().StringP("config", "c", "", "Pipeline config YAML")
workflowRunCmd.Flags().BoolVarP(&autoYes, "yes", "y", false, "Skip confirmation prompt")
workflowRunCmd.MarkFlagRequired("config")
workflowCmd.AddCommand(workflowRunCmd)
rootCmd.AddCommand(workflowCmd)
Expand Down
3 changes: 3 additions & 0 deletions configs/workflow_example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ input_schema_path: "testdata/fixtures/mysql_schema.sql"
output_dir: "output/pipeline_example"
target_plugin: "clickhouse"

# Report Style Configuration (Optional)
report_style: "html" # Options: html, json

generation:
count: 1000

Expand Down
Binary file added docs/images/report_preview.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
51 changes: 51 additions & 0 deletions docs/user_guide/report_interpretation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Validation Report Interpretation Guide

This guide explains how to interpret the `validation_report.html` generated by SQLTraceBench after a benchmark run.

## 1. Report Overview

The report provides a comprehensive view of the benchmark results, comparing the performance of the candidate database (or configuration) against a baseline.

### Key Sections:

1. **Status Card**: Immediate visual feedback on whether the validation passed or failed.
2. **Performance Metrics**: Charts visualizing QPS (Queries Per Second) and Latency distributions.
3. **Statistical Validation**: Detailed table of statistical tests performed (e.g., KS Test, Chi-Square) and their results.

## 2. Key Metrics & Interpretation

### 2.1 QPS Deviation
**Definition:** `(Actual QPS - Baseline QPS) / Baseline QPS * 100%`

**Interpretation:**
* **Green (|Deviation| < 5%)**: Excellent match. The candidate performs similarly to the baseline.
* **Yellow (5% ≤ |Deviation| < 15%)**: Acceptable variance. Minor tuning may be required.
* **Red (|Deviation| ≥ 15%)**: Significant deviation. Requires investigation.

**Common Scenarios:**
* **Negative Deviation (e.g., -20%)**: Candidate is slower. Check resource utilization (CPU, IO), index usage, or locking issues.
* **Positive Deviation (e.g., +20%)**: Candidate is faster. While generally good, if the goal is to *replicate* behavior, this might indicate the candidate is skipping work or caching more aggressively.

### 2.2 Statistical Tests

#### KS Test (Kolmogorov-Smirnov)
**Purpose:** Checks if the latency distribution of the candidate matches the baseline.

**p-value:**
* **p > 0.05**: PASS. No significant difference in distributions.
* **p ≤ 0.05**: FAIL. Significant difference detected.

#### Chi-Square Test
**Purpose:** Checks goodness-of-fit for categorical data or binned distributions (e.g., query-type frequencies), verifying that the candidate's observed frequencies match the baseline's expected frequencies. As with the KS test, a p-value above 0.05 indicates no significant difference (PASS).

## 3. Troubleshooting

**If Status is FAIL:**
1. Check **QPS Deviation**. Is the system under too much load?
2. Examine **Latency Charts**. Is there a long tail? Are P99 latencies spiking?
3. Review **Error Rates**. High error rates will invalidate performance metrics.
4. Check logs for specific query failures.

**Tips:**
* Hover over charts to see exact values.
* Use the "Baseline" values as your ground truth.
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ require (
golang.org/x/mod v0.28.0 // indirect
golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82 // indirect
golang.org/x/sync v0.17.0 // indirect
golang.org/x/sys v0.37.0 // indirect
golang.org/x/sys v0.38.0 // indirect
golang.org/x/term v0.37.0 // indirect
golang.org/x/text v0.30.0 // indirect
golang.org/x/tools v0.37.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20251022142026-3a174f9686a8 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,11 @@ golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU=
golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
Expand Down
66 changes: 56 additions & 10 deletions internal/app/workflow/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ import (
"github.com/turtacn/SQLTraceBench/internal/app/generation"
"github.com/turtacn/SQLTraceBench/internal/app/validation"
"github.com/turtacn/SQLTraceBench/internal/domain/models"
"github.com/turtacn/SQLTraceBench/pkg/utils"
"github.com/turtacn/SQLTraceBench/internal/infrastructure/reporters"
"github.com/turtacn/SQLTraceBench/internal/utils"
"github.com/turtacn/SQLTraceBench/internal/utils/progress"
"github.com/turtacn/SQLTraceBench/internal/utils/terminal"
)

// Manager coordinates the workflow pipeline.
Expand Down Expand Up @@ -43,6 +46,7 @@ func NewManager(
// Run executes the full 4-phase pipeline.
func (m *Manager) Run(ctx context.Context, cfg WorkflowConfig) error {
m.logger.Info("Starting Workflow", utils.Field{Key: "config", Value: cfg})
fmt.Println(terminal.Info("Starting SQLTraceBench Workflow..."))

if err := os.MkdirAll(cfg.OutputDir, 0755); err != nil {
return fmt.Errorf("failed to create output dir: %w", err)
Expand All @@ -52,22 +56,28 @@ func (m *Manager) Run(ctx context.Context, cfg WorkflowConfig) error {
// Phase 1: Conversion
// ==========================================
m.logger.Info("Phase 1: Conversion starting...")
p1Bar := progress.NewProgressBar(100, "Phase 1: Conversion") // Estimation

// 1.1 Trace Conversion
traceReq := conversion.ConvertTraceRequest{
SourcePath: cfg.InputTracePath,
TargetDBType: cfg.TargetPlugin,
}

// Simulation of progress for conversion (since streaming isn't fully exposed with progress callback yet)
p1Bar.Increment(10)
convRes, err := m.conversionSvc.ConvertFromFile(ctx, traceReq)
if err != nil {
return fmt.Errorf("conversion phase failed (traces): %w", err)
}
p1Bar.Increment(50)

// Save converted traces (optional but good for debugging/validation)
// Save converted traces
convertedTracePath := filepath.Join(cfg.OutputDir, "converted", "traces.jsonl")
if err := saveJSONL(convertedTracePath, convRes.Traces); err != nil {
return fmt.Errorf("failed to save converted traces: %w", err)
}
p1Bar.Increment(20)

// 1.2 Schema Conversion (if schema path provided)
if cfg.InputSchemaPath != "" {
Expand All @@ -84,75 +94,116 @@ func (m *Manager) Run(ctx context.Context, cfg WorkflowConfig) error {
return fmt.Errorf("conversion phase failed (schema): %w", err)
}
}
p1Bar.Finish()
fmt.Println(terminal.Success("Phase 1: Conversion complete"))
m.logger.Info("Phase 1: Conversion complete")

// ==========================================
// Phase 2: Generation
// ==========================================
m.logger.Info("Phase 2: Generation starting...")
p2Bar := progress.NewProgressBar(int64(cfg.Generation.Count), "Phase 2: Generation")

// Update Generation Request with converted traces
genReq := cfg.Generation
genReq.SourceTraces = convRes.Traces

// TODO: Add progress callback to generation service if possible, currently we wait
workload, err := m.generationSvc.GenerateWorkload(ctx, genReq)
if err != nil {
return fmt.Errorf("generation phase failed: %w", err)
}
// Complete the bar
p2Bar.Increment(int64(cfg.Generation.Count))
p2Bar.Finish()

workloadPath := filepath.Join(cfg.OutputDir, "workload", "benchmark.jsonl")
if err := saveJSONL(workloadPath, workload); err != nil {
return fmt.Errorf("failed to save workload: %w", err)
}
fmt.Println(terminal.Success("Phase 2: Generation complete"))
m.logger.Info("Phase 2: Generation complete")

// ==========================================
// Phase 3: Execution
// ==========================================
m.logger.Info("Phase 3: Execution starting...")
totalQueries := int64(len(workload.Queries))
p3Bar := progress.NewProgressBar(totalQueries, "Phase 3: Execution ")

execCfg := cfg.Execution
// Ensure TargetDB is set from top-level config if not in sub-config
if execCfg.TargetDB == "" {
execCfg.TargetDB = cfg.TargetPlugin
}

// We might need to wrap execution to update progress, but ExecutionService is black box here.
// For now, we just indicate start and end. Ideally we'd pass a progress channel.
p3Bar.Increment(1) // Started

result, err := m.executionSvc.RunBenchmark(ctx, workload, execCfg)
if err != nil {
return fmt.Errorf("execution phase failed: %w", err)
}

p3Bar.Increment(totalQueries) // Done
p3Bar.Finish()

resultPath := filepath.Join(cfg.OutputDir, "results", "metrics.json")
if err := saveJSON(resultPath, result); err != nil {
return fmt.Errorf("failed to save metrics: %w", err)
}
fmt.Println(terminal.Success("Phase 3: Execution complete"))
m.logger.Info("Phase 3: Execution complete")

// ==========================================
// Phase 4: Validation
// Phase 4: Validation & Reporting
// ==========================================
if cfg.BaselineMetricsPath != "" {
m.logger.Info("Phase 4: Validation starting...")
fmt.Println(terminal.Info("Phase 4: Validation starting..."))

// Load baseline
var baseline models.BenchmarkResult
if err := loadJSON(cfg.BaselineMetricsPath, &baseline); err != nil {
m.logger.Warn("Failed to load baseline metrics, skipping validation", utils.Field{Key: "error", Value: err})
fmt.Println(terminal.Warning("Skipping validation: could not load baseline metrics"))
} else {
report, err := m.validationSvc.ValidateBenchmarks(ctx, &baseline, result)
if err != nil {
return fmt.Errorf("validation phase failed: %w", err)
}

// Save report (HTML or JSON - for now let's save as JSON)
// Generate HTML Report
htmlReporter, err := reporters.NewHTMLReporter()
if err != nil {
m.logger.Error("Failed to initialize HTML reporter", utils.Field{Key: "error", Value: err})
} else {
htmlPath := filepath.Join(cfg.OutputDir, "validation_report.html")
if err := htmlReporter.GenerateReport(report, cfg.TargetPlugin, htmlPath); err != nil {
m.logger.Error("Failed to generate HTML report", utils.Field{Key: "error", Value: err})
} else {
fmt.Println(terminal.Success(fmt.Sprintf("HTML Report generated: %s", htmlPath)))
}
}

// Save JSON report
reportPath := filepath.Join(cfg.OutputDir, "report.json")
if err := saveJSON(reportPath, report); err != nil {
return fmt.Errorf("failed to save validation report: %w", err)
}
m.logger.Info("Phase 4: Validation complete", utils.Field{Key: "status", Value: report.Status})

statusMsg := fmt.Sprintf("Phase 4: Validation complete. Status: %s", utils.SafeString(report.Pass))
if report.Pass {
fmt.Println(terminal.Success(statusMsg))
} else {
fmt.Println(terminal.Error(statusMsg))
}
m.logger.Info("Phase 4: Validation complete", utils.Field{Key: "status", Value: report.Pass})
}
} else {
m.logger.Info("Phase 4: Validation skipped (no baseline provided)")
fmt.Println(terminal.Warning("Phase 4: Validation skipped (no baseline provided)"))
}

return nil
Expand All @@ -170,10 +221,6 @@ func saveJSONL(path string, data interface{}) error {

enc := json.NewEncoder(f)

// If it's a slice of things, encode each one.
// But `data` here can be `[]models.SQLTrace` or `*models.BenchmarkWorkload`.
// For `BenchmarkWorkload`, we probably want to save queries one per line.

switch v := data.(type) {
case []models.SQLTrace:
for _, t := range v {
Expand All @@ -188,7 +235,6 @@ func saveJSONL(path string, data interface{}) error {
}
}
default:
// Fallback: just dump as one JSON object (not JSONL actually)
return enc.Encode(data)
}
return nil
Expand Down
1 change: 1 addition & 0 deletions internal/app/workflow/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ type WorkflowConfig struct {

// Settings
TargetPlugin string `yaml:"target_plugin"`
ReportStyle string `yaml:"report_style"` // html, json

// Phase Configs
Generation generation.GenerateRequest `yaml:"generation"`
Expand Down
Loading