Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ SQLTraceBench is a tool for benchmarking database performance using trace-based
- **Multi-Database Support**: A plugin-based architecture supports multiple databases (e.g., MySQL, ClickHouse, StarRocks).
- **Statistical Modeling**: Uses statistical models to synthesize realistic parameter values.
- **Extensible**: Easily add new database dialects and workload patterns.
- **Visual Reports**: Generates comprehensive HTML validation reports with interactive charts.

## Getting Started

Expand All @@ -19,6 +20,16 @@ It covers:
* Running conversion, generation, and benchmarking commands
* Using the automated `examples/quickstart.sh` script

## Validation Reports

SQLTraceBench generates detailed validation reports to compare your benchmark results against a baseline.

![Validation Report](docs/images/report_preview.png)

*Example of an HTML validation report showing QPS deviation and latency distribution.*

See [Report Interpretation Guide](docs/user_guide/report_interpretation.md) for details on how to read the reports.

## Development

1. **Build**: `make build`
Expand Down
13 changes: 13 additions & 0 deletions chart.min.js

Large diffs are not rendered by default.

41 changes: 40 additions & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package cmd

import (
"bufio"
"fmt"
"os"
"strings"

"github.com/spf13/cobra"
"github.com/turtacn/SQLTraceBench/internal/app/conversion"
Expand All @@ -10,6 +13,7 @@ import (
"github.com/turtacn/SQLTraceBench/internal/app/validation"
"github.com/turtacn/SQLTraceBench/internal/app/workflow"
"github.com/turtacn/SQLTraceBench/internal/infrastructure/parsers"
"github.com/turtacn/SQLTraceBench/internal/utils/terminal"
"github.com/turtacn/SQLTraceBench/pkg/config"
"github.com/turtacn/SQLTraceBench/pkg/types"
"github.com/turtacn/SQLTraceBench/pkg/utils"
Expand All @@ -21,12 +25,20 @@ var (
Version = types.Version
cfgFile string
pluginDir string
noColor bool
verbose bool
autoYes bool // For workflow run
cfg *types.Config
rootCmd = &cobra.Command{
Use: "sqltracebench",
Short: "SQL trace-based workload benchmark CLI",
Version: Version,
PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
// Handle flags
if noColor {
terminal.ColorEnabled = false
}

// Load the configuration.
var err error
cfg, err = config.Load(cfgFile)
Expand All @@ -35,7 +47,12 @@ var (
}

// Initialize the logger.
logger := utils.NewLogger(cfg.Log.Level, cfg.Log.Format, nil)
// If verbose is on, maybe force Debug level?
logLevel := cfg.Log.Level
if verbose {
logLevel = "debug"
}
logger := utils.NewLogger(logLevel, cfg.Log.Format, nil)
utils.SetGlobalLogger(logger)

// Load plugins
Expand Down Expand Up @@ -71,6 +88,25 @@ var workflowRunCmd = &cobra.Command{
return err
}

// Confirmation Step
if !autoYes && terminal.IsTerminal() {
fmt.Println(terminal.Info("Workflow Plan:"))
fmt.Printf(" Target Plugin: %s\n", pipelineCfg.TargetPlugin)
fmt.Printf(" Input Traces: %s\n", pipelineCfg.InputTracePath)
fmt.Printf(" Generation Count: %d\n", pipelineCfg.Generation.Count)
fmt.Printf(" Concurrency: %d\n", pipelineCfg.Execution.Concurrency)
fmt.Printf(" Output Dir: %s\n", pipelineCfg.OutputDir)

fmt.Print("\nDo you want to proceed? [y/N]: ")
reader := bufio.NewReader(os.Stdin)
input, _ := reader.ReadString('\n')
input = strings.TrimSpace(strings.ToLower(input))
if input != "y" && input != "yes" {
fmt.Println(terminal.Warning("Workflow cancelled by user."))
return nil
}
}

// Initialize Services
parser := parsers.NewAntlrParser()
registry := plugin_registry.GlobalRegistry
Expand All @@ -91,8 +127,11 @@ var workflowRunCmd = &cobra.Command{
func init() {
rootCmd.PersistentFlags().StringVar(&cfgFile, "config", types.DefaultConfigPath, "config file")
rootCmd.PersistentFlags().StringVar(&pluginDir, "plugin-dir", "./bin", "Directory where plugins are located")
rootCmd.PersistentFlags().BoolVar(&noColor, "no-color", false, "Disable color output")
rootCmd.PersistentFlags().BoolVarP(&verbose, "verbose", "v", false, "Enable verbose output")

workflowRunCmd.Flags().StringP("config", "c", "", "Pipeline config YAML")
workflowRunCmd.Flags().BoolVarP(&autoYes, "yes", "y", false, "Skip confirmation prompt")
workflowRunCmd.MarkFlagRequired("config")
workflowCmd.AddCommand(workflowRunCmd)
rootCmd.AddCommand(workflowCmd)
Expand Down
3 changes: 3 additions & 0 deletions configs/workflow_example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ input_schema_path: "testdata/fixtures/mysql_schema.sql"
output_dir: "output/pipeline_example"
target_plugin: "clickhouse"

# Report Style Configuration (Optional)
report_style: "html" # Options: html, json

generation:
count: 1000

Expand Down
Binary file added docs/images/report_preview.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
51 changes: 51 additions & 0 deletions docs/user_guide/report_interpretation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Validation Report Interpretation Guide

This guide explains how to interpret the `validation_report.html` generated by SQLTraceBench after a benchmark run.

## 1. Report Overview

The report provides a comprehensive view of the benchmark results, comparing the performance of the candidate database (or configuration) against a baseline.

### Key Sections:

1. **Status Card**: Immediate visual feedback on whether the validation passed or failed.
2. **Performance Metrics**: Charts visualizing QPS (Queries Per Second) and Latency distributions.
3. **Statistical Validation**: Detailed table of statistical tests performed (e.g., KS Test, Chi-Square) and their results.

## 2. Key Metrics & Interpretation

### 2.1 QPS Deviation
**Definition:** `(Actual QPS - Baseline QPS) / Baseline QPS * 100%`

**Interpretation:**
* **Green (|Deviation| < 5%)**: Excellent match. The candidate performs similarly to the baseline.
* **Yellow (5% ≤ |Deviation| < 15%)**: Acceptable variance. Minor tuning may be required.
* **Red (|Deviation| ≥ 15%)**: Significant deviation. Requires investigation.

**Common Scenarios:**
* **Negative Deviation (e.g., -20%)**: Candidate is slower. Check resource utilization (CPU, IO), index usage, or locking issues.
* **Positive Deviation (e.g., +20%)**: Candidate is faster. While generally good, if the goal is to *replicate* behavior, this might indicate the candidate is skipping work or caching more aggressively.

### 2.2 Statistical Tests

#### KS Test (Kolmogorov-Smirnov)
**Purpose:** Checks if the latency distribution of the candidate matches the baseline.

**p-value:**
* **p > 0.05**: PASS. No significant difference in distributions.
* **p ≤ 0.05**: FAIL. Significant difference detected.

#### Chi-Square Test
**Purpose:** Checks goodness-of-fit for categorical data or binned distributions (e.g., query-type frequencies), verifying that the candidate's observed frequencies match the baseline's expected frequencies. As with the KS test, a p-value above 0.05 indicates no significant difference (PASS).

## 3. Troubleshooting

**If Status is FAIL:**
1. Check **QPS Deviation**. Is the system under too much load?
2. Examine **Latency Charts**. Is there a long tail? Are P99 latencies spiking?
3. Review **Error Rates**. High error rates will invalidate performance metrics.
4. Check logs for specific query failures.

**Tips:**
* Hover over charts to see exact values.
* Use the "Baseline" values as your ground truth.
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ require (
golang.org/x/mod v0.28.0 // indirect
golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82 // indirect
golang.org/x/sync v0.17.0 // indirect
golang.org/x/sys v0.37.0 // indirect
golang.org/x/sys v0.38.0 // indirect
golang.org/x/term v0.37.0 // indirect
golang.org/x/text v0.30.0 // indirect
golang.org/x/tools v0.37.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20251022142026-3a174f9686a8 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,11 @@ golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU=
golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
Expand Down
66 changes: 56 additions & 10 deletions internal/app/workflow/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ import (
"github.com/turtacn/SQLTraceBench/internal/app/generation"
"github.com/turtacn/SQLTraceBench/internal/app/validation"
"github.com/turtacn/SQLTraceBench/internal/domain/models"
"github.com/turtacn/SQLTraceBench/pkg/utils"
"github.com/turtacn/SQLTraceBench/internal/infrastructure/reporters"
"github.com/turtacn/SQLTraceBench/internal/utils"
"github.com/turtacn/SQLTraceBench/internal/utils/progress"
"github.com/turtacn/SQLTraceBench/internal/utils/terminal"
)

// Manager coordinates the workflow pipeline.
Expand Down Expand Up @@ -43,6 +46,7 @@ func NewManager(
// Run executes the full 4-phase pipeline.
func (m *Manager) Run(ctx context.Context, cfg WorkflowConfig) error {
m.logger.Info("Starting Workflow", utils.Field{Key: "config", Value: cfg})
fmt.Println(terminal.Info("Starting SQLTraceBench Workflow..."))

if err := os.MkdirAll(cfg.OutputDir, 0755); err != nil {
return fmt.Errorf("failed to create output dir: %w", err)
Expand All @@ -52,22 +56,28 @@ func (m *Manager) Run(ctx context.Context, cfg WorkflowConfig) error {
// Phase 1: Conversion
// ==========================================
m.logger.Info("Phase 1: Conversion starting...")
p1Bar := progress.NewProgressBar(100, "Phase 1: Conversion") // Estimation

// 1.1 Trace Conversion
traceReq := conversion.ConvertTraceRequest{
SourcePath: cfg.InputTracePath,
TargetDBType: cfg.TargetPlugin,
}

// Simulation of progress for conversion (since streaming isn't fully exposed with progress callback yet)
p1Bar.Increment(10)
convRes, err := m.conversionSvc.ConvertFromFile(ctx, traceReq)
if err != nil {
return fmt.Errorf("conversion phase failed (traces): %w", err)
}
p1Bar.Increment(50)

// Save converted traces (optional but good for debugging/validation)
// Save converted traces
convertedTracePath := filepath.Join(cfg.OutputDir, "converted", "traces.jsonl")
if err := saveJSONL(convertedTracePath, convRes.Traces); err != nil {
return fmt.Errorf("failed to save converted traces: %w", err)
}
p1Bar.Increment(20)

// 1.2 Schema Conversion (if schema path provided)
if cfg.InputSchemaPath != "" {
Expand All @@ -84,75 +94,116 @@ func (m *Manager) Run(ctx context.Context, cfg WorkflowConfig) error {
return fmt.Errorf("conversion phase failed (schema): %w", err)
}
}
p1Bar.Finish()
fmt.Println(terminal.Success("Phase 1: Conversion complete"))
m.logger.Info("Phase 1: Conversion complete")

// ==========================================
// Phase 2: Generation
// ==========================================
m.logger.Info("Phase 2: Generation starting...")
p2Bar := progress.NewProgressBar(int64(cfg.Generation.Count), "Phase 2: Generation")

// Update Generation Request with converted traces
genReq := cfg.Generation
genReq.SourceTraces = convRes.Traces

// TODO: Add progress callback to generation service if possible, currently we wait
workload, err := m.generationSvc.GenerateWorkload(ctx, genReq)
if err != nil {
return fmt.Errorf("generation phase failed: %w", err)
}
// Complete the bar
p2Bar.Increment(int64(cfg.Generation.Count))
p2Bar.Finish()

workloadPath := filepath.Join(cfg.OutputDir, "workload", "benchmark.jsonl")
if err := saveJSONL(workloadPath, workload); err != nil {
return fmt.Errorf("failed to save workload: %w", err)
}
fmt.Println(terminal.Success("Phase 2: Generation complete"))
m.logger.Info("Phase 2: Generation complete")

// ==========================================
// Phase 3: Execution
// ==========================================
m.logger.Info("Phase 3: Execution starting...")
totalQueries := int64(len(workload.Queries))
p3Bar := progress.NewProgressBar(totalQueries, "Phase 3: Execution ")

execCfg := cfg.Execution
// Ensure TargetDB is set from top-level config if not in sub-config
if execCfg.TargetDB == "" {
execCfg.TargetDB = cfg.TargetPlugin
}

// We might need to wrap execution to update progress, but ExecutionService is black box here.
// For now, we just indicate start and end. Ideally we'd pass a progress channel.
p3Bar.Increment(1) // Started

result, err := m.executionSvc.RunBenchmark(ctx, workload, execCfg)
if err != nil {
return fmt.Errorf("execution phase failed: %w", err)
}

p3Bar.Increment(totalQueries) // Done
p3Bar.Finish()

resultPath := filepath.Join(cfg.OutputDir, "results", "metrics.json")
if err := saveJSON(resultPath, result); err != nil {
return fmt.Errorf("failed to save metrics: %w", err)
}
fmt.Println(terminal.Success("Phase 3: Execution complete"))
m.logger.Info("Phase 3: Execution complete")

// ==========================================
// Phase 4: Validation
// Phase 4: Validation & Reporting
// ==========================================
if cfg.BaselineMetricsPath != "" {
m.logger.Info("Phase 4: Validation starting...")
fmt.Println(terminal.Info("Phase 4: Validation starting..."))

// Load baseline
var baseline models.BenchmarkResult
if err := loadJSON(cfg.BaselineMetricsPath, &baseline); err != nil {
m.logger.Warn("Failed to load baseline metrics, skipping validation", utils.Field{Key: "error", Value: err})
fmt.Println(terminal.Warning("Skipping validation: could not load baseline metrics"))
} else {
report, err := m.validationSvc.ValidateBenchmarks(ctx, &baseline, result)
if err != nil {
return fmt.Errorf("validation phase failed: %w", err)
}

// Save report (HTML or JSON - for now let's save as JSON)
// Generate HTML Report
htmlReporter, err := reporters.NewHTMLReporter()
if err != nil {
m.logger.Error("Failed to initialize HTML reporter", utils.Field{Key: "error", Value: err})
} else {
htmlPath := filepath.Join(cfg.OutputDir, "validation_report.html")
if err := htmlReporter.GenerateReport(report, cfg.TargetPlugin, htmlPath); err != nil {
m.logger.Error("Failed to generate HTML report", utils.Field{Key: "error", Value: err})
} else {
fmt.Println(terminal.Success(fmt.Sprintf("HTML Report generated: %s", htmlPath)))
}
}

// Save JSON report
reportPath := filepath.Join(cfg.OutputDir, "report.json")
if err := saveJSON(reportPath, report); err != nil {
return fmt.Errorf("failed to save validation report: %w", err)
}
m.logger.Info("Phase 4: Validation complete", utils.Field{Key: "status", Value: report.Status})

statusMsg := fmt.Sprintf("Phase 4: Validation complete. Status: %s", utils.SafeString(report.Pass))
if report.Pass {
fmt.Println(terminal.Success(statusMsg))
} else {
fmt.Println(terminal.Error(statusMsg))
}
m.logger.Info("Phase 4: Validation complete", utils.Field{Key: "status", Value: report.Pass})
}
} else {
m.logger.Info("Phase 4: Validation skipped (no baseline provided)")
fmt.Println(terminal.Warning("Phase 4: Validation skipped (no baseline provided)"))
}

return nil
Expand All @@ -170,10 +221,6 @@ func saveJSONL(path string, data interface{}) error {

enc := json.NewEncoder(f)

// If it's a slice of things, encode each one.
// But `data` here can be `[]models.SQLTrace` or `*models.BenchmarkWorkload`.
// For `BenchmarkWorkload`, we probably want to save queries one per line.

switch v := data.(type) {
case []models.SQLTrace:
for _, t := range v {
Expand All @@ -188,7 +235,6 @@ func saveJSONL(path string, data interface{}) error {
}
}
default:
// Fallback: just dump as one JSON object (not JSONL actually)
return enc.Encode(data)
}
return nil
Expand Down
1 change: 1 addition & 0 deletions internal/app/workflow/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ type WorkflowConfig struct {

// Settings
TargetPlugin string `yaml:"target_plugin"`
ReportStyle string `yaml:"report_style"` // html, json

// Phase Configs
Generation generation.GenerateRequest `yaml:"generation"`
Expand Down
Loading