Merged
90 changes: 90 additions & 0 deletions docs/user-guide/yaml-reference.qmd
@@ -22,6 +22,12 @@
label: "Validation Description"       # OPTIONAL: Description for the validation
lang: "en" # OPTIONAL: Language code (default: "en")
locale: "en" # OPTIONAL: Locale setting (default: "en")
brief: "Global brief: {auto}" # OPTIONAL: Global brief template
owner: "Data Engineering" # OPTIONAL: Data owner (governance metadata)
consumers: [Analytics, Finance] # OPTIONAL: Data consumers (governance metadata)
version: "1.0.0" # OPTIONAL: Validation version (governance metadata)
reference:                            # OPTIONAL: Reference table for comparison validations
  python: |
    pb.load_dataset("ref_table")
thresholds:                           # OPTIONAL: Global failure thresholds
  warning: 0.1
  error: 0.2
@@ -31,6 +37,9 @@
actions:                              # OPTIONAL: Global failure actions
  error: "Error message template"
  critical: "Critical message template"
  highest_only: false
final_actions:                        # OPTIONAL: Actions triggered after all steps complete
  warning: "Post-validation warning"
  error: "Post-validation error"
steps:                                # REQUIRED: List of validation steps
  - validation_method_name
  - validation_method_name:
@@ -838,6 +847,68 @@
Examples:
- Performance-critical validations with large datasets
- When deterministic results are required

### Data Quality Methods

`col_pct_null`: is the percentage of null values in a column within bounds?

```yaml
- col_pct_null:
    columns: [column_name]       # REQUIRED: Column(s) to validate
    value: 0.05                  # REQUIRED: Maximum allowed null fraction (0.05 = 5%)
    thresholds:                  # OPTIONAL: Step-level thresholds
      warning: 0.1
    actions:                     # OPTIONAL: Step-level actions
      warning: "Custom message"
    brief: "Null rate check"     # OPTIONAL: Step description
```
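
The pass/fail semantics can be sketched in plain Python. Here `pct_null_check` is a hypothetical helper for illustration, not part of the Pointblank API:

```python
def pct_null_check(values, max_null_fraction):
    """Return (null_fraction, passed) for one column's values."""
    nulls = sum(1 for v in values if v is None)
    fraction = nulls / len(values)
    return fraction, fraction <= max_null_fraction

# One null out of four values is a 25% null rate, so a 0.05 cap fails
fraction, passed = pct_null_check([1, None, 3, 4], 0.05)
```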

`data_freshness`: is the data in a date/datetime column recent?

```yaml
- data_freshness:
    columns: [date_column]       # REQUIRED: Date/datetime column
    freshness: "24h"             # REQUIRED: Maximum age of data
    thresholds:                  # OPTIONAL: Step-level thresholds
      warning: 0.1
    actions:                     # OPTIONAL: Step-level actions
      warning: "Custom message"
    brief: "Data is recent"      # OPTIONAL: Step description
```
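
One way to read a `freshness` window like `"24h"` is as a `timedelta` cutoff. The helper below is an illustrative sketch; the units it accepts are assumptions, not Pointblank's actual parser:

```python
import re
from datetime import datetime, timedelta

_UNITS = {"m": "minutes", "h": "hours", "d": "days"}

def parse_freshness(spec: str) -> timedelta:
    """Convert a spec like '24h' into a timedelta (illustrative units only)."""
    match = re.fullmatch(r"(\d+)([mhd])", spec)
    if match is None:
        raise ValueError(f"unrecognized freshness spec: {spec!r}")
    return timedelta(**{_UNITS[match.group(2)]: int(match.group(1))})

def is_fresh(latest: datetime, spec: str, now: datetime) -> bool:
    """True when the newest timestamp falls inside the freshness window."""
    return now - latest <= parse_freshness(spec)
```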

### Aggregate Validations

Aggregate methods validate column-level statistics (sum, average, standard deviation) against a
comparison value. They follow the pattern `col_{stat}_{comparator}`:

```yaml
# Sum validations
- col_sum_gt:
    columns: [revenue]
    value: 0
    brief: "Total revenue is positive"

# Average validations
- col_avg_le:
    columns: [rating]
    value: 5
    brief: "Average rating at most 5"

# Standard deviation validations
- col_sd_lt:
    columns: [temperature]
    value: 10
    brief: "Temperature variation is bounded"
```

Available aggregate methods:

- **Sum**: `col_sum_gt`, `col_sum_lt`, `col_sum_ge`, `col_sum_le`, `col_sum_eq`
- **Average**: `col_avg_gt`, `col_avg_lt`, `col_avg_ge`, `col_avg_le`, `col_avg_eq`
- **Standard deviation**: `col_sd_gt`, `col_sd_lt`, `col_sd_ge`, `col_sd_le`, `col_sd_eq`

All aggregate methods accept these common parameters: `columns`, `value`, `thresholds`, `actions`,
`brief`, `active`, and `pre`. Range comparators (`between` and `outside`, as in the
`col_avg_between` example in the CLI template) take `left` and `right` bounds in place of `value`.
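
The `col_{stat}_{comparator}` naming can be dispatched mechanically. This standalone sketch shows the idea with the standard library; it is not Pointblank's implementation:

```python
import operator
import statistics

# Hypothetical dispatcher mirroring the col_{stat}_{comparator} pattern
STATS = {"sum": sum, "avg": statistics.mean, "sd": statistics.stdev}
COMPARATORS = {"gt": operator.gt, "lt": operator.lt, "ge": operator.ge,
               "le": operator.le, "eq": operator.eq}

def aggregate_check(method: str, values: list[float], value: float) -> bool:
    """Evaluate e.g. aggregate_check('col_sum_gt', [...], 0)."""
    stat_name, comp_name = method.removeprefix("col_").split("_")
    return COMPARATORS[comp_name](STATS[stat_name](values), value)
```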

## Column Selection Patterns

All validation methods that accept a `columns` parameter support these selection patterns:
@@ -871,6 +942,25 @@
These parameters are available for most validation methods:
- `thresholds`: step-level failure thresholds (dict)
- `actions`: step-level failure actions (dict)
- `brief`: step description (string, boolean, or template)
- `active`: whether the step is active (boolean, default: true)

### Active Parameter

The `active` parameter controls whether a validation step runs. It defaults to `true`; set it to
`false` to skip a step without removing it from the configuration:

```yaml
steps:
  # This step will be skipped
  - col_vals_gt:
      columns: [amount]
      value: 0
      active: false

  # This step runs normally (default active: true)
  - col_vals_not_null:
      columns: [customer_id]
```
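
A loader honoring this flag only needs a default of `True` when the key is absent. A minimal sketch over an already-parsed config (plain dicts; `active_steps` is a hypothetical helper):

```python
def active_steps(steps):
    """Drop steps whose 'active' flag is explicitly false."""
    kept = []
    for step in steps:
        if isinstance(step, str):          # bare method-name shorthand
            kept.append(step)
            continue
        (name, params), = step.items()     # {"method_name": {...params}}
        if params.get("active", True):
            kept.append(step)
    return kept
```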

### Brief Parameter Options

105 changes: 105 additions & 0 deletions docs/user-guide/yaml-validation-workflows.qmd
@@ -722,6 +722,111 @@
Brief Templating Options:
- `{value}`: the comparison value used in the validation (for single-value comparisons)
- `{pattern}`: for regex validations, the pattern being matched
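
Placeholders of this shape behave like `str.format` fields. A sketch, assuming simple string substitution (which may differ from Pointblank's actual renderer):

```python
def render_brief(template: str, **context) -> str:
    """Fill placeholders such as {value} in a brief template."""
    return template.format(**context)

message = render_brief("revenue values must exceed {value}", value=0)
```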

### Governance Metadata

YAML workflows support governance metadata that identifies ownership and usage of validation
workflows. These fields are embedded in the validation report:

```yaml
tbl: sales_data.csv
tbl_name: "Sales Pipeline"
owner: "Data Engineering"
consumers: [Analytics Team, Finance, Compliance]
version: "2.1.0"
steps:
  - col_vals_not_null:
      columns: [customer_id, revenue]
  - col_vals_gt:
      columns: [revenue]
      value: 0
```

The `owner`, `consumers`, and `version` fields are forwarded to the `Validate` constructor and
appear in the validation report header. These fields are optional and do not affect validation
behavior.
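
Since these keys do not affect validation, pulling them out of a parsed config is a dictionary lookup. A sketch over plain dicts (`governance_metadata` is a hypothetical helper, not Pointblank API):

```python
GOVERNANCE_KEYS = ("owner", "consumers", "version")

def governance_metadata(config: dict) -> dict:
    """Collect the optional governance fields from a parsed YAML config."""
    return {key: config[key] for key in GOVERNANCE_KEYS if key in config}

config = {
    "tbl": "sales_data.csv",
    "owner": "Data Engineering",
    "consumers": ["Analytics Team", "Finance", "Compliance"],
    "version": "2.1.0",
}
meta = governance_metadata(config)
```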

### Data Freshness and Null Percentage

Two additional validation methods support common data quality checks:

**`data_freshness`**: Validate that a date/datetime column has recent data:

```yaml
steps:
  - data_freshness:
      columns: event_date
      freshness: "24h"
```

**`col_pct_null`**: Validate that the percentage of null values is within bounds:

```yaml
steps:
  - col_pct_null:
      columns: [email, phone]
      value: 0.05
```

### Aggregate Validations

Aggregate methods validate column-level statistics like sum, average, and standard deviation:

```yaml
steps:
  # Check that total revenue is positive
  - col_sum_gt:
      columns: [revenue]
      value: 0

  # Validate average rating is at most 5
  - col_avg_le:
      columns: [rating]
      value: 5

  # Ensure temperature variation is bounded
  - col_sd_lt:
      columns: [temperature]
      value: 10
```

Available methods follow the `col_{stat}_{comparator}` pattern where `{stat}` is `sum`, `avg`, or
`sd`, and `{comparator}` is `gt`, `lt`, `ge`, `le`, `eq`, `between`, or `outside`.
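
Taken literally, that pattern yields a 3 × 7 grid of method names, which is easy to enumerate:

```python
from itertools import product

stats = ["sum", "avg", "sd"]
comparators = ["gt", "lt", "ge", "le", "eq", "between", "outside"]
methods = [f"col_{s}_{c}" for s, c in product(stats, comparators)]
```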

### Step Activation Control

The `active` parameter allows you to temporarily disable validation steps without removing them
from the configuration:

```yaml
steps:
  # This step is disabled
  - col_vals_gt:
      columns: [amount]
      value: 0
      active: false

  # This step runs normally (active: true is the default)
  - col_vals_not_null:
      columns: [customer_id]
```

This is useful for debugging, phased rollouts, or temporarily skipping steps that are known to fail.

### Reference Tables

The `reference` top-level key specifies a reference table for comparison-based validations:

```yaml
tbl: current_data.csv
reference:
  python: |
    pb.load_dataset("baseline_data", tbl_type="polars")
steps:
  - tbl_match:
      tbl_compare:
        python: |
          pb.load_dataset("baseline_data", tbl_type="polars")
```
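
The `python:` block holds a single expression whose result becomes the reference table. A sketch of how such a snippet could be resolved; `resolve_reference` and the stand-in `load` are illustrative only, and real configs run against Pointblank's own loader:

```python
def resolve_reference(spec: dict, env: dict):
    """Evaluate the expression under a reference spec's 'python:' key."""
    return eval(compile(spec["python"], "<reference>", "eval"), env)

# Stand-in loader so the sketch runs without Pointblank installed
env = {"load": lambda name: f"table:{name}"}
table = resolve_reference({"python": 'load("baseline_data")\n'}, env)
```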

## Working with YAML Files

29 changes: 29 additions & 0 deletions pointblank/cli.py
@@ -3944,12 +3944,20 @@
def make_template(output_file: str | None):
tbl: small_table # Replace with your data source
# Can be: dataset name, CSV file, Parquet file, database connection, etc.

# Optional: DataFrame library ("polars", "pandas", "duckdb")
# df_library: polars

# Optional: Table name for reporting (defaults to filename if not specified)
tbl_name: "Example Validation"

# Optional: Label for this validation run
label: "Validation Template"

# Optional: Governance metadata
# owner: "Data Engineering"
# consumers: [Analytics, Finance]
# version: "1.0.0"

# Optional: Validation thresholds (defaults shown below)
# thresholds:
#   warning: 0.05    # 5% failure rate triggers warning
@@ -3993,6 +4001,27 @@
#     columns: status
#     set: [active, inactive, pending]

# Aggregate validations (uncomment and modify as needed)
# - col_sum_gt:
#     columns: revenue
#     value: 0
#     brief: "Total revenue is positive"

# - col_avg_between:
#     columns: rating
#     left: 1
#     right: 5

# Check null percentage (uncomment and modify as needed)
# - col_pct_null:
#     columns: [email, phone]
#     value: 0.05

# Data freshness check (uncomment and modify as needed)
# - data_freshness:
#     columns: event_date
#     freshness: "24h"

# Add more validation steps as needed
# See the Pointblank documentation for the full list of available validation functions
"""