Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions models/metrics/metrics_daily_orders.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{{
    config(
        materialized = 'incremental',
        unique_key = 'order_date',
        incremental_strategy = 'delete+insert'
    )
}}

-- SAFE INCREMENTAL: deterministic else branch — NO false alarm.
--
-- This model IS incremental, but its else branch (used on fresh CI builds)
-- uses a FIXED date range with no target.name or current_date() dependency.
-- Both pg-base and pg-current produce identical SQL on first build.
--
-- Contrast with metrics_daily_shipments which has target.name in the else
-- branch → different SQL per target → false alarm.
--
-- Key insight: is_incremental() alone does NOT cause false alarms.
-- The false alarm comes from non-deterministic logic INSIDE the branches.

-- Nested quoting is intentional: the outer double quotes are Jinja, the inner
-- single quotes survive into the compiled SQL, so this renders as the string
-- literal '1998-08-02' (presumably the dataset's last order date — confirm).
{% set reference_date = "'1998-08-02'" %}

-- Daily order-level metrics keyed by order_date (one row per day).
select
    o.order_date,
    count(distinct o.order_key) as order_count,
    count(distinct o.customer_key) as customer_count,
    sum(o.gross_item_sales_amount)::decimal(16,4) as total_revenue,
    avg(o.gross_item_sales_amount)::decimal(16,4) as avg_order_value
from
    {{ ref('fct_orders') }} o
where
    o.order_date is not null
    {% if is_incremental() %}
    -- Incremental run: process only dates newer than what the target table
    -- already holds, capped at the same fixed reference date.
    and o.order_date > (select max(order_date) from {{ this }})
    and o.order_date <= {{ reference_date }}::date
    {% else %}
    -- First/full build: identical fixed cap, no environment-dependent logic,
    -- so every target compiles to byte-identical SQL here.
    and o.order_date <= {{ reference_date }}::date
    {% endif %}
group by
    o.order_date
order by
    o.order_date
47 changes: 47 additions & 0 deletions models/metrics/metrics_daily_shipments.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{{
    config(
        materialized = 'incremental',
        unique_key = 'ship_date',
        incremental_strategy = 'delete+insert'
    )
}}

-- Demonstrates why incremental models cause false alarms in Recce.
--
-- The root cause is NOT "data accumulation" — it's the conditional logic
-- below that produces DIFFERENT SQL depending on build context:
--
--   is_incremental() = true  → filters from max(ship_date) in existing table
--   is_incremental() = false → filters last N days from a reference date
--                              (N depends on target: prod gets 365 days, dev/PR gets 90 days)
--
-- Two environments built at different times or with different history
-- will run different SQL → different results → false alarm diffs.
--
-- This mirrors real-world patterns like the fct_cmab_strategy_reward example
-- where prod gets -8 days and dev gets -2 days from current_date().

-- Nested quoting: outer double quotes are Jinja; compiles to the SQL string
-- literal '1998-08-02'.
{% set reference_date = "'1998-08-02'" %}

-- Daily shipment metrics keyed by ship_date (one row per day).
select
    oi.ship_date,
    count(*) as shipment_count,
    count(distinct oi.order_key) as order_count,
    count(distinct oi.supplier_key) as supplier_count,
    sum(oi.gross_item_sales_amount)::decimal(16,4) as total_revenue,
    avg(oi.gross_item_sales_amount)::decimal(16,4) as avg_revenue_per_item
from
    {{ ref('orders_items') }} oi
where
    oi.ship_date is not null
    {% if is_incremental() %}
    -- Incremental run: only ship dates newer than the target table's max,
    -- capped at the fixed reference date. Deterministic by itself.
    and oi.ship_date > (select max(ship_date) from {{ this }})
    and oi.ship_date <= {{ reference_date }}::date
    {% else %}
    -- NON-DETERMINISTIC: the window length depends on target.name
    -- (365 days on pg-base, 90 days otherwise). This branch — not
    -- is_incremental() itself — is what triggers the false-alarm diff.
    and oi.ship_date >= {{ reference_date }}::date - interval '{{ 365 if target.name == "pg-base" else 90 }} days'
    and oi.ship_date <= {{ reference_date }}::date
    {% endif %}
group by
    oi.ship_date
order by
    oi.ship_date
29 changes: 29 additions & 0 deletions models/metrics/metrics_order_summary.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{{
    config(
        materialized = 'view'
    )
}}

-- Monthly order summary by priority level
--
-- FALSE ALARM DEMO: VIEW with target-dependent date window.
-- pg-base gets 5 years of history, pg-current gets 1 year.
-- Even views — which don't store data — produce different results
-- when their SQL definition varies by build context.
-- This is NOT incremental, NOT a table — it's a plain view.

-- Nested quoting: compiles to the SQL string literal '1998-08-02'.
{% set reference_date = "'1998-08-02'" %}

select
    date_trunc('month', o.order_date) as order_month,
    o.order_priority_code as priority,
    count(distinct o.order_key) as order_count,
    count(distinct o.customer_key) as customer_count,
    sum(o.gross_item_sales_amount)::decimal(16,4) as total_revenue,
    avg(o.gross_item_sales_amount)::decimal(16,4) as avg_order_value
from
    {{ ref('fct_orders') }} o
where
    -- NON-DETERMINISTIC window: 1825 days (5y) on pg-base vs 365 days (1y)
    -- elsewhere — this target.name branch is what causes the false alarm.
    o.order_date >= {{ reference_date }}::date - interval '{{ 1825 if target.name == "pg-base" else 365 }} days'
    and o.order_date <= {{ reference_date }}::date
group by
    -- Explicit expressions instead of positional "group by 1, 2": positional
    -- references silently break when the select list is reordered.
    date_trunc('month', o.order_date),
    o.order_priority_code
11 changes: 11 additions & 0 deletions models/metrics/metrics_regional_revenue.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
-- Revenue by region and nation over time
--
-- FALSE ALARM DEMO: TABLE model with target-dependent date window.
-- pg-base gets 7 years of history, pg-current gets 2 years.
-- Same pattern as prod vs dev environments with different data needs.
-- This is NOT incremental — it's a plain table with conditional logic.

{% set reference_date = "'1998-08-02'" %}

with orders as (

select * from {{ ref('fct_orders') }}
Expand All @@ -20,4 +28,7 @@ select
from
orders o
join customers c on o.customer_key = c.customer_key
where
o.order_date >= {{ reference_date }}::date - interval '{{ 2555 if target.name == "pg-base" else 730 }} days'
and o.order_date <= {{ reference_date }}::date
group by 1, 2, 3
17 changes: 16 additions & 1 deletion models/metrics/metrics_shipping_efficiency.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
-- Average delivery time by shipping mode per month
--
-- FALSE ALARM DEMO: TABLE model with target.name branching.
-- pg-base analyzes 7 years of ship dates, pg-current only 2 years.
-- Mirrors real pattern: prod builds full history, dev builds subset.
-- This is NOT incremental — it's a plain table with conditional logic.

{% set reference_date = "'1998-08-02'" %}

with items as (

select * from {{ ref('fct_orders_items') }}
Expand All @@ -15,5 +23,12 @@ select
round(sum(case when i.receipt_date > i.commit_date then 1 else 0 end)::decimal
/ nullif(count(*), 0) * 100, 2) as late_pct
from items i
where i.receipt_date is not null
where
i.receipt_date is not null
{% if target.name == 'pg-base' %}
and i.ship_date >= {{ reference_date }}::date - interval '2555 days'
{% else %}
and i.ship_date >= {{ reference_date }}::date - interval '730 days'
{% endif %}
and i.ship_date <= {{ reference_date }}::date
group by 1, 2
149 changes: 149 additions & 0 deletions scripts/compare_detection_approaches.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
"""Compare detection accuracy: Jinja scanning vs compiled SQL diffing.

Runs both approaches on the same dbt project and compares results.

Usage:
uv run python scripts/compare_detection_approaches.py
"""

import json
import subprocess
import sys


def run_jinja_scanning() -> set[str]:
    """Run Jinja pattern scanning and return flagged model names.

    Invokes scripts/detect_base_mode.py via ``uv run`` and parses its
    ``--json`` output. On any failure — non-zero exit OR malformed JSON —
    a message is written to stderr and an empty set is returned, so the
    overall comparison degrades gracefully instead of crashing.
    """
    result = subprocess.run(
        ["uv", "run", "python", "scripts/detect_base_mode.py", "--json"],
        capture_output=True, text=True,
    )
    if result.returncode != 0:
        print(f"Error running detect_base_mode.py: {result.stderr}", file=sys.stderr)
        return set()

    # The scanner can exit 0 yet still emit non-JSON noise (warnings, partial
    # output); treat that like a failed run rather than raising.
    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError as exc:
        print(f"Error parsing detect_base_mode.py output: {exc}", file=sys.stderr)
        return set()

    return {f["name"] for f in data["sql_findings"]}


def run_compiled_diff(base_dir: str, current_dir: str) -> set[str]:
    """Run compiled SQL diff and return flagged model names.

    Args:
        base_dir: directory holding the base target's compiled SQL.
        current_dir: directory holding the current target's compiled SQL.

    On any failure — non-zero exit OR malformed JSON on stdout — a message
    is written to stderr and an empty set is returned, so the comparison
    degrades gracefully instead of crashing.
    """
    result = subprocess.run(
        ["uv", "run", "python", "scripts/compiled_sql_diff.py",
         "--base-dir", base_dir,
         "--current-dir", current_dir,
         "--json"],
        capture_output=True, text=True,
    )
    if result.returncode != 0:
        print(f"Error running compiled_sql_diff.py: {result.stderr}", file=sys.stderr)
        return set()

    # Exit code 0 does not guarantee clean JSON; guard the parse as well.
    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError as exc:
        print(f"Error parsing compiled_sql_diff.py output: {exc}", file=sys.stderr)
        return set()

    return {f["model"] for f in data["non_deterministic"]}


def _score(flagged: set[str], ground_truth: set[str], safe_models: set[str]):
    """Split one approach's flagged set into (tp, fn, fp, safe_ok) sets."""
    return (
        ground_truth & flagged,   # true positives: correctly flagged
        ground_truth - flagged,   # false negatives: missed
        flagged - ground_truth,   # false positives: wrongly flagged
        safe_models - flagged,    # safe models correctly left unflagged
    )


def _print_approach(heading: str, flagged: set[str],
                    ground_truth: set[str], safe_models: set[str]):
    """Print one approach's result section; return its (tp, fn, fp, safe_ok)."""
    print("-" * 70)
    print(f"  {heading}")
    print(f"  Flagged: {sorted(flagged)}")
    tp, fn, fp, safe_ok = _score(flagged, ground_truth, safe_models)
    print(f"  True positives:  {len(tp)}/{len(ground_truth)}")
    print(f"  False negatives: {len(fn)} {sorted(fn) if fn else ''}")
    print(f"  False positives: {len(fp)} {sorted(fp) if fp else ''}")
    print(f"  Safe correctly:  {len(safe_ok)}/{len(safe_models)}")
    print()
    return tp, fn, fp, safe_ok


def main():
    """Run all detection approaches, score each against a hand-labeled
    ground truth, and print a per-approach breakdown plus a summary table."""
    print("=" * 70)
    print("  Detection Approach Comparison")
    print("=" * 70)
    print()

    # Expected ground truth: models with target.name branching
    ground_truth = {
        "metrics_daily_shipments",       # incremental, target.name in else
        "metrics_shipping_efficiency",   # table, target.name if/else
        "metrics_regional_revenue",      # table, target.name inline
        "metrics_order_summary",         # view, target.name inline
    }
    safe_models = {
        "metrics_daily_orders",          # incremental, deterministic else
    }

    print("  Ground truth (should be flagged):")
    for m in sorted(ground_truth):
        print(f"    - {m}")
    print()
    print("  Safe models (should NOT be flagged):")
    for m in sorted(safe_models):
        print(f"    - {m}")
    print()

    # (summary label, section heading, zero-arg runner) per approach.
    # Runners are deferred so each subprocess starts only when its section runs.
    approaches = [
        ("Jinja Pattern Scanning",
         "Approach 1: Jinja Pattern Scanning (raw_code regex)",
         run_jinja_scanning),
        ("Compiled SQL Diff (existing tables)",
         "Approach 2a: Compiled SQL Diff (existing tables → is_incremental=true)",
         lambda: run_compiled_diff("target/compiled_pg_base",
                                   "target/compiled_pg_current")),
        ("Compiled SQL Diff (--full-refresh)",
         "Approach 2b: Compiled SQL Diff (--full-refresh → is_incremental=false)",
         lambda: run_compiled_diff("target/compiled_pg_base_fr",
                                   "target/compiled_pg_current_fr")),
    ]

    rows = []
    for label, heading, runner in approaches:
        tp, fn, fp, safe_ok = _print_approach(heading, runner(),
                                              ground_truth, safe_models)
        rows.append((label, len(tp), len(fn), len(fp), len(safe_ok)))

    # Summary: accuracy = (TP + correctly-unflagged safe models) / all models.
    print("=" * 70)
    print("  Summary")
    print("=" * 70)
    print()
    print(f"  {'Approach':<50} {'TP':>4} {'FN':>4} {'FP':>4} {'Accuracy'}")
    print(f"  {'-'*50} {'--':>4} {'--':>4} {'--':>4} {'--------'}")
    total = len(ground_truth) + len(safe_models)
    for label, tp_n, fn_n, fp_n, safe_n in rows:
        acc = (tp_n + safe_n) / total * 100
        print(f"  {label:<50} {tp_n:>4} {fn_n:>4} {fp_n:>4} {acc:>6.1f}%")

    print()
    print("  Key findings:")
    print("  1. Jinja scanning works from manifest alone (no compile needed)")
    print("  2. Compiled SQL diff needs --full-refresh to catch incremental else branches")
    print("  3. Both approaches produce zero false positives on this project")
    print("  4. Compiled SQL diff catches custom macros that Jinja scanning misses")
    print()


if __name__ == "__main__":
    main()
Loading