Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions models/metrics/metrics_daily_orders.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{{
    config(
        materialized = 'incremental',
        unique_key = 'order_date',
        incremental_strategy = 'delete+insert'
    )
}}

-- SAFE INCREMENTAL: deterministic else branch — NO false alarm.
--
-- This model IS incremental, but its else branch (used on fresh CI builds)
-- uses a FIXED date range with no target.name or current_date() dependency.
-- Both pg-base and pg-current produce identical SQL on first build.
--
-- Contrast with metrics_daily_shipments which has target.name in the else
-- branch → different SQL per target → false alarm.
--
-- Key insight: is_incremental() alone does NOT cause false alarms.
-- The false alarm comes from non-deterministic logic INSIDE the branches.

-- Nested quoting is intentional: the outer double quotes are Jinja, the inner
-- single quotes survive into the compiled SQL, so this renders as the string
-- literal '1998-08-02' (presumably the dataset's last order date — confirm).
{% set reference_date = "'1998-08-02'" %}

-- Daily order-level metrics keyed by order_date (one row per day).
select
    o.order_date,
    count(distinct o.order_key) as order_count,
    count(distinct o.customer_key) as customer_count,
    sum(o.gross_item_sales_amount)::decimal(16,4) as total_revenue,
    avg(o.gross_item_sales_amount)::decimal(16,4) as avg_order_value
from
    {{ ref('fct_orders') }} o
where
    o.order_date is not null
    {% if is_incremental() %}
    -- Incremental run: process only dates newer than what the target table
    -- already holds, capped at the same fixed reference date.
    and o.order_date > (select max(order_date) from {{ this }})
    and o.order_date <= {{ reference_date }}::date
    {% else %}
    -- First/full build: identical fixed cap, no environment-dependent logic,
    -- so every target compiles to byte-identical SQL here.
    and o.order_date <= {{ reference_date }}::date
    {% endif %}
group by
    o.order_date
order by
    o.order_date
47 changes: 47 additions & 0 deletions models/metrics/metrics_daily_shipments.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{{
    config(
        materialized = 'incremental',
        unique_key = 'ship_date',
        incremental_strategy = 'delete+insert'
    )
}}

-- Demonstrates why incremental models cause false alarms in Recce.
--
-- The root cause is NOT "data accumulation" — it's the conditional logic
-- below that produces DIFFERENT SQL depending on build context:
--
--   is_incremental() = true  → filters from max(ship_date) in existing table
--   is_incremental() = false → filters last N days from a reference date
--                              (N depends on target: prod gets 365 days, dev/PR gets 90 days)
--
-- Two environments built at different times or with different history
-- will run different SQL → different results → false alarm diffs.
--
-- This mirrors real-world patterns like the fct_cmab_strategy_reward example
-- where prod gets -8 days and dev gets -2 days from current_date().

-- Nested quoting: outer double quotes are Jinja; compiles to the SQL string
-- literal '1998-08-02'.
{% set reference_date = "'1998-08-02'" %}

-- Daily shipment metrics keyed by ship_date (one row per day).
select
    oi.ship_date,
    count(*) as shipment_count,
    count(distinct oi.order_key) as order_count,
    count(distinct oi.supplier_key) as supplier_count,
    sum(oi.gross_item_sales_amount)::decimal(16,4) as total_revenue,
    avg(oi.gross_item_sales_amount)::decimal(16,4) as avg_revenue_per_item
from
    {{ ref('orders_items') }} oi
where
    oi.ship_date is not null
    {% if is_incremental() %}
    -- Incremental run: only ship dates newer than the target table's max,
    -- capped at the fixed reference date. Deterministic by itself.
    and oi.ship_date > (select max(ship_date) from {{ this }})
    and oi.ship_date <= {{ reference_date }}::date
    {% else %}
    -- NON-DETERMINISTIC: the window length depends on target.name
    -- (365 days on pg-base, 90 days otherwise). This branch — not
    -- is_incremental() itself — is what triggers the false-alarm diff.
    and oi.ship_date >= {{ reference_date }}::date - interval '{{ 365 if target.name == "pg-base" else 90 }} days'
    and oi.ship_date <= {{ reference_date }}::date
    {% endif %}
group by
    oi.ship_date
order by
    oi.ship_date
29 changes: 29 additions & 0 deletions models/metrics/metrics_order_summary.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{{
    config(
        materialized = 'view'
    )
}}

-- Monthly order summary by priority level
--
-- FALSE ALARM DEMO: VIEW with target-dependent date window.
-- pg-base gets 5 years of history, pg-current gets 1 year.
-- Even views — which don't store data — produce different results
-- when their SQL definition varies by build context.
-- This is NOT incremental, NOT a table — it's a plain view.

-- Nested quoting: compiles to the SQL string literal '1998-08-02'.
{% set reference_date = "'1998-08-02'" %}

select
    date_trunc('month', o.order_date) as order_month,
    o.order_priority_code as priority,
    count(distinct o.order_key) as order_count,
    count(distinct o.customer_key) as customer_count,
    sum(o.gross_item_sales_amount)::decimal(16,4) as total_revenue,
    avg(o.gross_item_sales_amount)::decimal(16,4) as avg_order_value
from
    {{ ref('fct_orders') }} o
where
    -- NON-DETERMINISTIC window: 1825 days (5y) on pg-base vs 365 days (1y)
    -- elsewhere — this target.name branch is what causes the false alarm.
    o.order_date >= {{ reference_date }}::date - interval '{{ 1825 if target.name == "pg-base" else 365 }} days'
    and o.order_date <= {{ reference_date }}::date
group by
    -- Explicit expressions instead of positional "group by 1, 2": positional
    -- references silently break when the select list is reordered.
    date_trunc('month', o.order_date),
    o.order_priority_code
11 changes: 11 additions & 0 deletions models/metrics/metrics_regional_revenue.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
-- Revenue by region and nation over time
--
-- FALSE ALARM DEMO: TABLE model with target-dependent date window.
-- pg-base gets 7 years of history, pg-current gets 2 years.
-- Same pattern as prod vs dev environments with different data needs.
-- This is NOT incremental — it's a plain table with conditional logic.

{% set reference_date = "'1998-08-02'" %}

with orders as (

select * from {{ ref('fct_orders') }}
Expand All @@ -20,4 +28,7 @@ select
from
orders o
join customers c on o.customer_key = c.customer_key
where
o.order_date >= {{ reference_date }}::date - interval '{{ 2555 if target.name == "pg-base" else 730 }} days'
and o.order_date <= {{ reference_date }}::date
group by 1, 2, 3
17 changes: 16 additions & 1 deletion models/metrics/metrics_shipping_efficiency.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
-- Average delivery time by shipping mode per month
--
-- FALSE ALARM DEMO: TABLE model with target.name branching.
-- pg-base analyzes 7 years of ship dates, pg-current only 2 years.
-- Mirrors real pattern: prod builds full history, dev builds subset.
-- This is NOT incremental — it's a plain table with conditional logic.

{% set reference_date = "'1998-08-02'" %}

with items as (

select * from {{ ref('fct_orders_items') }}
Expand All @@ -15,5 +23,12 @@ select
round(sum(case when i.receipt_date > i.commit_date then 1 else 0 end)::decimal
/ nullif(count(*), 0) * 100, 2) as late_pct
from items i
where i.receipt_date is not null
where
i.receipt_date is not null
{% if target.name == 'pg-base' %}
and i.ship_date >= {{ reference_date }}::date - interval '2555 days'
{% else %}
and i.ship_date >= {{ reference_date }}::date - interval '730 days'
{% endif %}
and i.ship_date <= {{ reference_date }}::date
group by 1, 2
149 changes: 149 additions & 0 deletions scripts/compare_detection_approaches.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
"""Compare detection accuracy: Jinja scanning vs compiled SQL diffing.

Runs both approaches on the same dbt project and compares results.

Usage:
uv run python scripts/compare_detection_approaches.py
"""

import json
import subprocess
import sys


def run_jinja_scanning() -> set[str]:
    """Run Jinja pattern scanning and return flagged model names.

    Invokes scripts/detect_base_mode.py via ``uv run`` and parses its
    ``--json`` output. On any failure — non-zero exit OR malformed JSON —
    a message is written to stderr and an empty set is returned, so the
    overall comparison degrades gracefully instead of crashing.
    """
    result = subprocess.run(
        ["uv", "run", "python", "scripts/detect_base_mode.py", "--json"],
        capture_output=True, text=True,
    )
    if result.returncode != 0:
        print(f"Error running detect_base_mode.py: {result.stderr}", file=sys.stderr)
        return set()

    # The scanner can exit 0 yet still emit non-JSON noise (warnings, partial
    # output); treat that like a failed run rather than raising.
    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError as exc:
        print(f"Error parsing detect_base_mode.py output: {exc}", file=sys.stderr)
        return set()

    return {f["name"] for f in data["sql_findings"]}


def run_compiled_diff(base_dir: str, current_dir: str) -> set[str]:
    """Run compiled SQL diff and return flagged model names.

    Args:
        base_dir: directory holding the base target's compiled SQL.
        current_dir: directory holding the current target's compiled SQL.

    On any failure — non-zero exit OR malformed JSON on stdout — a message
    is written to stderr and an empty set is returned, so the comparison
    degrades gracefully instead of crashing.
    """
    result = subprocess.run(
        ["uv", "run", "python", "scripts/compiled_sql_diff.py",
         "--base-dir", base_dir,
         "--current-dir", current_dir,
         "--json"],
        capture_output=True, text=True,
    )
    if result.returncode != 0:
        print(f"Error running compiled_sql_diff.py: {result.stderr}", file=sys.stderr)
        return set()

    # Exit code 0 does not guarantee clean JSON; guard the parse as well.
    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError as exc:
        print(f"Error parsing compiled_sql_diff.py output: {exc}", file=sys.stderr)
        return set()

    return {f["model"] for f in data["non_deterministic"]}


def _score(flagged: set[str], ground_truth: set[str], safe_models: set[str]):
    """Split one approach's flagged set into (tp, fn, fp, safe_ok) sets."""
    return (
        ground_truth & flagged,   # true positives: correctly flagged
        ground_truth - flagged,   # false negatives: missed
        flagged - ground_truth,   # false positives: wrongly flagged
        safe_models - flagged,    # safe models correctly left unflagged
    )


def _print_approach(heading: str, flagged: set[str],
                    ground_truth: set[str], safe_models: set[str]):
    """Print one approach's result section; return its (tp, fn, fp, safe_ok)."""
    print("-" * 70)
    print(f"  {heading}")
    print(f"  Flagged: {sorted(flagged)}")
    tp, fn, fp, safe_ok = _score(flagged, ground_truth, safe_models)
    print(f"  True positives:  {len(tp)}/{len(ground_truth)}")
    print(f"  False negatives: {len(fn)} {sorted(fn) if fn else ''}")
    print(f"  False positives: {len(fp)} {sorted(fp) if fp else ''}")
    print(f"  Safe correctly:  {len(safe_ok)}/{len(safe_models)}")
    print()
    return tp, fn, fp, safe_ok


def main():
    """Run all detection approaches, score each against a hand-labeled
    ground truth, and print a per-approach breakdown plus a summary table."""
    print("=" * 70)
    print("  Detection Approach Comparison")
    print("=" * 70)
    print()

    # Expected ground truth: models with target.name branching
    ground_truth = {
        "metrics_daily_shipments",       # incremental, target.name in else
        "metrics_shipping_efficiency",   # table, target.name if/else
        "metrics_regional_revenue",      # table, target.name inline
        "metrics_order_summary",         # view, target.name inline
    }
    safe_models = {
        "metrics_daily_orders",          # incremental, deterministic else
    }

    print("  Ground truth (should be flagged):")
    for m in sorted(ground_truth):
        print(f"    - {m}")
    print()
    print("  Safe models (should NOT be flagged):")
    for m in sorted(safe_models):
        print(f"    - {m}")
    print()

    # (summary label, section heading, zero-arg runner) per approach.
    # Runners are deferred so each subprocess starts only when its section runs.
    approaches = [
        ("Jinja Pattern Scanning",
         "Approach 1: Jinja Pattern Scanning (raw_code regex)",
         run_jinja_scanning),
        ("Compiled SQL Diff (existing tables)",
         "Approach 2a: Compiled SQL Diff (existing tables → is_incremental=true)",
         lambda: run_compiled_diff("target/compiled_pg_base",
                                   "target/compiled_pg_current")),
        ("Compiled SQL Diff (--full-refresh)",
         "Approach 2b: Compiled SQL Diff (--full-refresh → is_incremental=false)",
         lambda: run_compiled_diff("target/compiled_pg_base_fr",
                                   "target/compiled_pg_current_fr")),
    ]

    rows = []
    for label, heading, runner in approaches:
        tp, fn, fp, safe_ok = _print_approach(heading, runner(),
                                              ground_truth, safe_models)
        rows.append((label, len(tp), len(fn), len(fp), len(safe_ok)))

    # Summary: accuracy = (TP + correctly-unflagged safe models) / all models.
    print("=" * 70)
    print("  Summary")
    print("=" * 70)
    print()
    print(f"  {'Approach':<50} {'TP':>4} {'FN':>4} {'FP':>4} {'Accuracy'}")
    print(f"  {'-'*50} {'--':>4} {'--':>4} {'--':>4} {'--------'}")
    total = len(ground_truth) + len(safe_models)
    for label, tp_n, fn_n, fp_n, safe_n in rows:
        acc = (tp_n + safe_n) / total * 100
        print(f"  {label:<50} {tp_n:>4} {fn_n:>4} {fp_n:>4} {acc:>6.1f}%")

    print()
    print("  Key findings:")
    print("  1. Jinja scanning works from manifest alone (no compile needed)")
    print("  2. Compiled SQL diff needs --full-refresh to catch incremental else branches")
    print("  3. Both approaches produce zero false positives on this project")
    print("  4. Compiled SQL diff catches custom macros that Jinja scanning misses")
    print()


if __name__ == "__main__":
    main()
Loading