Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions recce/summary.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import os
import sys
from typing import Dict, List, Optional, Set, Type, Union
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Type, Union
from uuid import UUID

if TYPE_CHECKING:
from recce.models.types import NodeDiff

from pydantic import BaseModel

from recce.apis.check_func import get_node_name_by_id
Expand Down Expand Up @@ -67,14 +70,21 @@ def __init__(self, node_id: str, node_data: dict, data_from: str = "base"):

self.base_data = {}
self.current_data = {}
self._forced_change_status = None

if data_from == "base":
self.base_data = node_data
elif data_from == "current":
self.current_data = node_data

def apply_diff(self, node_diff: "NodeDiff"):
    """Record the change status carried by an externally computed diff.

    The value is stored in ``_forced_change_status``. The ``change_status``
    property returns that stored value whenever it is not ``None``, instead
    of comparing checksums.

    Args:
        node_diff: Diff object (e.g., from state:modified) whose
            ``change_status`` attribute is copied onto this node.
    """
    forced_status = node_diff.change_status
    self._forced_change_status = forced_status

@property
def change_status(self):
if self._forced_change_status is not None:
return self._forced_change_status
base_checksum = self.base_data.get("checksum", {}).get("checksum")
curr_checksum = self.current_data.get("checksum", {}).get("checksum")
if self.data_from == "base":
Expand Down Expand Up @@ -291,7 +301,7 @@ def get_edge_str(self, edge_id):
return f"{edge.parent_id}-...->{edge.child_id}\n"


def _build_lineage_graph(base, current) -> LineageGraph:
def _build_lineage_graph(base, current, diff: Optional[Dict[str, "NodeDiff"]] = None) -> LineageGraph:
graph = LineageGraph()

# Get the current package name to filter nodes (from the current manifest metadata)
Expand Down Expand Up @@ -319,6 +329,14 @@ def _build_lineage_graph(base, current) -> LineageGraph:
node = graph.nodes[node_id]
node.update_data(node_data, "current")

# Apply externally computed diff (e.g., from state:modified or macro detection).
# This allows nodes whose SQL checksum didn't change (e.g., macro-affected nodes)
# to be surfaced as modified in the graph.
if diff:
for node_id, node_diff in diff.items():
if node_id in graph.nodes:
graph.nodes[node_id].apply_diff(node_diff)

# Build edges
for child_id, parents in base.get("parent_map", {}).items():
for parent_id in parents:
Expand Down Expand Up @@ -508,7 +526,7 @@ def generate_markdown_summary(ctx: RecceContext, summary_format: str = "markdown

lineage_diff = ctx.get_lineage_diff()
summary_metadata = generate_summary_metadata(lineage_diff.base, lineage_diff.current)
graph = _build_lineage_graph(lineage_diff.base, lineage_diff.current)
graph = _build_lineage_graph(lineage_diff.base, lineage_diff.current, lineage_diff.diff)
graph.checks, check_statistics = generate_check_summary(lineage_diff.base, lineage_diff.current)
summary_config = RecceConfig().get("summary") or {}
node_shapes = summary_config.get("node_shapes") or {}
Expand Down
102 changes: 102 additions & 0 deletions tests/test_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from recce.adapter.dbt_adapter import DbtAdapter, DbtVersion, load_manifest
from recce.core import RecceContext, set_default_context
from recce.models.types import NodeDiff
from recce.summary import (
MERMAID_NODE_SHAPES,
Node,
Expand Down Expand Up @@ -185,3 +186,104 @@ def test_no_diff_returns_none(self, mock_get_diff):
mock_get_diff.return_value = (None, None)
node = _make_node()
assert node._cal_row_count_delta_percentage() is None


def _make_node_with_checksum(node_id, name, checksum="abc123"):
    """Build a ``Node`` whose base and current data carry the same checksum.

    The node is constructed with ``data_from="both"`` and then has its
    ``base_data`` and ``current_data`` set explicitly to one shared dict.
    """
    payload = dict(
        name=name,
        resource_type="model",
        package_name="test",
        checksum=dict(checksum=checksum),
    )
    both_node = Node(node_id, payload, "both")
    # Both sides point at the very same dict object, so their checksums match.
    both_node.base_data = both_node.current_data = payload
    return both_node


def _make_both_node(node_id="model.test.my_model", name="my_model"):
    """Return a Node built via ``_make_node_with_checksum`` with checksum ``"same"``."""
    return _make_node_with_checksum(node_id, name, checksum="same")


class TestNodeApplyDiff:
    """Checks that ``apply_diff`` overrides the checksum-derived change status."""

    def test_apply_diff_sets_forced_change_status(self):
        subject = _make_both_node()
        # Identical checksums on both sides, so no change is reported yet.
        assert subject.change_status is None
        forced = NodeDiff(change_status="modified")
        subject.apply_diff(forced)
        assert subject.change_status == "modified"

    def test_apply_diff_overrides_checksum_based_none(self):
        subject = _make_node_with_checksum("model.test.m", "m", checksum="same")
        # Identical checksums on both sides, so no change is reported yet.
        assert subject.change_status is None
        forced = NodeDiff(change_status="modified")
        subject.apply_diff(forced)
        assert subject.change_status == "modified"


class TestWhatChanged:
    """Checks the labels reported by ``_what_changed`` for a forced-modified node."""

    def test_modified_shows_code(self):
        # The row-count lookup is patched to return (None, None) for the
        # duration of the test body.
        with patch("recce.summary._get_node_row_count_diff", return_value=(None, None)):
            subject = _make_both_node()
            subject.apply_diff(NodeDiff(change_status="modified"))
            labels = subject._what_changed()
            assert "Code" in labels


class TestBuildLineageGraphWithDiff:
    """Tests for the optional ``diff`` argument of ``_build_lineage_graph``."""

    def _make_lineage(self, node_ids):
        """Return a minimal lineage dict whose nodes all share one checksum.

        Each node's ``name`` is the last dot-separated segment of its id, and
        ``parent_map`` is left empty.
        """
        nodes = {}
        for nid in node_ids:
            name = nid.split(".")[-1]
            nodes[nid] = {
                "id": nid,
                "name": name,
                "resource_type": "model",
                "package_name": "test",
                "checksum": {"checksum": "same_checksum"},
                "raw_code": "SELECT 1",
            }
        return {"nodes": nodes, "parent_map": {}}

    def test_diff_marks_state_modified_nodes(self):
        base = self._make_lineage(["model.test.a", "model.test.b"])
        current = self._make_lineage(["model.test.a", "model.test.b"])

        # Without diff: node a and b have same checksum → no change
        graph = _build_lineage_graph(base, current)
        assert graph.nodes["model.test.a"].change_status is None
        assert graph.nodes["model.test.b"].change_status is None

        # With diff from state:modified: node b surfaces as modified
        diff = {"model.test.b": NodeDiff(change_status="modified")}
        graph = _build_lineage_graph(base, current, diff)
        assert "model.test.b" in graph.modified_set
        assert graph.nodes["model.test.a"].change_status is None

    @patch("recce.summary._get_node_row_count_diff", return_value=(None, None))
    def test_diff_node_shows_code_label(self, _mock):
        base = self._make_lineage(["model.test.a"])
        current = self._make_lineage(["model.test.a"])
        diff = {"model.test.a": NodeDiff(change_status="modified")}
        graph = _build_lineage_graph(base, current, diff)
        changes = graph.nodes["model.test.a"]._what_changed()
        assert "Code" in changes

    def test_no_diff_preserves_existing_behavior(self):
        """Passing diff=None should behave identically to the original implementation."""
        dbt_version = DbtVersion()
        if dbt_version < "1.8.1":
            pytest.skip("Dbt version is less than 1.8.1")

        base_manifest_path = os.path.join(current_dir, "data", "manifest", "base", "manifest.json")
        pr2_manifest_path = os.path.join(current_dir, "data", "manifest", "pr2", "manifest.json")
        base_manifest = load_manifest(path=base_manifest_path)
        curr_manifest = load_manifest(path=pr2_manifest_path)
        dbt_adapter = DbtAdapter(curr_manifest=curr_manifest, base_manifest=base_manifest)
        curr_lineage = dbt_adapter.get_lineage()
        base_lineage = dbt_adapter.get_lineage(base=True)

        # Argument order is (base, current), matching how
        # generate_markdown_summary calls _build_lineage_graph. Reversing it
        # would swap added and removed nodes, which modified_set alone cannot
        # reveal because it does not distinguish the two.
        graph_no_diff = _build_lineage_graph(base_lineage, curr_lineage)
        graph_with_none = _build_lineage_graph(base_lineage, curr_lineage, None)
        assert graph_no_diff.modified_set == graph_with_none.modified_set
Loading