From fe17f94a5aef6ef0b15ef660f1ab965514df0c1e Mon Sep 17 00:00:00 2001 From: Daisuke Taniwaki Date: Mon, 9 Mar 2026 16:42:47 +0900 Subject: [PATCH] feat: detect macro and config changes in lineage diff and summary Signed-off-by: Daisuke Taniwaki --- recce/summary.py | 24 ++++++++-- tests/test_summary.py | 102 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 3 deletions(-) diff --git a/recce/summary.py b/recce/summary.py index f8f3d066c..1221fd75d 100644 --- a/recce/summary.py +++ b/recce/summary.py @@ -1,8 +1,11 @@ import os import sys -from typing import Dict, List, Optional, Set, Type, Union +from typing import TYPE_CHECKING, Dict, List, Optional, Set, Type, Union from uuid import UUID +if TYPE_CHECKING: + from recce.models.types import NodeDiff + from pydantic import BaseModel from recce.apis.check_func import get_node_name_by_id @@ -67,14 +70,21 @@ def __init__(self, node_id: str, node_data: dict, data_from: str = "base"): self.base_data = {} self.current_data = {} + self._forced_change_status = None if data_from == "base": self.base_data = node_data elif data_from == "current": self.current_data = node_data + def apply_diff(self, node_diff: "NodeDiff"): + """Apply an externally computed diff (e.g., from state:modified).""" + self._forced_change_status = node_diff.change_status + @property def change_status(self): + if self._forced_change_status is not None: + return self._forced_change_status base_checksum = self.base_data.get("checksum", {}).get("checksum") curr_checksum = self.current_data.get("checksum", {}).get("checksum") if self.data_from == "base": @@ -291,7 +301,7 @@ def get_edge_str(self, edge_id): return f"{edge.parent_id}-...->{edge.child_id}\n" -def _build_lineage_graph(base, current) -> LineageGraph: +def _build_lineage_graph(base, current, diff: Optional[Dict[str, "NodeDiff"]] = None) -> LineageGraph: graph = LineageGraph() # Get the current package name to filter nodes (from the current manifest metadata) @@ -319,6 +329,14 @@ def _build_lineage_graph(base, current) -> LineageGraph: node = graph.nodes[node_id] node.update_data(node_data, "current") + # Apply externally computed diff (e.g., from state:modified or macro detection). + # This allows nodes whose SQL checksum didn't change (e.g., macro-affected nodes) + # to be surfaced as modified in the graph. + if diff: + for node_id, node_diff in diff.items(): + if node_id in graph.nodes: + graph.nodes[node_id].apply_diff(node_diff) + # Build edges for child_id, parents in base.get("parent_map", {}).items(): for parent_id in parents: @@ -508,7 +526,7 @@ def generate_markdown_summary(ctx: RecceContext, summary_format: str = "markdown lineage_diff = ctx.get_lineage_diff() summary_metadata = generate_summary_metadata(lineage_diff.base, lineage_diff.current) - graph = _build_lineage_graph(lineage_diff.base, lineage_diff.current) + graph = _build_lineage_graph(lineage_diff.base, lineage_diff.current, lineage_diff.diff) graph.checks, check_statistics = generate_check_summary(lineage_diff.base, lineage_diff.current) summary_config = RecceConfig().get("summary") or {} node_shapes = summary_config.get("node_shapes") or {} diff --git a/tests/test_summary.py b/tests/test_summary.py index 6c1fe0805..9358abfe7 100644 --- a/tests/test_summary.py +++ b/tests/test_summary.py @@ -5,6 +5,7 @@ from recce.adapter.dbt_adapter import DbtAdapter, DbtVersion, load_manifest from recce.core import RecceContext, set_default_context +from recce.models.types import NodeDiff from recce.summary import ( MERMAID_NODE_SHAPES, Node, @@ -185,3 +186,104 @@ def test_no_diff_returns_none(self, mock_get_diff): mock_get_diff.return_value = (None, None) node = _make_node() assert node._cal_row_count_delta_percentage() is None + + +def _make_node_with_checksum(node_id, name, checksum="abc123"): + """Helper to create a Node with checksum data (both base and current).""" + node_data = { + "name": name, + "resource_type": "model", + "package_name": "test", + "checksum": {"checksum": checksum}, + } + node = Node(node_id, node_data, "both") + node.base_data = node_data + node.current_data = node_data + return node + + +def _make_both_node(node_id="model.test.my_model", name="my_model"): + """Helper to create a Node present in both base and current (data_from='both').""" + node = _make_node_with_checksum(node_id, name, checksum="same") + return node + + +class TestNodeApplyDiff: + def test_apply_diff_sets_forced_change_status(self): + node = _make_both_node() + assert node.change_status is None # same checksum → no change + node.apply_diff(NodeDiff(change_status="modified")) + assert node.change_status == "modified" + + def test_apply_diff_overrides_checksum_based_none(self): + node = _make_node_with_checksum("model.test.m", "m", checksum="same") + assert node.change_status is None # same checksum → no change + node.apply_diff(NodeDiff(change_status="modified")) + assert node.change_status == "modified" + + +class TestWhatChanged: + @patch("recce.summary._get_node_row_count_diff", return_value=(None, None)) + def test_modified_shows_code(self, _mock): + node = _make_both_node() + node.apply_diff(NodeDiff(change_status="modified")) + changes = node._what_changed() + assert "Code" in changes + + +class TestBuildLineageGraphWithDiff: + def _make_lineage(self, node_ids): + nodes = {} + for nid in node_ids: + name = nid.split(".")[-1] + nodes[nid] = { + "id": nid, + "name": name, + "resource_type": "model", + "package_name": "test", + "checksum": {"checksum": "same_checksum"}, + "raw_code": "SELECT 1", + } + return {"nodes": nodes, "parent_map": {}} + + def test_diff_marks_state_modified_nodes(self): + base = self._make_lineage(["model.test.a", "model.test.b"]) + current = self._make_lineage(["model.test.a", "model.test.b"]) + + # Without diff: node a and b have same checksum → no change + graph = _build_lineage_graph(base, current) + assert graph.nodes["model.test.a"].change_status is None + assert graph.nodes["model.test.b"].change_status is None + + # With diff from state:modified: node b surfaces as modified + diff = {"model.test.b": NodeDiff(change_status="modified")} + graph = _build_lineage_graph(base, current, diff) + assert "model.test.b" in graph.modified_set + assert graph.nodes["model.test.a"].change_status is None + + @patch("recce.summary._get_node_row_count_diff", return_value=(None, None)) + def test_diff_node_shows_code_label(self, _mock): + base = self._make_lineage(["model.test.a"]) + current = self._make_lineage(["model.test.a"]) + diff = {"model.test.a": NodeDiff(change_status="modified")} + graph = _build_lineage_graph(base, current, diff) + changes = graph.nodes["model.test.a"]._what_changed() + assert "Code" in changes + + def test_no_diff_preserves_existing_behavior(self): + """Passing diff=None should behave identically to the original implementation.""" + dbt_version = DbtVersion() + if dbt_version < "1.8.1": + pytest.skip("Dbt version is less than 1.8.1") + + base_manifest_path = os.path.join(current_dir, "data", "manifest", "base", "manifest.json") + pr2_manifest_path = os.path.join(current_dir, "data", "manifest", "pr2", "manifest.json") + base_manifest = load_manifest(path=base_manifest_path) + curr_manifest = load_manifest(path=pr2_manifest_path) + dbt_adapter = DbtAdapter(curr_manifest=curr_manifest, base_manifest=base_manifest) + curr_lineage = dbt_adapter.get_lineage() + base_lineage = dbt_adapter.get_lineage(base=True) + + graph_no_diff = _build_lineage_graph(curr_lineage, base_lineage) + graph_with_none = _build_lineage_graph(curr_lineage, base_lineage, None) + assert graph_no_diff.modified_set == graph_with_none.modified_set