diff --git a/src/policyengine/results/__init__.py b/src/policyengine/results/__init__.py
new file mode 100644
index 00000000..812f5027
--- /dev/null
+++ b/src/policyengine/results/__init__.py
@@ -0,0 +1,19 @@
+"""Schema models and source-tracking helper for results.json."""
+
+from policyengine.results.schema import (
+    ChartEntry,
+    ResultsJson,
+    ResultsMetadata,
+    TableEntry,
+    ValueEntry,
+)
+from policyengine.results.tracking import tracked_value
+
+__all__ = [
+    "ChartEntry",
+    "ResultsJson",
+    "ResultsMetadata",
+    "TableEntry",
+    "ValueEntry",
+    "tracked_value",
+]
diff --git a/src/policyengine/results/schema.py b/src/policyengine/results/schema.py
new file mode 100644
index 00000000..8ca94fb8
--- /dev/null
+++ b/src/policyengine/results/schema.py
@@ -0,0 +1,125 @@
+"""Pydantic schema for results.json — the contract between analysis
+repos and blog posts.
+
+Every PolicyEngine blog post references a results.json file produced
+by an analysis script. This module validates that the file conforms
+to the expected schema so errors are caught at generation time rather
+than at build time when resolve-posts tries to render templates.
+"""
+
+import json
+from pathlib import Path
+from typing import Any
+
+from pydantic import BaseModel, model_validator
+
+
+class ResultsMetadata(BaseModel):
+    """Top-level metadata about the analysis."""
+
+    title: str
+    repo: str
+    slug: str | None = None
+    commit: str | None = None
+    generated_at: str | None = None
+    policyengine_version: str | None = None
+    dataset: str | None = None
+    country_id: str | None = None
+    year: int | None = None
+
+
+class ValueEntry(BaseModel):
+    """A single traceable value in results.json."""
+
+    value: Any
+    display: str
+    source_line: int
+    source_url: str
+
+
+class TableEntry(BaseModel):
+    """A table in results.json."""
+
+    title: str
+    headers: list[str]
+    rows: list[list[str]]
+    source_line: int
+    source_url: str
+
+    @model_validator(mode="after")
+    def check_row_widths(self) -> "TableEntry":
+        """Reject tables whose rows do not match the header width."""
+        n_cols = len(self.headers)
+        for i, row in enumerate(self.rows):
+            if len(row) != n_cols:
+                raise ValueError(
+                    f"Row {i} has {len(row)} columns but headers "
+                    f"has {n_cols}"
+                )
+        return self
+
+
+class ChartEntry(BaseModel):
+    """A chart reference in results.json."""
+
+    url: str
+    alt: str
+    width: int = 1200
+    height: int = 600
+    source_line: int
+    source_url: str
+
+    @model_validator(mode="after")
+    def check_alt_text(self) -> "ChartEntry":
+        """Reject alt text shorter than 20 characters."""
+        if len(self.alt) < 20:
+            raise ValueError(
+                f"Alt text is too short ({len(self.alt)} chars). "
+                "Include chart type and 2-3 key data points."
+            )
+        return self
+
+
+class ResultsJson(BaseModel):
+    """Full results.json schema.
+
+    Usage::
+
+        from policyengine.results import (
+            ResultsJson,
+            ResultsMetadata,
+            ValueEntry,
+        )
+
+        results = ResultsJson(
+            metadata=ResultsMetadata(
+                title="SALT Cap Repeal",
+                repo="PolicyEngine/analyses",
+            ),
+            values={
+                "budget_impact": ValueEntry(
+                    value=-15.2e9,
+                    display="$15.2 billion",
+                    source_line=47,
+                    source_url="https://github.com/.../analysis.py#L47",
+                ),
+            },
+        )
+        results.write("results.json")
+    """
+
+    metadata: ResultsMetadata
+    values: dict[str, ValueEntry] = {}
+    tables: dict[str, TableEntry] = {}
+    charts: dict[str, ChartEntry] = {}
+
+    def write(self, path: str | Path) -> None:
+        """Write validated results.json to disk.
+
+        Creates missing parent directories, then writes UTF-8 encoded,
+        2-space indented JSON terminated by a newline.
+        """
+        path = Path(path)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        data = self.model_dump(mode="json")
+        path.write_text(json.dumps(data, indent=2) + "\n", encoding="utf-8")
diff --git a/src/policyengine/results/tracking.py b/src/policyengine/results/tracking.py
new file mode 100644
index 00000000..5650b820
--- /dev/null
+++ b/src/policyengine/results/tracking.py
@@ -0,0 +1,90 @@
+"""Source-line tracking helper for results.json values.
+
+The key traceability guarantee of the blog pipeline is that every
+number in a blog post links back to the exact line of code that
+produced it. This module provides a helper that captures the
+caller's line number automatically via ``inspect``.
+"""
+
+import inspect
+from typing import Any
+
+
+def tracked_value(
+    value: Any,
+    display: str,
+    repo: str,
+    filename: str = "analysis.py",
+    branch: str = "main",
+    *,
+    _stack_offset: int = 1,
+) -> dict:
+    """Build a results.json value entry with automatic source tracking.
+
+    Captures the caller's line number so every value in results.json
+    points to the code that produced it. Only the line number is read
+    from the call stack; the file path in the URL comes from
+    ``filename``.
+
+    Args:
+        value: The raw numeric value.
+        display: Human-readable formatted string (e.g. "$15.2 billion").
+        repo: GitHub org/repo (e.g. "PolicyEngine/analyses").
+        filename: Script filename within the repo.
+        branch: Git branch for the source URL.
+        _stack_offset: How many frames to skip (default 1 = caller).
+
+    Returns:
+        Dict matching the ValueEntry schema::
+
+            {
+                "value": -15200000000,
+                "display": "$15.2 billion",
+                "source_line": 47,
+                "source_url": "https://github.com/.../analysis.py#L47",
+            }
+
+    Raises:
+        IndexError: If ``_stack_offset`` is negative or larger than
+            the depth of the call stack.
+
+    Example::
+
+        from policyengine.results import tracked_value
+
+        budget = reform_revenue - baseline_revenue
+        results["values"]["budget_impact"] = tracked_value(
+            value=budget,
+            display=f"${abs(budget)/1e9:.1f} billion",
+            repo="PolicyEngine/analyses",
+        )
+    """
+    if _stack_offset < 0:
+        raise IndexError(f"_stack_offset must be >= 0, got {_stack_offset}")
+    # Walk frame links directly: inspect.stack() would build a record
+    # (and read source context) for every frame on the stack.
+    frame = inspect.currentframe()
+    try:
+        for _ in range(_stack_offset):
+            if frame is None:
+                break
+            frame = frame.f_back
+        if frame is None:
+            raise IndexError(
+                f"no stack frame at _stack_offset={_stack_offset}"
+            )
+        line = frame.f_lineno
+    finally:
+        # Drop the frame reference promptly to avoid a reference cycle.
+        del frame
+
+    source_url = (
+        f"https://github.com/{repo}/blob/{branch}/{filename}#L{line}"
+    )
+
+    return {
+        "value": value,
+        "display": display,
+        "source_line": line,
+        "source_url": source_url,
+    }
diff --git a/tests/test_results.py b/tests/test_results.py
new file mode 100644
index 00000000..43be3439
--- /dev/null
+++ b/tests/test_results.py
@@ -0,0 +1,241 @@
+"""Tests for the results.json schema validation and source tracking."""
+
+import inspect
+import json
+import tempfile
+from pathlib import Path
+
+import pytest
+from pydantic import ValidationError
+
+from policyengine.results import (
+    ChartEntry,
+    ResultsJson,
+    ResultsMetadata,
+    TableEntry,
+    ValueEntry,
+    tracked_value,
+)
+
+
+def test_valid_results_json():
+    """A fully valid results.json passes validation."""
+    results = ResultsJson(
+        metadata=ResultsMetadata(
+            title="Test Analysis",
+            repo="PolicyEngine/test",
+            year=2026,
+            country_id="us",
+        ),
+        values={
+            "budget_impact": ValueEntry(
+                value=-15200000000,
+                display="$15.2 billion",
+                source_line=47,
+                source_url="https://github.com/PolicyEngine/test/blob/main/analysis.py#L47",
+            ),
+        },
+        tables={
+            "household": TableEntry(
+                title="Household impacts",
+                headers=["Household", "Income", "Change"],
+                rows=[
+                    ["Single", "$50,000", "+$1,200"],
+                    ["Married", "$100,000", "+$2,400"],
+                ],
+                source_line=80,
+                source_url="https://github.com/PolicyEngine/test/blob/main/analysis.py#L80",
+            ),
+        },
+        charts={
+            "decile": ChartEntry(
+                url="https://PolicyEngine.github.io/test/charts/decile.png",
+                alt="Bar chart showing impact by decile. Top decile gains $8,200.",
+                source_line=105,
+                source_url="https://github.com/PolicyEngine/test/blob/main/analysis.py#L105",
+            ),
+        },
+    )
+
+    assert results.metadata.title == "Test Analysis"
+    assert results.values["budget_impact"].value == -15200000000
+    assert len(results.tables["household"].rows) == 2
+    assert results.charts["decile"].width == 1200
+
+
+def test_value_entry_requires_source_line():
+    """ValueEntry without source_line raises ValidationError."""
+    with pytest.raises(ValidationError):
+        ValueEntry(
+            value=100,
+            display="$100",
+            source_url="https://github.com/x/y#L1",
+        )
+
+
+def test_value_entry_requires_source_url():
+    """ValueEntry without source_url raises ValidationError."""
+    with pytest.raises(ValidationError):
+        ValueEntry(
+            value=100,
+            display="$100",
+            source_line=10,
+        )
+
+
+def test_table_row_width_mismatch():
+    """Table with wrong number of columns per row raises error."""
+    with pytest.raises(ValidationError):
+        TableEntry(
+            title="Bad table",
+            headers=["A", "B", "C"],
+            rows=[["x", "y"]],  # 2 cols, need 3
+            source_line=1,
+            source_url="https://github.com/x/y#L1",
+        )
+
+
+def test_chart_alt_text_too_short():
+    """Chart with vague alt text raises error."""
+    with pytest.raises(ValidationError):
+        ChartEntry(
+            url="https://example.com/chart.png",
+            alt="A chart.",  # Too short
+            source_line=1,
+            source_url="https://github.com/x/y#L1",
+        )
+
+
+def test_chart_alt_text_descriptive():
+    """Chart with descriptive alt text passes."""
+    chart = ChartEntry(
+        url="https://example.com/chart.png",
+        alt="Bar chart showing reform impact by income decile. Top decile gains $8,200 average.",
+        source_line=1,
+        source_url="https://github.com/x/y#L1",
+    )
+    assert chart.width == 1200
+    assert chart.height == 600
+
+
+def test_write_results_json():
+    """ResultsJson.write() produces valid JSON file."""
+    results = ResultsJson(
+        metadata=ResultsMetadata(
+            title="Write Test",
+            repo="PolicyEngine/test",
+        ),
+        values={
+            "x": ValueEntry(
+                value=42,
+                display="42",
+                source_line=1,
+                source_url="https://github.com/x/y#L1",
+            ),
+        },
+    )
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = Path(tmpdir) / "results.json"
+        results.write(path)
+
+        raw = path.read_text()
+        assert raw.endswith("\n"), "File should end with a newline"
+        data = json.loads(raw)
+        assert data["metadata"]["title"] == "Write Test"
+        assert data["values"]["x"]["value"] == 42
+        assert data["values"]["x"]["source_line"] == 1
+
+
+def test_write_creates_parent_directories():
+    """ResultsJson.write() creates parent directories if needed."""
+    results = ResultsJson(
+        metadata=ResultsMetadata(
+            title="Nested",
+            repo="PolicyEngine/test",
+        ),
+    )
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = Path(tmpdir) / "sub" / "dir" / "results.json"
+        results.write(path)
+
+        assert path.exists()
+        data = json.loads(path.read_text())
+        assert data["metadata"]["title"] == "Nested"
+
+
+def test_empty_results_json():
+    """ResultsJson with only metadata is valid."""
+    results = ResultsJson(
+        metadata=ResultsMetadata(
+            title="Empty",
+            repo="PolicyEngine/test",
+        ),
+    )
+    assert results.values == {}
+    assert results.tables == {}
+    assert results.charts == {}
+
+
+def test_tracked_value():
+    """tracked_value() captures line number and builds source URL."""
+    result = tracked_value(
+        value=-15200000000,
+        display="$15.2 billion",
+        repo="PolicyEngine/analyses",
+        filename="analysis.py",
+    )
+
+    assert result["value"] == -15200000000
+    assert result["display"] == "$15.2 billion"
+    assert isinstance(result["source_line"], int)
+    assert result["source_line"] > 0
+    assert "PolicyEngine/analyses" in result["source_url"]
+    assert "analysis.py#L" in result["source_url"]
+
+
+def test_tracked_value_custom_filename():
+    """tracked_value() respects custom filename and branch."""
+    result = tracked_value(
+        value=100,
+        display="$100",
+        repo="PolicyEngine/analyses",
+        filename="salt-cap/analysis.py",
+        branch="dev",
+    )
+
+    assert "salt-cap/analysis.py" in result["source_url"]
+    assert "/blob/dev/" in result["source_url"]
+
+
+def test_tracked_value_validates_as_value_entry():
+    """tracked_value() output can be used to construct a ValueEntry."""
+    result = tracked_value(
+        value=42,
+        display="42",
+        repo="PolicyEngine/test",
+    )
+    entry = ValueEntry(**result)
+    assert entry.value == 42
+    assert entry.source_line > 0
+
+
+def test_tracked_value_reports_call_line():
+    """tracked_value() records the line of the call site itself."""
+    expected = inspect.currentframe().f_lineno + 1
+    result = tracked_value(value=1, display="1", repo="PolicyEngine/test")
+
+    assert result["source_line"] == expected
+    assert result["source_url"].endswith(f"#L{expected}")
+
+
+def test_tracked_value_rejects_out_of_range_offset():
+    """tracked_value() raises IndexError for an unreachable frame."""
+    with pytest.raises(IndexError):
+        tracked_value(
+            value=1,
+            display="1",
+            repo="PolicyEngine/test",
+            _stack_offset=100_000,
+        )