Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions src/policyengine/results/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from policyengine.results.schema import (
ChartEntry,
ResultsJson,
ResultsMetadata,
TableEntry,
ValueEntry,
)
from policyengine.results.tracking import tracked_value

# Public API of the results package: the schema models and the
# tracked_value helper imported above are re-exported under these names.
__all__ = [
"ChartEntry",
"ResultsJson",
"ResultsMetadata",
"TableEntry",
"ValueEntry",
"tracked_value",
]
115 changes: 115 additions & 0 deletions src/policyengine/results/schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""Pydantic schema for results.json — the contract between analysis
repos and blog posts.

Every PolicyEngine blog post references a results.json file produced
by an analysis script. This module validates that the file conforms
to the expected schema so errors are caught at generation time rather
than at build time when resolve-posts tries to render templates.
"""

import json
from pathlib import Path
from typing import Any

from pydantic import BaseModel, model_validator


class ResultsMetadata(BaseModel):
    """Descriptive metadata attached to an analysis.

    Only ``title`` and ``repo`` must be supplied; every other field is
    optional and defaults to ``None``.
    """

    # Required fields.
    title: str
    repo: str

    # Optional fields -- each one may be omitted and is then None.
    slug: str | None = None
    commit: str | None = None
    generated_at: str | None = None
    policyengine_version: str | None = None
    dataset: str | None = None
    country_id: str | None = None
    year: int | None = None


class ValueEntry(BaseModel):
    """One value record of results.json together with its source location.

    All four fields are required.
    """

    value: Any  # raw value; no type restriction is applied
    display: str  # formatted text form of the value
    source_line: int  # line number associated with the value
    source_url: str  # URL associated with the value


class TableEntry(BaseModel):
    """A titled table of string cells stored in results.json.

    After field validation, every row must contain exactly as many
    cells as there are headers.
    """

    title: str
    headers: list[str]
    rows: list[list[str]]
    source_line: int
    source_url: str

    @model_validator(mode="after")
    def check_row_widths(self) -> "TableEntry":
        """Reject the table if any row's width differs from the header count.

        Raises:
            ValueError: For the first row whose length does not equal
                ``len(self.headers)``.
        """
        expected = len(self.headers)
        for index, cells in enumerate(self.rows):
            actual = len(cells)
            if actual == expected:
                continue
            raise ValueError(
                f"Row {index} has {actual} columns but headers "
                f"has {expected}"
            )
        return self


class ChartEntry(BaseModel):
    """Reference to a chart image stored in results.json.

    ``width`` and ``height`` default to 1200 and 600. The ``alt`` text
    must be at least 20 characters long.
    """

    url: str
    alt: str
    width: int = 1200
    height: int = 600
    source_line: int
    source_url: str

    @model_validator(mode="after")
    def check_alt_text(self) -> "ChartEntry":
        """Reject alt text shorter than 20 characters.

        Raises:
            ValueError: If ``len(self.alt)`` is below 20.
        """
        alt_length = len(self.alt)
        if alt_length >= 20:
            return self
        raise ValueError(
            f"Alt text is too short ({alt_length} chars). "
            "Include chart type and 2-3 key data points."
        )


class ResultsJson(BaseModel):
    """Root model for a complete results.json document.

    ``metadata`` is required. ``values``, ``tables`` and ``charts`` are
    name-keyed mappings that default to empty.

    Usage::

        from policyengine.results import (
            ResultsJson,
            ResultsMetadata,
            ValueEntry,
        )

        results = ResultsJson(
            metadata=ResultsMetadata(
                title="SALT Cap Repeal",
                repo="PolicyEngine/analyses",
            ),
            values={
                "budget_impact": ValueEntry(
                    value=-15.2e9,
                    display="$15.2 billion",
                    source_line=47,
                    source_url="https://github.com/.../analysis.py#L47",
                ),
            },
        )
        results.write("results.json")
    """

    metadata: ResultsMetadata
    values: dict[str, ValueEntry] = {}
    tables: dict[str, TableEntry] = {}
    charts: dict[str, ChartEntry] = {}

    def write(self, path: str | Path) -> None:
        """Serialize this model to an indented JSON file at ``path``.

        Missing parent directories are created first. The file content
        is the JSON-mode model dump with two-space indentation followed
        by a trailing newline.
        """
        target = Path(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        payload = json.dumps(self.model_dump(mode="json"), indent=2)
        target.write_text(f"{payload}\n")
68 changes: 68 additions & 0 deletions src/policyengine/results/tracking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""Source-line tracking helper for results.json values.

The key traceability guarantee of the blog pipeline is that every
number in a blog post links back to the exact line of code that
produced it. This module provides a helper that captures the
caller's line number automatically via ``inspect``.
"""

import inspect
from typing import Any


def tracked_value(
    value: Any,
    display: str,
    repo: str,
    filename: str = "analysis.py",
    branch: str = "main",
    *,
    _stack_offset: int = 1,
) -> dict[str, Any]:
    """Build a results.json value entry with automatic source tracking.

    Only the caller's *line number* is captured automatically. The file
    path used in the URL is NOT detected: it comes from ``filename``, so
    pass it explicitly when the calling script is not ``analysis.py``.

    Args:
        value: The raw value (typically numeric); stored unchanged.
        display: Human-readable formatted string (e.g. "$15.2 billion").
        repo: GitHub org/repo (e.g. "PolicyEngine/analyses").
        filename: Script path within the repo, used verbatim in the URL.
        branch: Git branch for the source URL.
        _stack_offset: Index into the call stack of the frame whose line
            number is recorded. 0 is this function itself, 1 (default)
            is the direct caller; a helper that wraps this function
            should pass 2 so the recorded line is its own caller's.

    Returns:
        Dict matching the ValueEntry schema::

            {
                "value": -15200000000,
                "display": "$15.2 billion",
                "source_line": 47,
                "source_url": "https://github.com/.../analysis.py#L47",
            }

    Raises:
        IndexError: If ``_stack_offset`` is beyond the depth of the
            current call stack.

    Example::

        from policyengine.results import tracked_value

        budget = reform_revenue - baseline_revenue
        results["values"]["budget_impact"] = tracked_value(
            value=budget,
            display=f"${abs(budget)/1e9:.1f} billion",
            repo="PolicyEngine/analyses",
        )
    """
    # context=0: only the line number is needed, so skip loading source
    # lines for every frame on the stack (the default is context=1).
    caller = inspect.stack(context=0)[_stack_offset]
    line = caller.lineno

    source_url = (
        f"https://github.com/{repo}/blob/{branch}/{filename}#L{line}"
    )

    return {
        "value": value,
        "display": display,
        "source_line": line,
        "source_url": source_url,
    }
Loading
Loading