From bdcdf6e0f6b97d80d4b5a1a218dab4ccf5de6f7c Mon Sep 17 00:00:00 2001
From: Anthony Volk <anth.volk@gmail.com>
Date: Thu, 5 Mar 2026 18:50:32 +0100
Subject: [PATCH 1/3] feat: Add label field to Variable class and extract from
 OpenFisca

OpenFisca variables have a `label` class attribute with human-readable
names (e.g. "Employment income"). This adds `label: str | None = None`
to the Variable model and extracts it via `getattr(var_obj, "label", None)`
in both US and UK model loading. Includes unit and integration tests.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/policyengine/core/variable.py             |   1 +
 .../tax_benefit_models/uk/model.py            |   1 +
 .../tax_benefit_models/us/model.py            |   1 +
 tests/fixtures/variable_label_fixtures.py     |  53 +++++
 tests/test_variable_labels.py                 | 195 ++++++++++++++++++
 5 files changed, 251 insertions(+)
 create mode 100644 tests/fixtures/variable_label_fixtures.py
 create mode 100644 tests/test_variable_labels.py

diff --git a/src/policyengine/core/variable.py b/src/policyengine/core/variable.py
index fa8e50e2..c3afa230 100644
--- a/src/policyengine/core/variable.py
+++ b/src/policyengine/core/variable.py
@@ -8,6 +8,7 @@
 class Variable(BaseModel):
     id: str
     name: str
+    label: str | None = None
     tax_benefit_model_version: TaxBenefitModelVersion
     entity: str
     description: str | None = None
diff --git a/src/policyengine/tax_benefit_models/uk/model.py b/src/policyengine/tax_benefit_models/uk/model.py
index 231f6c73..eac05e26 100644
--- a/src/policyengine/tax_benefit_models/uk/model.py
+++ b/src/policyengine/tax_benefit_models/uk/model.py
@@ -169,6 +169,7 @@ def __init__(self, **kwargs: dict):
             variable = Variable(
                 id=self.id + "-" + var_obj.name,
                 name=var_obj.name,
+                label=getattr(var_obj, "label", None),
                 tax_benefit_model_version=self,
                 entity=var_obj.entity.key,
                 description=var_obj.documentation,
diff --git a/src/policyengine/tax_benefit_models/us/model.py b/src/policyengine/tax_benefit_models/us/model.py
index d8c05704..d146abce 100644
--- a/src/policyengine/tax_benefit_models/us/model.py
+++ b/src/policyengine/tax_benefit_models/us/model.py
@@ -152,6 +152,7 @@ def __init__(self, **kwargs: dict):
             variable = Variable(
                 id=self.id + "-" + var_obj.name,
                 name=var_obj.name,
+                label=getattr(var_obj, "label", None),
                 tax_benefit_model_version=self,
                 entity=var_obj.entity.key,
                 description=var_obj.documentation,
diff --git a/tests/fixtures/variable_label_fixtures.py b/tests/fixtures/variable_label_fixtures.py
new file mode 100644
index 00000000..1ce3572e
--- /dev/null
+++ b/tests/fixtures/variable_label_fixtures.py
@@ -0,0 +1,53 @@
+"""Fixtures for variable label tests."""
+
+from unittest.mock import MagicMock
+
+
+def create_mock_openfisca_variable(
+    name: str,
+    label: str | None = None,
+    entity_key: str = "person",
+    documentation: str | None = None,
+    value_type: type = float,
+    default_value=0,
+) -> MagicMock:
+    """Create a mock OpenFisca variable object.
+
+    Mimics the attributes of an OpenFisca Variable class as seen by
+    ``getattr(var_obj, "label", None)`` in the model loading code.
+    """
+    var = MagicMock()
+    var.name = name
+    var.documentation = documentation
+    var.value_type = value_type
+    var.default_value = default_value
+
+    entity = MagicMock()
+    entity.key = entity_key
+    var.entity = entity
+
+    # OpenFisca variables expose label as a class attribute.
+    # If label is None we delete the attribute so getattr falls back.
+    if label is not None:
+        var.label = label
+    else:
+        del var.label
+
+    return var
+
+
+# Pre-built fixtures
+VAR_WITH_LABEL = create_mock_openfisca_variable(
+    name="employment_income",
+    label="Employment income",
+)
+
+VAR_WITHOUT_LABEL = create_mock_openfisca_variable(
+    name="age",
+    label=None,
+)
+
+VAR_WITH_EMPTY_LABEL = create_mock_openfisca_variable(
+    name="household_weight",
+    label="",
+)
diff --git a/tests/test_variable_labels.py b/tests/test_variable_labels.py
new file mode 100644
index 00000000..cb5b1707
--- /dev/null
+++ b/tests/test_variable_labels.py
@@ -0,0 +1,195 @@
+"""Tests for variable label extraction and storage."""
+
+from policyengine.core.tax_benefit_model import TaxBenefitModel
+from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion
+from policyengine.core.variable import Variable
+from policyengine.tax_benefit_models.uk import uk_latest
+from policyengine.tax_benefit_models.us import us_latest
+from tests.fixtures.variable_label_fixtures import (
+    VAR_WITH_EMPTY_LABEL,
+    VAR_WITH_LABEL,
+    VAR_WITHOUT_LABEL,
+    create_mock_openfisca_variable,
+)
+
+
+# ---------------------------------------------------------------------------
+# Unit tests for Variable model
+# ---------------------------------------------------------------------------
+
+
+class TestVariableLabelField:
+    """Tests for the Variable Pydantic model label field."""
+
+    def _make_version(self) -> TaxBenefitModelVersion:
+        model = TaxBenefitModel(id="test-model")
+        return TaxBenefitModelVersion(
+            model=model,
+            version="1.0.0",
+        )
+
+    def test_label_defaults_to_none(self):
+        """Variable.label should default to None when not provided."""
+        version = self._make_version()
+        var = Variable(
+            id="test-var",
+            name="income_tax",
+            tax_benefit_model_version=version,
+            entity="person",
+        )
+        assert var.label is None
+
+    def test_label_stores_string(self):
+        """Variable.label should accept and store a string value."""
+        version = self._make_version()
+        var = Variable(
+            id="test-var",
+            name="income_tax",
+            label="Income tax",
+            tax_benefit_model_version=version,
+            entity="person",
+        )
+        assert var.label == "Income tax"
+
+    def test_label_accepts_empty_string(self):
+        """Variable.label should accept an empty string."""
+        version = self._make_version()
+        var = Variable(
+            id="test-var",
+            name="income_tax",
+            label="",
+            tax_benefit_model_version=version,
+            entity="person",
+        )
+        assert var.label == ""
+
+
+# ---------------------------------------------------------------------------
+# Unit tests for getattr extraction pattern
+# ---------------------------------------------------------------------------
+
+
+class TestLabelExtraction:
+    """Tests for the getattr(var_obj, 'label', None) extraction pattern."""
+
+    def test_extracts_label_when_present(self):
+        """getattr should return the label when the attribute exists."""
+        assert getattr(VAR_WITH_LABEL, "label", None) == "Employment income"
+
+    def test_returns_none_when_label_missing(self):
+        """getattr should return None when the label attribute is absent."""
+        assert getattr(VAR_WITHOUT_LABEL, "label", None) is None
+
+    def test_returns_empty_string_when_label_empty(self):
+        """getattr should return empty string when label is set to ''."""
+        assert getattr(VAR_WITH_EMPTY_LABEL, "label", None) == ""
+
+    def test_custom_label_value(self):
+        """getattr should return any custom label string."""
+        var = create_mock_openfisca_variable(
+            name="council_tax",
+            label="Council tax",
+        )
+        assert getattr(var, "label", None) == "Council tax"
+
+
+# ---------------------------------------------------------------------------
+# Integration tests against real US model
+# ---------------------------------------------------------------------------
+
+
+class TestUSVariableLabels:
+    """Tests that US model variables carry labels from OpenFisca."""
+
+    def test_employment_income_has_label(self):
+        """employment_income should have a non-empty label."""
+        var = next(
+            (v for v in us_latest.variables if v.name == "employment_income"),
+            None,
+        )
+        assert var is not None, "employment_income not found in US model"
+        assert var.label is not None, "employment_income should have a label"
+        assert len(var.label) > 0, "employment_income label should be non-empty"
+
+    def test_income_tax_has_label(self):
+        """income_tax should have a non-empty label."""
+        var = next(
+            (v for v in us_latest.variables if v.name == "income_tax"),
+            None,
+        )
+        assert var is not None, "income_tax not found in US model"
+        assert var.label is not None, "income_tax should have a label"
+        assert len(var.label) > 0
+
+    def test_majority_of_variables_have_labels(self):
+        """Most US variables should have non-empty labels."""
+        total = len(us_latest.variables)
+        with_label = sum(
+            1
+            for v in us_latest.variables
+            if v.label is not None and len(v.label) > 0
+        )
+        ratio = with_label / total
+        assert ratio > 0.5, (
+            f"Expected >50% of US variables to have labels, "
+            f"got {with_label}/{total} ({ratio:.0%})"
+        )
+
+    def test_label_is_string_type(self):
+        """Variable labels should be strings (not other types)."""
+        for v in us_latest.variables[:100]:
+            if v.label is not None:
+                assert isinstance(v.label, str), (
+                    f"Variable {v.name} label is {type(v.label)}, expected str"
+                )
+
+
+# ---------------------------------------------------------------------------
+# Integration tests against real UK model
+# ---------------------------------------------------------------------------
+
+
+class TestUKVariableLabels:
+    """Tests that UK model variables carry labels from OpenFisca."""
+
+    def test_employment_income_has_label(self):
+        """employment_income should have a non-empty label."""
+        var = next(
+            (v for v in uk_latest.variables if v.name == "employment_income"),
+            None,
+        )
+        assert var is not None, "employment_income not found in UK model"
+        assert var.label is not None, "employment_income should have a label"
+        assert len(var.label) > 0
+
+    def test_income_tax_has_label(self):
+        """income_tax should have a non-empty label."""
+        var = next(
+            (v for v in uk_latest.variables if v.name == "income_tax"),
+            None,
+        )
+        assert var is not None, "income_tax not found in UK model"
+        assert var.label is not None, "income_tax should have a label"
+        assert len(var.label) > 0
+
+    def test_majority_of_variables_have_labels(self):
+        """Most UK variables should have non-empty labels."""
+        total = len(uk_latest.variables)
+        with_label = sum(
+            1
+            for v in uk_latest.variables
+            if v.label is not None and len(v.label) > 0
+        )
+        ratio = with_label / total
+        assert ratio > 0.5, (
+            f"Expected >50% of UK variables to have labels, "
+            f"got {with_label}/{total} ({ratio:.0%})"
+        )
+
+    def test_label_is_string_type(self):
+        """Variable labels should be strings (not other types)."""
+        for v in uk_latest.variables[:100]:
+            if v.label is not None:
+                assert isinstance(v.label, str), (
+                    f"Variable {v.name} label is {type(v.label)}, expected str"
+                )

From 5811c7a191bc96462b0b723ebcf4da25b4291f5e Mon Sep 17 00:00:00 2001
From: Anthony Volk <anth.volk@gmail.com>
Date: Thu, 5 Mar 2026 19:21:50 +0100
Subject: [PATCH 2/3] style: Run ruff format and fix lint errors

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 pr-242-review-findings.md                     | 187 ++++++++++++++++++
 scripts/us_reform_approach_comparison.py      | 104 ++++++++++
 src/policyengine/outputs/decile_impact.py     |   4 +-
 .../outputs/intra_decile_impact.py            |  16 +-
 tests/test_intra_decile_impact.py             |  17 +-
 tests/test_local_authority_impact.py          |   4 +-
 tests/test_models.py                          |  24 ++-
 tests/test_parametric_reforms.py              |  16 +-
 tests/test_us_reform_application.py           |   5 +-
 tests/test_variable_labels.py                 |   5 +-
 10 files changed, 352 insertions(+), 30 deletions(-)
 create mode 100644 pr-242-review-findings.md
 create mode 100644 scripts/us_reform_approach_comparison.py

diff --git a/pr-242-review-findings.md b/pr-242-review-findings.md
new file mode 100644
index 00000000..7defbf41
--- /dev/null
+++ b/pr-242-review-findings.md
@@ -0,0 +1,187 @@
+# PR #242 Review Findings
+
+## Critical
+
+### 2. `Simulation.ensure()` catches all exceptions from `load()`, silently re-runs simulation
+
+**File:** `src/policyengine/core/simulation.py:45-49`
+
+```python
+def ensure(self):
+    cached_result = _cache.get(self.id)
+    if cached_result:
+        self.output_dataset = cached_result.output_dataset
+        return
+    try:
+        self.tax_benefit_model_version.load(self)
+    except Exception:       # <--- catches everything
+        self.run()
+        self.save()
+
+    _cache.add(self.id, self)
+```
+
+The `except Exception` is intended to catch "file not found" when no cached simulation exists, but it also catches corrupted H5 files, permission errors, OOM, and schema mismatches. In all cases it silently re-runs the full simulation with no logging. A corrupted cache file would cause every request to silently recompute from scratch, destroying performance with no indication to the user.
+
+**Fix:** Catch only `FileNotFoundError`. Log other exceptions before falling back to `run()`.
+
+---
+
+## Important
+
+### 3. Five new poverty convenience functions are not exported
+
+**File:** `src/policyengine/outputs/__init__.py`
+
+The following public functions were added to `poverty.py` but are not imported or listed in `__all__`:
+
+- `calculate_uk_poverty_by_age`
+- `calculate_us_poverty_by_age`
+- `calculate_uk_poverty_by_gender`
+- `calculate_us_poverty_by_gender`
+- `calculate_us_poverty_by_race`
+
+Also missing: `AGE_GROUPS`, `GENDER_GROUPS`, `RACE_GROUPS` constants.
+
+Users importing from `policyengine.outputs` (the expected pattern) will not see these functions.
+
+**Fix:** Add the missing imports and `__all__` entries in `outputs/__init__.py`.
+
+---
+
+### 4. Poverty convenience functions overwrite `filter_variable` with group label
+
+**File:** `src/policyengine/outputs/poverty.py:289` (and lines 327, 365, 404, 444)
+
+```python
+for pov in group_results.outputs:
+    pov.filter_variable = group_name  # "child" replaces "age"
+    results.append(pov)
+```
+
+After running the poverty calculation with e.g. `filter_variable="age", filter_variable_leq=17`, the code mutates `filter_variable` from `"age"` to `"child"`. The object becomes internally inconsistent: `filter_variable` says `"child"` but `filter_variable_leq` says `17`. If anyone inspects or re-uses the object, the filter metadata is misleading.
+
+**Fix:** Add a separate field (e.g., `filter_group: str | None = None`) to the `Poverty` class for the group label, rather than overwriting `filter_variable`.
+
+---
+
+### 5. Bare `except Exception` in UK region CSV loaders
+
+**File:** `src/policyengine/countries/uk/regions.py:59, 91`
+
+```python
+def _load_constituencies_from_csv() -> list[dict]:
+    try:
+        from policyengine_core.tools.google_cloud import download
+    except ImportError:
+        return []
+
+    try:
+        csv_path = download(...)
+        df = pd.read_csv(csv_path)
+        return [{"code": row["code"], "name": row["name"]} for _, row in df.iterrows()]
+    except Exception:       # <--- catches everything
+        return []
+```
+
+Same pattern in `_load_local_authorities_from_csv()`. Auth failures, CSV schema changes (`KeyError` on missing `code`/`name` columns), network errors, and corrupt files all silently return `[]`. Downstream, the registry treats this as "no constituencies exist" and the entire UK geographic impact feature silently degrades to empty results.
+
+**Fix:** At minimum, add `logging.warning()` with the exception. Ideally, narrow the catch to expected failure modes (e.g., `IOError`, `OSError`) and let unexpected errors propagate.
+
+---
+
+### 6. UK model fetches PyPI metadata at module import time with no error handling
+
+**File:** `src/policyengine/tax_benefit_models/uk/model.py:40-45`
+
+```python
+pkg_version = version("policyengine-uk")
+
+response = requests.get("https://pypi.org/pypi/policyengine-uk/json")
+data = response.json()
+upload_time = data["releases"][pkg_version][0]["upload_time_iso_8601"]
+```
+
+This runs at module import time (not in a function or `__init__`). No timeout, no error handling, no fallback. If PyPI is down, slow, or unreachable (CI, air-gapped environments, proxy), importing the module hangs or crashes.
+
+The US model already handles this correctly with a lazy-loading function `_get_us_package_metadata()` called from `__init__`.
+
+**Fix:** Move to a lazy-loading function like the US model. Add a timeout and try/except.
+
+---
+
+### 7. `_resolve_id_column` returns nonexistent column name without validation
+
+**File:** `src/policyengine/utils/entity_utils.py:7-19`
+
+```python
+def _resolve_id_column(person_data: pd.DataFrame, entity_name: str) -> str:
+    prefixed = f"person_{entity_name}_id"
+    bare = f"{entity_name}_id"
+    if prefixed in person_data.columns:
+        return prefixed
+    return bare       # <--- returns bare even if it doesn't exist
+```
+
+If neither column exists, returns `bare` anyway. The caller then does `person_data[id_col].values` which raises a cryptic `KeyError: 'household_id'` with no context about what was being looked up or what columns are available.
+
+**Fix:** Check both columns and raise a descriptive `ValueError` if neither exists:
+
+```python
+if bare in person_data.columns:
+    return bare
+raise ValueError(
+    f"Cannot find ID column for entity '{entity_name}'. "
+    f"Expected '{prefixed}' or '{bare}', available columns: {list(person_data.columns)}"
+)
+```
+
+---
+
+### 8. `filter_dataset_by_household_variable` passes unknown entities through unfiltered
+
+**File:** `src/policyengine/utils/entity_utils.py:112-118`
+
+```python
+for entity_name, mdf in entity_data.items():
+    df = pd.DataFrame(mdf)
+    id_col = f"{entity_name}_id"
+    if entity_name in filtered_ids and id_col in df.columns:
+        filtered_df = df[df[id_col].isin(filtered_ids[entity_name])]
+    else:
+        filtered_df = df       # <--- entire entity passes through unfiltered
+```
+
+If an entity isn't in `filtered_ids` (e.g., missing from `group_entities`, or a misspelling), its data passes through completely unfiltered. The resulting dataset has a mix of filtered and unfiltered entities — some scoped to a region, others containing the entire national dataset. Simulation results would be silently wrong.
+
+**Fix:** Log a warning or raise an error when the else branch is taken for a non-"person" entity.
+
+---
+
+### 9. `reform_dict_from_parameter_values` returns `None` despite `dict` return type
+
+**File:** `src/policyengine/utils/parametric_reforms.py:35-36`
+
+```python
+def reform_dict_from_parameter_values(
+    parameter_values: list[ParameterValue],
+) -> dict:
+    if not parameter_values:
+        return None       # <--- type says dict, returns None
+```
+
+The type annotation says `-> dict` but returns `None` for empty/None inputs. Callers must handle `None` as a special case. The function also accepts `None` despite the type hint saying `list[ParameterValue]`.
+
+**Fix:** Either return `{}` instead of `None` for empty inputs, or change the signature to `parameter_values: list[ParameterValue] | None` and `-> dict | None`.
+
+---
+
+### 10. `Poverty.run()` has no direct unit test
+
+**File:** `src/policyengine/outputs/poverty.py:68-131`
+
+The core `Poverty.run()` method has significant logic: entity mapping (`map_to_entity`), filter mask construction with three filter operators (`eq`, `leq`, `geq`), and the weighted headcount calculation. All existing poverty tests in `test_poverty_by_demographics.py` mock the base calculation functions — they never exercise `Poverty.run()` itself.
+
+If there's a bug in filter construction, the `== True` comparison on line 125, or the weighted sum logic, no current test catches it.
+
+**Fix:** Add a test that constructs a mock simulation with known poverty values and demographic data, calls `Poverty.run()` directly, and verifies headcount/rate calculations with each filter type (`eq`, `leq`, `geq`), including the combined adult filter that uses both `geq` and `leq` simultaneously.
diff --git a/scripts/us_reform_approach_comparison.py b/scripts/us_reform_approach_comparison.py
new file mode 100644
index 00000000..ace748b9
--- /dev/null
+++ b/scripts/us_reform_approach_comparison.py
@@ -0,0 +1,104 @@
+"""Minimal reproduction: US reform application — construction-time vs post-construction.
+
+This script demonstrates whether applying reforms via p.update() AFTER
+Microsimulation construction produces the same results as passing reform=
+at construction time for policyengine-us.
+
+We test with two parameters to be thorough:
+  1. Standard deduction (SINGLE) — doubled from ~15,750 to 50,000
+  2. Top bracket rate — raised from 0.37 to 0.50
+"""
+
+from policyengine_core.periods import period as make_period
+from policyengine_us import Microsimulation
+
+# ── Configuration ──────────────────────────────────────────────────────
+YEAR = 2025
+PERIOD_STR = f"{YEAR}-01-01"
+VARIABLE = "income_tax"
+
+REFORMS = {
+    "gov.irs.deductions.standard.amount.SINGLE": 50_000,
+    "gov.irs.income.bracket.rates.7": 0.50,
+}
+
+
+def get_param_value(sim, path, instant):
+    node = sim.tax_benefit_system.parameters
+    for part in path.split("."):
+        node = getattr(node, part)
+    return float(node(instant))
+
+
+def print_params(label, sim):
+    print(f"  [{label}] Parameter values:")
+    for path, target in REFORMS.items():
+        val = get_param_value(sim, path, PERIOD_STR)
+        print(f"    {path} = {val}")
+
+
+# ── Approach C: baseline (no reform) — run first for reference ────────
+print("Building baseline (no reform)...")
+sim_c = Microsimulation()
+tax_c = float(sim_c.calculate(VARIABLE, period=YEAR).sum())
+print("BASELINE (no reform)")
+print_params("C", sim_c)
+print(f"  income_tax (sum): {tax_c:,.2f}\n")
+
+
+# ── Approach A: reform dict at construction time ───────────────────────
+print("Building Approach A (reform= at construction time)...")
+reform_dict = {path: {PERIOD_STR: val} for path, val in REFORMS.items()}
+sim_a = Microsimulation(reform=reform_dict)
+tax_a = float(sim_a.calculate(VARIABLE, period=YEAR).sum())
+print("APPROACH A: reform= at construction time")
+print_params("A", sim_a)
+print(f"  income_tax (sum): {tax_a:,.2f}\n")
+
+
+# ── Approach B: p.update() after construction (UK-style) ───────────────
+print("Building Approach B (p.update() after construction)...")
+sim_b = Microsimulation()
+for path, val in REFORMS.items():
+    p = sim_b.tax_benefit_system.parameters.get_child(path)
+    p.update(value=val, start=make_period(PERIOD_STR))
+tax_b = float(sim_b.calculate(VARIABLE, period=YEAR).sum())
+print("APPROACH B: p.update() after construction (UK-style)")
+print_params("B", sim_b)
+print(f"  income_tax (sum): {tax_b:,.2f}\n")
+
+
+# ── Comparison ─────────────────────────────────────────────────────────
+print("=" * 70)
+print("COMPARISON")
+print(f"  C (baseline / no reform):      {tax_c:>25,.2f}")
+print(f"  A (construction-time reform):  {tax_a:>25,.2f}")
+print(f"  B (post-construction update):  {tax_b:>25,.2f}")
+print()
+print(f"  Delta A vs C (reform effect):  {tax_a - tax_c:>25,.2f}")
+print(f"  Delta B vs C (reform effect):  {tax_b - tax_c:>25,.2f}")
+print(f"  Delta A vs B:                  {tax_a - tax_b:>25,.2f}")
+print()
+
+if abs(tax_a - tax_c) < 1.0:
+    print("WARNING: Approach A shows no difference from baseline.")
+    print(
+        "  The reform may not have taken effect. Check parameter values above."
+    )
+elif abs(tax_a - tax_b) < 0.01:
+    print(
+        "RESULT: A and B match — both approaches work identically for policyengine-us."
+    )
+else:
+    print("RESULT: A and B DIFFER — post-construction p.update() does NOT")
+    print("        produce the same result as construction-time reform=.")
+    print()
+    if abs(tax_b - tax_c) < 1.0:
+        print("  ** B matches baseline C — the p.update() had NO EFFECT on")
+        print(
+            "     the calculated income_tax. The reform was silently ignored."
+        )
+    else:
+        print(
+            "  ** B differs from both A and C — partial/incorrect application."
+        )
diff --git a/src/policyengine/outputs/decile_impact.py b/src/policyengine/outputs/decile_impact.py
index 9d5e2e43..3ff8f3d2 100644
--- a/src/policyengine/outputs/decile_impact.py
+++ b/src/policyengine/outputs/decile_impact.py
@@ -16,7 +16,9 @@ class DecileImpact(Output):
     baseline_simulation: Simulation
     reform_simulation: Simulation
     income_variable: str = "equiv_hbai_household_net_income"
-    decile_variable: str | None = None  # If set, use pre-computed grouping variable
+    decile_variable: str | None = (
+        None  # If set, use pre-computed grouping variable
+    )
     entity: str | None = None
     decile: int
     quantiles: int = 10
diff --git a/src/policyengine/outputs/intra_decile_impact.py b/src/policyengine/outputs/intra_decile_impact.py
index e2b01243..94a0f265 100644
--- a/src/policyengine/outputs/intra_decile_impact.py
+++ b/src/policyengine/outputs/intra_decile_impact.py
@@ -104,7 +104,9 @@ def run(self):
         for lower, upper in zip(BOUNDS[:-1], BOUNDS[1:]):
             in_category = (income_change > lower) & (income_change <= upper)
             in_both = in_decile & in_category
-            proportions.append(float(np.sum(people[in_both]) / people_in_decile))
+            proportions.append(
+                float(np.sum(people[in_both]) / people_in_decile)
+            )
 
         self.lose_more_than_5pct = proportions[0]
         self.lose_less_than_5pct = proportions[1]
@@ -150,11 +152,15 @@ def compute_intra_decile_impacts(
         entity=entity,
         decile=0,
         quantiles=quantiles,
-        lose_more_than_5pct=sum(r.lose_more_than_5pct for r in results) / quantiles,
-        lose_less_than_5pct=sum(r.lose_less_than_5pct for r in results) / quantiles,
+        lose_more_than_5pct=sum(r.lose_more_than_5pct for r in results)
+        / quantiles,
+        lose_less_than_5pct=sum(r.lose_less_than_5pct for r in results)
+        / quantiles,
         no_change=sum(r.no_change for r in results) / quantiles,
-        gain_less_than_5pct=sum(r.gain_less_than_5pct for r in results) / quantiles,
-        gain_more_than_5pct=sum(r.gain_more_than_5pct for r in results) / quantiles,
+        gain_less_than_5pct=sum(r.gain_less_than_5pct for r in results)
+        / quantiles,
+        gain_more_than_5pct=sum(r.gain_more_than_5pct for r in results)
+        / quantiles,
     )
     results.append(overall)
 
diff --git a/tests/test_intra_decile_impact.py b/tests/test_intra_decile_impact.py
index d49c7cf8..098a0b48 100644
--- a/tests/test_intra_decile_impact.py
+++ b/tests/test_intra_decile_impact.py
@@ -20,7 +20,9 @@ def _make_variable_mock(name: str, entity: str) -> MagicMock:
     return var
 
 
-def _make_sim(household_data: dict, variables: list | None = None) -> MagicMock:
+def _make_sim(
+    household_data: dict, variables: list | None = None
+) -> MagicMock:
     """Create a mock Simulation with household-level data."""
     hh_df = MicroDataFrame(
         pd.DataFrame(household_data),
@@ -70,8 +72,12 @@ def test_intra_decile_no_change():
 
     for r in results.outputs:
         assert r.no_change == 1.0 or abs(r.no_change - 1.0) < 1e-9
-        assert r.lose_more_than_5pct == 0.0 or abs(r.lose_more_than_5pct) < 1e-9
-        assert r.gain_more_than_5pct == 0.0 or abs(r.gain_more_than_5pct) < 1e-9
+        assert (
+            r.lose_more_than_5pct == 0.0 or abs(r.lose_more_than_5pct) < 1e-9
+        )
+        assert (
+            r.gain_more_than_5pct == 0.0 or abs(r.gain_more_than_5pct) < 1e-9
+        )
 
 
 def test_intra_decile_all_large_gain():
@@ -102,7 +108,10 @@ def test_intra_decile_all_large_gain():
     )
 
     for r in results.outputs:
-        assert r.gain_more_than_5pct == 1.0 or abs(r.gain_more_than_5pct - 1.0) < 1e-9
+        assert (
+            r.gain_more_than_5pct == 1.0
+            or abs(r.gain_more_than_5pct - 1.0) < 1e-9
+        )
         assert r.no_change == 0.0 or abs(r.no_change) < 1e-9
 
 
diff --git a/tests/test_local_authority_impact.py b/tests/test_local_authority_impact.py
index a872262e..db555713 100644
--- a/tests/test_local_authority_impact.py
+++ b/tests/test_local_authority_impact.py
@@ -66,7 +66,9 @@ def test_basic_local_authority_reweighting():
     assert impact.local_authority_results is not None
     assert len(impact.local_authority_results) == 2
 
-    by_code = {r["local_authority_code"]: r for r in impact.local_authority_results}
+    by_code = {
+        r["local_authority_code"]: r for r in impact.local_authority_results
+    }
 
     la1 = by_code["LA001"]
     # Weighted change: (1*3000 + 1*3000) / 2 = 3000
diff --git a/tests/test_models.py b/tests/test_models.py
index eb509ea9..dabc90c4 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -179,7 +179,9 @@ class TestVariableDefaultValue:
 
     def test_us_age_variable_has_default_value_40(self):
         """US age variable should have default_value of 40."""
-        age_var = next((v for v in us_latest.variables if v.name == "age"), None)
+        age_var = next(
+            (v for v in us_latest.variables if v.name == "age"), None
+        )
         assert age_var is not None, "age variable not found in US model"
         assert age_var.default_value == 40, (
             f"Expected age default_value to be 40, got {age_var.default_value}"
@@ -191,7 +193,9 @@ def test_us_enum_variable_has_string_default_value(self):
         age_group_var = next(
             (v for v in us_latest.variables if v.name == "age_group"), None
         )
-        assert age_group_var is not None, "age_group variable not found in US model"
+        assert age_group_var is not None, (
+            "age_group variable not found in US model"
+        )
         assert age_group_var.default_value == "WORKING_AGE", (
             f"Expected age_group default_value to be 'WORKING_AGE', "
             f"got {age_group_var.default_value}"
@@ -199,12 +203,20 @@ def test_us_enum_variable_has_string_default_value(self):
 
     def test_us_variables_have_value_type(self):
         """US variables should have value_type set."""
-        age_var = next((v for v in us_latest.variables if v.name == "age"), None)
+        age_var = next(
+            (v for v in us_latest.variables if v.name == "age"), None
+        )
         assert age_var is not None, "age variable not found in US model"
-        assert age_var.value_type is not None, "age variable should have value_type"
+        assert age_var.value_type is not None, (
+            "age variable should have value_type"
+        )
 
     def test_uk_age_variable_has_default_value(self):
         """UK age variable should have default_value set."""
-        age_var = next((v for v in uk_latest.variables if v.name == "age"), None)
+        age_var = next(
+            (v for v in uk_latest.variables if v.name == "age"), None
+        )
         assert age_var is not None, "age variable not found in UK model"
-        assert age_var.default_value is not None, "UK age should have default_value"
+        assert age_var.default_value is not None, (
+            "UK age should have default_value"
+        )
diff --git a/tests/test_parametric_reforms.py b/tests/test_parametric_reforms.py
index 418f2fce..3a0eadc5 100644
--- a/tests/test_parametric_reforms.py
+++ b/tests/test_parametric_reforms.py
@@ -196,9 +196,7 @@ def test__given_modifier__then_calls_p_update_for_each_value(self):
 
         mock_simulation = MagicMock()
         mock_param_node = MagicMock()
-        mock_simulation.tax_benefit_system.parameters.get_child.return_value = (
-            mock_param_node
-        )
+        mock_simulation.tax_benefit_system.parameters.get_child.return_value = mock_param_node
 
         param_values = [SINGLE_PARAM_VALUE]
         modifier = simulation_modifier_from_parameter_values(param_values)
@@ -222,9 +220,7 @@ def test__given_multiple_values__then_applies_all_updates(self):
 
         mock_simulation = MagicMock()
         mock_param_node = MagicMock()
-        mock_simulation.tax_benefit_system.parameters.get_child.return_value = (
-            mock_param_node
-        )
+        mock_simulation.tax_benefit_system.parameters.get_child.return_value = mock_param_node
 
         param_values = MULTIPLE_DIFFERENT_PARAMS
         modifier = simulation_modifier_from_parameter_values(param_values)
@@ -246,11 +242,11 @@ def test__given_modifier__then_returns_simulation(self):
 
         mock_simulation = MagicMock()
         mock_param_node = MagicMock()
-        mock_simulation.tax_benefit_system.parameters.get_child.return_value = (
-            mock_param_node
-        )
+        mock_simulation.tax_benefit_system.parameters.get_child.return_value = mock_param_node
 
-        modifier = simulation_modifier_from_parameter_values([SINGLE_PARAM_VALUE])
+        modifier = simulation_modifier_from_parameter_values(
+            [SINGLE_PARAM_VALUE]
+        )
 
         # When
         result = modifier(mock_simulation)
diff --git a/tests/test_us_reform_application.py b/tests/test_us_reform_application.py
index 21b9d01c..2331eccc 100644
--- a/tests/test_us_reform_application.py
+++ b/tests/test_us_reform_application.py
@@ -113,7 +113,10 @@ def test__given_same_policy_twice__then_results_are_deterministic(self):
         result2 = calculate_us_household_impact(household, policy=policy)
 
         # Then
-        assert result1.tax_unit[0]["income_tax"] == result2.tax_unit[0]["income_tax"]
+        assert (
+            result1.tax_unit[0]["income_tax"]
+            == result2.tax_unit[0]["income_tax"]
+        )
 
     def test__given_custom_deduction_value__then_tax_reflects_value(self):
         """Given: Custom standard deduction value
diff --git a/tests/test_variable_labels.py b/tests/test_variable_labels.py
index cb5b1707..8d2c5ff8 100644
--- a/tests/test_variable_labels.py
+++ b/tests/test_variable_labels.py
@@ -12,7 +12,6 @@
     create_mock_openfisca_variable,
 )
 
-
 # ---------------------------------------------------------------------------
 # Unit tests for Variable model
 # ---------------------------------------------------------------------------
@@ -109,7 +108,9 @@ def test_employment_income_has_label(self):
         )
         assert var is not None, "employment_income not found in US model"
         assert var.label is not None, "employment_income should have a label"
-        assert len(var.label) > 0, "employment_income label should be non-empty"
+        assert len(var.label) > 0, (
+            "employment_income label should be non-empty"
+        )
 
     def test_income_tax_has_label(self):
         """income_tax should have a non-empty label."""

From fcb2d14ae19440b724a31000ab326497ce004656 Mon Sep 17 00:00:00 2001
From: Anthony Volk <anth.volk@gmail.com>
Date: Wed, 11 Mar 2026 21:27:50 +0100
Subject: [PATCH 3/3] chore: Remove stray files and reformat with ruff

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 pr-242-review-findings.md                     | 187 ------------------
 scripts/us_reform_approach_comparison.py      | 104 ----------
 src/policyengine/outputs/decile_impact.py     |   4 +-
 .../outputs/intra_decile_impact.py            |  16 +-
 tests/test_intra_decile_impact.py             |  17 +-
 tests/test_local_authority_impact.py          |   4 +-
 tests/test_models.py                          |  24 +--
 tests/test_parametric_reforms.py              |  16 +-
 tests/test_us_reform_application.py           |   5 +-
 tests/test_variable_labels.py                 |  12 +-
 10 files changed, 31 insertions(+), 358 deletions(-)
 delete mode 100644 pr-242-review-findings.md
 delete mode 100644 scripts/us_reform_approach_comparison.py

diff --git a/pr-242-review-findings.md b/pr-242-review-findings.md
deleted file mode 100644
index 7defbf41..00000000
--- a/pr-242-review-findings.md
+++ /dev/null
@@ -1,187 +0,0 @@
-# PR #242 Review Findings
-
-## Critical
-
-### 2. `Simulation.ensure()` catches all exceptions from `load()`, silently re-runs simulation
-
-**File:** `src/policyengine/core/simulation.py:45-49`
-
-```python
-def ensure(self):
-    cached_result = _cache.get(self.id)
-    if cached_result:
-        self.output_dataset = cached_result.output_dataset
-        return
-    try:
-        self.tax_benefit_model_version.load(self)
-    except Exception:       # <--- catches everything
-        self.run()
-        self.save()
-
-    _cache.add(self.id, self)
-```
-
-The `except Exception` is intended to catch "file not found" when no cached simulation exists, but it also catches corrupted H5 files, permission errors, OOM, and schema mismatches. In all cases it silently re-runs the full simulation with no logging. A corrupted cache file would cause every request to silently recompute from scratch, destroying performance with no indication to the user.
-
-**Fix:** Catch only `FileNotFoundError`. Log other exceptions before falling back to `run()`.
-
----
-
-## Important
-
-### 3. Five new poverty convenience functions are not exported
-
-**File:** `src/policyengine/outputs/__init__.py`
-
-The following public functions were added to `poverty.py` but are not imported or listed in `__all__`:
-
-- `calculate_uk_poverty_by_age`
-- `calculate_us_poverty_by_age`
-- `calculate_uk_poverty_by_gender`
-- `calculate_us_poverty_by_gender`
-- `calculate_us_poverty_by_race`
-
-Also missing: `AGE_GROUPS`, `GENDER_GROUPS`, `RACE_GROUPS` constants.
-
-Users importing from `policyengine.outputs` (the expected pattern) will not see these functions.
-
-**Fix:** Add the missing imports and `__all__` entries in `outputs/__init__.py`.
-
----
-
-### 4. Poverty convenience functions overwrite `filter_variable` with group label
-
-**File:** `src/policyengine/outputs/poverty.py:289` (and lines 327, 365, 404, 444)
-
-```python
-for pov in group_results.outputs:
-    pov.filter_variable = group_name  # "child" replaces "age"
-    results.append(pov)
-```
-
-After running the poverty calculation with e.g. `filter_variable="age", filter_variable_leq=17`, the code mutates `filter_variable` from `"age"` to `"child"`. The object becomes internally inconsistent: `filter_variable` says `"child"` but `filter_variable_leq` says `17`. If anyone inspects or re-uses the object, the filter metadata is misleading.
-
-**Fix:** Add a separate field (e.g., `filter_group: str | None = None`) to the `Poverty` class for the group label, rather than overwriting `filter_variable`.
-
----
-
-### 5. Bare `except Exception` in UK region CSV loaders
-
-**File:** `src/policyengine/countries/uk/regions.py:59, 91`
-
-```python
-def _load_constituencies_from_csv() -> list[dict]:
-    try:
-        from policyengine_core.tools.google_cloud import download
-    except ImportError:
-        return []
-
-    try:
-        csv_path = download(...)
-        df = pd.read_csv(csv_path)
-        return [{"code": row["code"], "name": row["name"]} for _, row in df.iterrows()]
-    except Exception:       # <--- catches everything
-        return []
-```
-
-Same pattern in `_load_local_authorities_from_csv()`. Auth failures, CSV schema changes (`KeyError` on missing `code`/`name` columns), network errors, and corrupt files all silently return `[]`. Downstream, the registry treats this as "no constituencies exist" and the entire UK geographic impact feature silently degrades to empty results.
-
-**Fix:** At minimum, add `logging.warning()` with the exception. Ideally, narrow the catch to expected failure modes (e.g., `IOError`, `OSError`) and let unexpected errors propagate.
-
----
-
-### 6. UK model fetches PyPI metadata at module import time with no error handling
-
-**File:** `src/policyengine/tax_benefit_models/uk/model.py:40-45`
-
-```python
-pkg_version = version("policyengine-uk")
-
-response = requests.get("https://pypi.org/pypi/policyengine-uk/json")
-data = response.json()
-upload_time = data["releases"][pkg_version][0]["upload_time_iso_8601"]
-```
-
-This runs at module import time (not in a function or `__init__`). No timeout, no error handling, no fallback. If PyPI is down, slow, or unreachable (CI, air-gapped environments, proxy), importing the module hangs or crashes.
-
-The US model already handles this correctly with a lazy-loading function `_get_us_package_metadata()` called from `__init__`.
-
-**Fix:** Move to a lazy-loading function like the US model. Add a timeout and try/except.
-
----
-
-### 7. `_resolve_id_column` returns nonexistent column name without validation
-
-**File:** `src/policyengine/utils/entity_utils.py:7-19`
-
-```python
-def _resolve_id_column(person_data: pd.DataFrame, entity_name: str) -> str:
-    prefixed = f"person_{entity_name}_id"
-    bare = f"{entity_name}_id"
-    if prefixed in person_data.columns:
-        return prefixed
-    return bare       # <--- returns bare even if it doesn't exist
-```
-
-If neither column exists, returns `bare` anyway. The caller then does `person_data[id_col].values` which raises a cryptic `KeyError: 'household_id'` with no context about what was being looked up or what columns are available.
-
-**Fix:** Check both columns and raise a descriptive `ValueError` if neither exists:
-
-```python
-if bare in person_data.columns:
-    return bare
-raise ValueError(
-    f"Cannot find ID column for entity '{entity_name}'. "
-    f"Expected '{prefixed}' or '{bare}', available columns: {list(person_data.columns)}"
-)
-```
-
----
-
-### 8. `filter_dataset_by_household_variable` passes unknown entities through unfiltered
-
-**File:** `src/policyengine/utils/entity_utils.py:112-118`
-
-```python
-for entity_name, mdf in entity_data.items():
-    df = pd.DataFrame(mdf)
-    id_col = f"{entity_name}_id"
-    if entity_name in filtered_ids and id_col in df.columns:
-        filtered_df = df[df[id_col].isin(filtered_ids[entity_name])]
-    else:
-        filtered_df = df       # <--- entire entity passes through unfiltered
-```
-
-If an entity isn't in `filtered_ids` (e.g., missing from `group_entities`, or a misspelling), its data passes through completely unfiltered. The resulting dataset has a mix of filtered and unfiltered entities — some scoped to a region, others containing the entire national dataset. Simulation results would be silently wrong.
-
-**Fix:** Log a warning or raise an error when the else branch is taken for a non-"person" entity.
-
----
-
-### 9. `reform_dict_from_parameter_values` returns `None` despite `dict` return type
-
-**File:** `src/policyengine/utils/parametric_reforms.py:35-36`
-
-```python
-def reform_dict_from_parameter_values(
-    parameter_values: list[ParameterValue],
-) -> dict:
-    if not parameter_values:
-        return None       # <--- type says dict, returns None
-```
-
-The type annotation says `-> dict` but returns `None` for empty/None inputs. Callers must handle `None` as a special case. The function also accepts `None` despite the type hint saying `list[ParameterValue]`.
-
-**Fix:** Either return `{}` instead of `None` for empty inputs, or change the signature to `parameter_values: list[ParameterValue] | None` and `-> dict | None`.
-
----
-
-### 10. `Poverty.run()` has no direct unit test
-
-**File:** `src/policyengine/outputs/poverty.py:68-131`
-
-The core `Poverty.run()` method has significant logic: entity mapping (`map_to_entity`), filter mask construction with three filter operators (`eq`, `leq`, `geq`), and the weighted headcount calculation. All existing poverty tests in `test_poverty_by_demographics.py` mock the base calculation functions — they never exercise `Poverty.run()` itself.
-
-If there's a bug in filter construction, the `== True` comparison on line 125, or the weighted sum logic, no current test catches it.
-
-**Fix:** Add a test that constructs a mock simulation with known poverty values and demographic data, calls `Poverty.run()` directly, and verifies headcount/rate calculations with each filter type (`eq`, `leq`, `geq`), including the combined adult filter that uses both `geq` and `leq` simultaneously.
diff --git a/scripts/us_reform_approach_comparison.py b/scripts/us_reform_approach_comparison.py
deleted file mode 100644
index ace748b9..00000000
--- a/scripts/us_reform_approach_comparison.py
+++ /dev/null
@@ -1,104 +0,0 @@
-"""Minimal reproduction: US reform application — construction-time vs post-construction.
-
-This script demonstrates whether applying reforms via p.update() AFTER
-Microsimulation construction produces the same results as passing reform=
-at construction time for policyengine-us.
-
-We test with two parameters to be thorough:
-  1. Standard deduction (SINGLE) — doubled from ~15,750 to 50,000
-  2. Top bracket rate — raised from 0.37 to 0.50
-"""
-
-from policyengine_core.periods import period as make_period
-from policyengine_us import Microsimulation
-
-# ── Configuration ──────────────────────────────────────────────────────
-YEAR = 2025
-PERIOD_STR = f"{YEAR}-01-01"
-VARIABLE = "income_tax"
-
-REFORMS = {
-    "gov.irs.deductions.standard.amount.SINGLE": 50_000,
-    "gov.irs.income.bracket.rates.7": 0.50,
-}
-
-
-def get_param_value(sim, path, instant):
-    node = sim.tax_benefit_system.parameters
-    for part in path.split("."):
-        node = getattr(node, part)
-    return float(node(instant))
-
-
-def print_params(label, sim):
-    print(f"  [{label}] Parameter values:")
-    for path, target in REFORMS.items():
-        val = get_param_value(sim, path, PERIOD_STR)
-        print(f"    {path} = {val}")
-
-
-# ── Approach C: baseline (no reform) — run first for reference ────────
-print("Building baseline (no reform)...")
-sim_c = Microsimulation()
-tax_c = float(sim_c.calculate(VARIABLE, period=YEAR).sum())
-print("BASELINE (no reform)")
-print_params("C", sim_c)
-print(f"  income_tax (sum): {tax_c:,.2f}\n")
-
-
-# ── Approach A: reform dict at construction time ───────────────────────
-print("Building Approach A (reform= at construction time)...")
-reform_dict = {path: {PERIOD_STR: val} for path, val in REFORMS.items()}
-sim_a = Microsimulation(reform=reform_dict)
-tax_a = float(sim_a.calculate(VARIABLE, period=YEAR).sum())
-print("APPROACH A: reform= at construction time")
-print_params("A", sim_a)
-print(f"  income_tax (sum): {tax_a:,.2f}\n")
-
-
-# ── Approach B: p.update() after construction (UK-style) ───────────────
-print("Building Approach B (p.update() after construction)...")
-sim_b = Microsimulation()
-for path, val in REFORMS.items():
-    p = sim_b.tax_benefit_system.parameters.get_child(path)
-    p.update(value=val, start=make_period(PERIOD_STR))
-tax_b = float(sim_b.calculate(VARIABLE, period=YEAR).sum())
-print("APPROACH B: p.update() after construction (UK-style)")
-print_params("B", sim_b)
-print(f"  income_tax (sum): {tax_b:,.2f}\n")
-
-
-# ── Comparison ─────────────────────────────────────────────────────────
-print("=" * 70)
-print("COMPARISON")
-print(f"  C (baseline / no reform):      {tax_c:>25,.2f}")
-print(f"  A (construction-time reform):  {tax_a:>25,.2f}")
-print(f"  B (post-construction update):  {tax_b:>25,.2f}")
-print()
-print(f"  Delta A vs C (reform effect):  {tax_a - tax_c:>25,.2f}")
-print(f"  Delta B vs C (reform effect):  {tax_b - tax_c:>25,.2f}")
-print(f"  Delta A vs B:                  {tax_a - tax_b:>25,.2f}")
-print()
-
-if abs(tax_a - tax_c) < 1.0:
-    print("WARNING: Approach A shows no difference from baseline.")
-    print(
-        "  The reform may not have taken effect. Check parameter values above."
-    )
-elif abs(tax_a - tax_b) < 0.01:
-    print(
-        "RESULT: A and B match — both approaches work identically for policyengine-us."
-    )
-else:
-    print("RESULT: A and B DIFFER — post-construction p.update() does NOT")
-    print("        produce the same result as construction-time reform=.")
-    print()
-    if abs(tax_b - tax_c) < 1.0:
-        print("  ** B matches baseline C — the p.update() had NO EFFECT on")
-        print(
-            "     the calculated income_tax. The reform was silently ignored."
-        )
-    else:
-        print(
-            "  ** B differs from both A and C — partial/incorrect application."
-        )
diff --git a/src/policyengine/outputs/decile_impact.py b/src/policyengine/outputs/decile_impact.py
index 3ff8f3d2..9d5e2e43 100644
--- a/src/policyengine/outputs/decile_impact.py
+++ b/src/policyengine/outputs/decile_impact.py
@@ -16,9 +16,7 @@ class DecileImpact(Output):
     baseline_simulation: Simulation
     reform_simulation: Simulation
     income_variable: str = "equiv_hbai_household_net_income"
-    decile_variable: str | None = (
-        None  # If set, use pre-computed grouping variable
-    )
+    decile_variable: str | None = None  # If set, use pre-computed grouping variable
     entity: str | None = None
     decile: int
     quantiles: int = 10
diff --git a/src/policyengine/outputs/intra_decile_impact.py b/src/policyengine/outputs/intra_decile_impact.py
index 94a0f265..e2b01243 100644
--- a/src/policyengine/outputs/intra_decile_impact.py
+++ b/src/policyengine/outputs/intra_decile_impact.py
@@ -104,9 +104,7 @@ def run(self):
         for lower, upper in zip(BOUNDS[:-1], BOUNDS[1:]):
             in_category = (income_change > lower) & (income_change <= upper)
             in_both = in_decile & in_category
-            proportions.append(
-                float(np.sum(people[in_both]) / people_in_decile)
-            )
+            proportions.append(float(np.sum(people[in_both]) / people_in_decile))
 
         self.lose_more_than_5pct = proportions[0]
         self.lose_less_than_5pct = proportions[1]
@@ -152,15 +150,11 @@ def compute_intra_decile_impacts(
         entity=entity,
         decile=0,
         quantiles=quantiles,
-        lose_more_than_5pct=sum(r.lose_more_than_5pct for r in results)
-        / quantiles,
-        lose_less_than_5pct=sum(r.lose_less_than_5pct for r in results)
-        / quantiles,
+        lose_more_than_5pct=sum(r.lose_more_than_5pct for r in results) / quantiles,
+        lose_less_than_5pct=sum(r.lose_less_than_5pct for r in results) / quantiles,
         no_change=sum(r.no_change for r in results) / quantiles,
-        gain_less_than_5pct=sum(r.gain_less_than_5pct for r in results)
-        / quantiles,
-        gain_more_than_5pct=sum(r.gain_more_than_5pct for r in results)
-        / quantiles,
+        gain_less_than_5pct=sum(r.gain_less_than_5pct for r in results) / quantiles,
+        gain_more_than_5pct=sum(r.gain_more_than_5pct for r in results) / quantiles,
     )
     results.append(overall)
 
diff --git a/tests/test_intra_decile_impact.py b/tests/test_intra_decile_impact.py
index 098a0b48..d49c7cf8 100644
--- a/tests/test_intra_decile_impact.py
+++ b/tests/test_intra_decile_impact.py
@@ -20,9 +20,7 @@ def _make_variable_mock(name: str, entity: str) -> MagicMock:
     return var
 
 
-def _make_sim(
-    household_data: dict, variables: list | None = None
-) -> MagicMock:
+def _make_sim(household_data: dict, variables: list | None = None) -> MagicMock:
     """Create a mock Simulation with household-level data."""
     hh_df = MicroDataFrame(
         pd.DataFrame(household_data),
@@ -72,12 +70,8 @@ def test_intra_decile_no_change():
 
     for r in results.outputs:
         assert r.no_change == 1.0 or abs(r.no_change - 1.0) < 1e-9
-        assert (
-            r.lose_more_than_5pct == 0.0 or abs(r.lose_more_than_5pct) < 1e-9
-        )
-        assert (
-            r.gain_more_than_5pct == 0.0 or abs(r.gain_more_than_5pct) < 1e-9
-        )
+        assert r.lose_more_than_5pct == 0.0 or abs(r.lose_more_than_5pct) < 1e-9
+        assert r.gain_more_than_5pct == 0.0 or abs(r.gain_more_than_5pct) < 1e-9
 
 
 def test_intra_decile_all_large_gain():
@@ -108,10 +102,7 @@ def test_intra_decile_all_large_gain():
     )
 
     for r in results.outputs:
-        assert (
-            r.gain_more_than_5pct == 1.0
-            or abs(r.gain_more_than_5pct - 1.0) < 1e-9
-        )
+        assert r.gain_more_than_5pct == 1.0 or abs(r.gain_more_than_5pct - 1.0) < 1e-9
         assert r.no_change == 0.0 or abs(r.no_change) < 1e-9
 
 
diff --git a/tests/test_local_authority_impact.py b/tests/test_local_authority_impact.py
index db555713..a872262e 100644
--- a/tests/test_local_authority_impact.py
+++ b/tests/test_local_authority_impact.py
@@ -66,9 +66,7 @@ def test_basic_local_authority_reweighting():
     assert impact.local_authority_results is not None
     assert len(impact.local_authority_results) == 2
 
-    by_code = {
-        r["local_authority_code"]: r for r in impact.local_authority_results
-    }
+    by_code = {r["local_authority_code"]: r for r in impact.local_authority_results}
 
     la1 = by_code["LA001"]
     # Weighted change: (1*3000 + 1*3000) / 2 = 3000
diff --git a/tests/test_models.py b/tests/test_models.py
index dabc90c4..eb509ea9 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -179,9 +179,7 @@ class TestVariableDefaultValue:
 
     def test_us_age_variable_has_default_value_40(self):
         """US age variable should have default_value of 40."""
-        age_var = next(
-            (v for v in us_latest.variables if v.name == "age"), None
-        )
+        age_var = next((v for v in us_latest.variables if v.name == "age"), None)
         assert age_var is not None, "age variable not found in US model"
         assert age_var.default_value == 40, (
             f"Expected age default_value to be 40, got {age_var.default_value}"
@@ -193,9 +191,7 @@ def test_us_enum_variable_has_string_default_value(self):
         age_group_var = next(
             (v for v in us_latest.variables if v.name == "age_group"), None
         )
-        assert age_group_var is not None, (
-            "age_group variable not found in US model"
-        )
+        assert age_group_var is not None, "age_group variable not found in US model"
         assert age_group_var.default_value == "WORKING_AGE", (
             f"Expected age_group default_value to be 'WORKING_AGE', "
             f"got {age_group_var.default_value}"
@@ -203,20 +199,12 @@ def test_us_enum_variable_has_string_default_value(self):
 
     def test_us_variables_have_value_type(self):
         """US variables should have value_type set."""
-        age_var = next(
-            (v for v in us_latest.variables if v.name == "age"), None
-        )
+        age_var = next((v for v in us_latest.variables if v.name == "age"), None)
         assert age_var is not None, "age variable not found in US model"
-        assert age_var.value_type is not None, (
-            "age variable should have value_type"
-        )
+        assert age_var.value_type is not None, "age variable should have value_type"
 
     def test_uk_age_variable_has_default_value(self):
         """UK age variable should have default_value set."""
-        age_var = next(
-            (v for v in uk_latest.variables if v.name == "age"), None
-        )
+        age_var = next((v for v in uk_latest.variables if v.name == "age"), None)
         assert age_var is not None, "age variable not found in UK model"
-        assert age_var.default_value is not None, (
-            "UK age should have default_value"
-        )
+        assert age_var.default_value is not None, "UK age should have default_value"
diff --git a/tests/test_parametric_reforms.py b/tests/test_parametric_reforms.py
index 3a0eadc5..418f2fce 100644
--- a/tests/test_parametric_reforms.py
+++ b/tests/test_parametric_reforms.py
@@ -196,7 +196,9 @@ def test__given_modifier__then_calls_p_update_for_each_value(self):
 
         mock_simulation = MagicMock()
         mock_param_node = MagicMock()
-        mock_simulation.tax_benefit_system.parameters.get_child.return_value = mock_param_node
+        mock_simulation.tax_benefit_system.parameters.get_child.return_value = (
+            mock_param_node
+        )
 
         param_values = [SINGLE_PARAM_VALUE]
         modifier = simulation_modifier_from_parameter_values(param_values)
@@ -220,7 +222,9 @@ def test__given_multiple_values__then_applies_all_updates(self):
 
         mock_simulation = MagicMock()
         mock_param_node = MagicMock()
-        mock_simulation.tax_benefit_system.parameters.get_child.return_value = mock_param_node
+        mock_simulation.tax_benefit_system.parameters.get_child.return_value = (
+            mock_param_node
+        )
 
         param_values = MULTIPLE_DIFFERENT_PARAMS
         modifier = simulation_modifier_from_parameter_values(param_values)
@@ -242,12 +246,12 @@ def test__given_modifier__then_returns_simulation(self):
 
         mock_simulation = MagicMock()
         mock_param_node = MagicMock()
-        mock_simulation.tax_benefit_system.parameters.get_child.return_value = mock_param_node
-
-        modifier = simulation_modifier_from_parameter_values(
-            [SINGLE_PARAM_VALUE]
+        mock_simulation.tax_benefit_system.parameters.get_child.return_value = (
+            mock_param_node
         )
 
+        modifier = simulation_modifier_from_parameter_values([SINGLE_PARAM_VALUE])
+
         # When
         result = modifier(mock_simulation)
 
diff --git a/tests/test_us_reform_application.py b/tests/test_us_reform_application.py
index 2331eccc..21b9d01c 100644
--- a/tests/test_us_reform_application.py
+++ b/tests/test_us_reform_application.py
@@ -113,10 +113,7 @@ def test__given_same_policy_twice__then_results_are_deterministic(self):
         result2 = calculate_us_household_impact(household, policy=policy)
 
         # Then
-        assert (
-            result1.tax_unit[0]["income_tax"]
-            == result2.tax_unit[0]["income_tax"]
-        )
+        assert result1.tax_unit[0]["income_tax"] == result2.tax_unit[0]["income_tax"]
 
     def test__given_custom_deduction_value__then_tax_reflects_value(self):
         """Given: Custom standard deduction value
diff --git a/tests/test_variable_labels.py b/tests/test_variable_labels.py
index 8d2c5ff8..ac572371 100644
--- a/tests/test_variable_labels.py
+++ b/tests/test_variable_labels.py
@@ -108,9 +108,7 @@ def test_employment_income_has_label(self):
         )
         assert var is not None, "employment_income not found in US model"
         assert var.label is not None, "employment_income should have a label"
-        assert len(var.label) > 0, (
-            "employment_income label should be non-empty"
-        )
+        assert len(var.label) > 0, "employment_income label should be non-empty"
 
     def test_income_tax_has_label(self):
         """income_tax should have a non-empty label."""
@@ -126,9 +124,7 @@ def test_majority_of_variables_have_labels(self):
         """Most US variables should have non-empty labels."""
         total = len(us_latest.variables)
         with_label = sum(
-            1
-            for v in us_latest.variables
-            if v.label is not None and len(v.label) > 0
+            1 for v in us_latest.variables if v.label is not None and len(v.label) > 0
         )
         ratio = with_label / total
         assert ratio > 0.5, (
@@ -177,9 +173,7 @@ def test_majority_of_variables_have_labels(self):
         """Most UK variables should have non-empty labels."""
         total = len(uk_latest.variables)
         with_label = sum(
-            1
-            for v in uk_latest.variables
-            if v.label is not None and len(v.label) > 0
+            1 for v in uk_latest.variables if v.label is not None and len(v.label) > 0
         )
         ratio = with_label / total
         assert ratio > 0.5, (