diff --git a/changelog.d/add-uk-detailed-budget-program-statistics.changed.md b/changelog.d/add-uk-detailed-budget-program-statistics.changed.md new file mode 100644 index 00000000..8ea911da --- /dev/null +++ b/changelog.d/add-uk-detailed-budget-program-statistics.changed.md @@ -0,0 +1 @@ +Add UK program-statistics rows for fuel duty, state pension, employer National Insurance, and tax credits. diff --git a/src/policyengine/tax_benefit_models/uk/analysis.py b/src/policyengine/tax_benefit_models/uk/analysis.py index f37d18be..1b09ca2e 100644 --- a/src/policyengine/tax_benefit_models/uk/analysis.py +++ b/src/policyengine/tax_benefit_models/uk/analysis.py @@ -23,6 +23,31 @@ Poverty, calculate_uk_poverty_rates, ) +from policyengine.utils.errors import format_conditional_error_detail + +# Map of UK program-statistics variable name -> program metadata. The +# entity for each program is derived from the variable's own metadata at +# runtime, so this list does not silently drift if policyengine-uk moves +# a variable between entities. +UK_PROGRAMS: dict[str, dict] = { + "income_tax": {"is_tax": True}, + "national_insurance": {"is_tax": True}, + "vat": {"is_tax": True}, + "council_tax": {"is_tax": True}, + "fuel_duty": {"is_tax": True}, + "ni_employer": {"is_tax": True}, + "universal_credit": {"is_tax": False}, + "child_benefit": {"is_tax": False}, + "pension_credit": {"is_tax": False}, + "income_support": {"is_tax": False}, + # tax_credits overlaps with the separate working_tax_credit and + # child_tax_credit rows. Downstream budget adapters should select the + # row set they need rather than summing all program statistics. 
def _format_missing_program_variables(missing_variables: set[str]) -> str | None:
    """Format the optional missing-variable detail for program statistics.

    Delegates to ``format_conditional_error_detail`` with the fixed label
    ``"Missing model variables"``. The caller treats a ``None`` result as
    "nothing to report" (presumably what the helper returns for an empty
    set -- confirm against the helper).
    """
    return format_conditional_error_detail(
        "Missing model variables",
        missing_variables,
    )


def _uk_program_statistics_config_error_message(
    missing_variables: set[str],
    missing_outputs: set[tuple[str, str]],
) -> str:
    """Build the multi-line error text for an invalid program-statistics config.

    Args:
        missing_variables: Program names a model version could not look up.
        missing_outputs: ``(program_name, entity)`` pairs for variables that
            exist in the model but are not among the resolved entity
            variables of a simulation.

    Returns:
        A header line, then the missing-variable detail (when the formatter
        returns one), then the missing-output detail plus remediation hint
        (when ``missing_outputs`` is non-empty), joined with newlines.
    """
    lines = ["UK program statistics config is invalid:"]

    missing_variables_message = _format_missing_program_variables(missing_variables)
    if missing_variables_message is not None:
        lines.append(missing_variables_message)

    if missing_outputs:
        # Sort so the message does not depend on set iteration order.
        formatted = ", ".join(
            f"{program_name} on {entity}"
            for program_name, entity in sorted(missing_outputs)
        )
        lines.append("Variables not materialized in simulation outputs: " + formatted)
        lines.append(
            "Add them to the model version's entity_variables or pass them "
            "via Simulation.extra_variables before running the simulation."
        )

    return "\n".join(lines)


def _validate_program_statistics_config(
    baseline_simulation: Simulation,
    reform_simulation: Simulation,
) -> None:
    """Validate UK program-stat variables before running simulations.

    For every name in ``UK_PROGRAMS`` and for each of the two simulations,
    check that the simulation's model version can look the variable up and
    that the variable is listed under its own entity in the model version's
    resolved entity variables.

    Args:
        baseline_simulation: Simulation providing the baseline outputs.
        reform_simulation: Simulation providing the reform outputs.

    Raises:
        ValueError: If any program variable is unknown to a model version, or
            is not listed among the resolved entity variables.
    """
    missing_variables: set[str] = set()
    missing_outputs: set[tuple[str, str]] = set()

    for simulation in (baseline_simulation, reform_simulation):
        model_version = simulation.tax_benefit_model_version
        # Resolved lazily and at most once per simulation: the argument does
        # not change between programs, so repeating the call for every
        # program is redundant work.
        # NOTE(review): assumes resolve_entity_variables(simulation) returns
        # the same mapping on repeated calls -- confirm.
        resolved_variables = None

        for program_name in UK_PROGRAMS:
            try:
                variable = model_version.get_variable(program_name)
            except ValueError:
                missing_variables.add(program_name)
                continue

            if resolved_variables is None:
                resolved_variables = model_version.resolve_entity_variables(
                    simulation
                )
            if program_name not in resolved_variables.get(variable.entity, []):
                missing_outputs.add((program_name, variable.entity))

    if missing_variables or missing_outputs:
        raise ValueError(
            _uk_program_statistics_config_error_message(
                missing_variables,
                missing_outputs,
            ),
        )
baseline_simulation.tax_benefit_model_version.get_variable( - program_name - ).entity + for program_name, program_info in UK_PROGRAMS.items(): stats = ProgramStatistics( baseline_simulation=baseline_simulation, reform_simulation=reform_simulation, program_name=program_name, - entity=entity, + entity=model_version.get_variable(program_name).entity, is_tax=program_info["is_tax"], ) stats.run() diff --git a/src/policyengine/tax_benefit_models/uk/model.py b/src/policyengine/tax_benefit_models/uk/model.py index b0e4cceb..976d1339 100644 --- a/src/policyengine/tax_benefit_models/uk/model.py +++ b/src/policyengine/tax_benefit_models/uk/model.py @@ -69,9 +69,11 @@ class PolicyEngineUKLatest(MicrosimulationModelVersion): "income_support", "working_tax_credit", "child_tax_credit", + "state_pension", # Tax "income_tax", "national_insurance", + "ni_employer", ], "benunit": [ # IDs and weights @@ -84,6 +86,7 @@ class PolicyEngineUKLatest(MicrosimulationModelVersion): "child_benefit", "pension_credit", "income_support", + "tax_credits", "working_tax_credit", "child_tax_credit", ], @@ -104,6 +107,7 @@ class PolicyEngineUKLatest(MicrosimulationModelVersion): "household_benefits", "household_tax", "vat", + "fuel_duty", # Housing "rent", "council_tax", diff --git a/tests/fixtures/household_calculator_snapshots/uk_couple_two_kids.json b/tests/fixtures/household_calculator_snapshots/uk_couple_two_kids.json index 49302124..bede1c1b 100644 --- a/tests/fixtures/household_calculator_snapshots/uk_couple_two_kids.json +++ b/tests/fixtures/household_calculator_snapshots/uk_couple_two_kids.json @@ -6,10 +6,12 @@ "benunit.family_type": "COUPLE_WITH_CHILDREN", "benunit.income_support": 0.0, "benunit.pension_credit": 0.0, + "benunit.tax_credits": 0.0, "benunit.universal_credit": 0.0, "benunit.working_tax_credit": 0.0, "household.council_tax": 0.0, "household.equiv_hbai_household_net_income": 52503.68, + "household.fuel_duty": 0.0, "household.hbai_household_net_income": 73505.15, 
"household.household_benefits": 5880.35, "household.household_count_people": 4.0, @@ -44,6 +46,7 @@ "person[0].is_child": 0.0, "person[0].is_male": 1.0, "person[0].national_insurance": 3110.6, + "person[0].ni_employer": 7501.2, "person[0].pension_credit": 0.0, "person[0].pension_income": 0.0, "person[0].person_id": 0.0, @@ -52,6 +55,7 @@ "person[0].property_income": 0.0, "person[0].savings_interest_income": 0.0, "person[0].self_employment_income": 0.0, + "person[0].state_pension": 0.0, "person[0].total_income": 55000.0, "person[0].universal_credit": 0.0, "person[0].working_tax_credit": 0.0, @@ -71,6 +75,7 @@ "person[1].is_child": 0.0, "person[1].is_male": 1.0, "person[1].national_insurance": 1794.4, + "person[1].ni_employer": 4501.2, "person[1].pension_credit": 0.0, "person[1].pension_income": 0.0, "person[1].person_id": 0.0, @@ -79,6 +84,7 @@ "person[1].property_income": 0.0, "person[1].savings_interest_income": 0.0, "person[1].self_employment_income": 0.0, + "person[1].state_pension": 0.0, "person[1].total_income": 35000.0, "person[1].universal_credit": 0.0, "person[1].working_tax_credit": 0.0, @@ -98,6 +104,7 @@ "person[2].is_child": 1.0, "person[2].is_male": 1.0, "person[2].national_insurance": 0.0, + "person[2].ni_employer": 0.0, "person[2].pension_credit": 0.0, "person[2].pension_income": 0.0, "person[2].person_id": 0.0, @@ -106,6 +113,7 @@ "person[2].property_income": 0.0, "person[2].savings_interest_income": 0.0, "person[2].self_employment_income": 0.0, + "person[2].state_pension": 0.0, "person[2].total_income": 0.0, "person[2].universal_credit": 0.0, "person[2].working_tax_credit": 0.0, @@ -125,6 +133,7 @@ "person[3].is_child": 1.0, "person[3].is_male": 1.0, "person[3].national_insurance": 0.0, + "person[3].ni_employer": 0.0, "person[3].pension_credit": 0.0, "person[3].pension_income": 0.0, "person[3].person_id": 0.0, @@ -133,6 +142,7 @@ "person[3].property_income": 0.0, "person[3].savings_interest_income": 0.0, "person[3].self_employment_income": 0.0, + 
"person[3].state_pension": 0.0, "person[3].total_income": 0.0, "person[3].universal_credit": 0.0, "person[3].working_tax_credit": 0.0 diff --git a/tests/fixtures/household_calculator_snapshots/uk_single_adult_employment_income.json b/tests/fixtures/household_calculator_snapshots/uk_single_adult_employment_income.json index 5ec94094..27fa1f63 100644 --- a/tests/fixtures/household_calculator_snapshots/uk_single_adult_employment_income.json +++ b/tests/fixtures/household_calculator_snapshots/uk_single_adult_employment_income.json @@ -6,10 +6,12 @@ "benunit.family_type": "SINGLE", "benunit.income_support": 0.0, "benunit.pension_credit": 0.0, + "benunit.tax_credits": 0.0, "benunit.universal_credit": 0.0, "benunit.working_tax_credit": 0.0, "household.council_tax": 0.0, "household.equiv_hbai_household_net_income": 37491.94, + "household.fuel_duty": 0.0, "household.hbai_household_net_income": 25119.6, "household.household_benefits": 0.0, "household.household_count_people": 1.0, @@ -44,6 +46,7 @@ "person[0].is_child": 0.0, "person[0].is_male": 1.0, "person[0].national_insurance": 1394.4, + "person[0].ni_employer": 3751.2, "person[0].pension_credit": 0.0, "person[0].pension_income": 0.0, "person[0].person_id": 0.0, @@ -52,6 +55,7 @@ "person[0].property_income": 0.0, "person[0].savings_interest_income": 0.0, "person[0].self_employment_income": 0.0, + "person[0].state_pension": 0.0, "person[0].total_income": 30000.0, "person[0].universal_credit": 0.0, "person[0].working_tax_credit": 0.0 diff --git a/tests/fixtures/household_calculator_snapshots/uk_single_adult_no_income.json b/tests/fixtures/household_calculator_snapshots/uk_single_adult_no_income.json index 59657e2c..2e7a2db9 100644 --- a/tests/fixtures/household_calculator_snapshots/uk_single_adult_no_income.json +++ b/tests/fixtures/household_calculator_snapshots/uk_single_adult_no_income.json @@ -6,10 +6,12 @@ "benunit.family_type": "SINGLE", "benunit.income_support": 0.0, "benunit.pension_credit": 0.0, + 
"benunit.tax_credits": 0.0, "benunit.universal_credit": 5079.13, "benunit.working_tax_credit": 0.0, "household.council_tax": 0.0, "household.equiv_hbai_household_net_income": 7580.79, + "household.fuel_duty": 0.0, "household.hbai_household_net_income": 5079.13, "household.household_benefits": 5079.13, "household.household_count_people": 1.0, @@ -44,6 +46,7 @@ "person[0].is_child": 0.0, "person[0].is_male": 1.0, "person[0].national_insurance": 0.0, + "person[0].ni_employer": 0.0, "person[0].pension_credit": 0.0, "person[0].pension_income": 0.0, "person[0].person_id": 0.0, @@ -52,6 +55,7 @@ "person[0].property_income": 0.0, "person[0].savings_interest_income": 0.0, "person[0].self_employment_income": 0.0, + "person[0].state_pension": 0.0, "person[0].total_income": 0.0, "person[0].universal_credit": 5079.13, "person[0].working_tax_credit": 0.0 diff --git a/tests/fixtures/household_calculator_snapshots/uk_single_parent_one_child.json b/tests/fixtures/household_calculator_snapshots/uk_single_parent_one_child.json index 06e55db0..4b5589c0 100644 --- a/tests/fixtures/household_calculator_snapshots/uk_single_parent_one_child.json +++ b/tests/fixtures/household_calculator_snapshots/uk_single_parent_one_child.json @@ -6,10 +6,12 @@ "benunit.family_type": "LONE_PARENT", "benunit.income_support": 0.0, "benunit.pension_credit": 0.0, + "benunit.tax_credits": 0.0, "benunit.universal_credit": 1544.43, "benunit.working_tax_credit": 0.0, "household.council_tax": 0.0, "household.equiv_hbai_household_net_income": 28120.33, + "household.fuel_duty": 0.0, "household.hbai_household_net_income": 24464.69, "household.household_benefits": 2945.09, "household.household_count_people": 2.0, @@ -44,6 +46,7 @@ "person[0].is_child": 0.0, "person[0].is_male": 1.0, "person[0].national_insurance": 994.4, + "person[0].ni_employer": 3001.2, "person[0].pension_credit": 0.0, "person[0].pension_income": 0.0, "person[0].person_id": 0.0, @@ -52,6 +55,7 @@ "person[0].property_income": 0.0, 
"person[0].savings_interest_income": 0.0, "person[0].self_employment_income": 0.0, + "person[0].state_pension": 0.0, "person[0].total_income": 25000.0, "person[0].universal_credit": 1544.43, "person[0].working_tax_credit": 0.0, @@ -71,6 +75,7 @@ "person[1].is_child": 1.0, "person[1].is_male": 1.0, "person[1].national_insurance": 0.0, + "person[1].ni_employer": 0.0, "person[1].pension_credit": 0.0, "person[1].pension_income": 0.0, "person[1].person_id": 0.0, @@ -79,6 +84,7 @@ "person[1].property_income": 0.0, "person[1].savings_interest_income": 0.0, "person[1].self_employment_income": 0.0, + "person[1].state_pension": 0.0, "person[1].total_income": 0.0, "person[1].universal_credit": 1544.43, "person[1].working_tax_credit": 0.0 diff --git a/tests/test_uk_program_statistics.py b/tests/test_uk_program_statistics.py new file mode 100644 index 00000000..9dd69313 --- /dev/null +++ b/tests/test_uk_program_statistics.py @@ -0,0 +1,240 @@ +import pandas as pd +import pytest +from microdf import MicroDataFrame + +from policyengine.core import OutputCollection, Simulation +from policyengine.outputs import ProgramStatistics +from policyengine.outputs.inequality import Inequality +from policyengine.tax_benefit_models.uk import analysis as uk_analysis +from policyengine.tax_benefit_models.uk.analysis import ( + UK_PROGRAMS, + _validate_program_statistics_config, +) +from policyengine.tax_benefit_models.uk.datasets import ( + PolicyEngineUKDataset, + UKYearData, +) +from policyengine.tax_benefit_models.uk.model import uk_latest + + +def _microdf(data: dict, weights: str) -> MicroDataFrame: + return MicroDataFrame(pd.DataFrame(data), weights=weights) + + +PROGRAM_VALUES = { + "income_tax": [100.0, 0.0], + "national_insurance": [0.0, 200.0], + "vat": [300.0, 0.0], + "council_tax": [0.0, 400.0], + "fuel_duty": [500.0, 0.0], + "ni_employer": [0.0, 600.0], + "universal_credit": [700.0, 0.0], + "child_benefit": [0.0, 800.0], + "pension_credit": [900.0, 0.0], + "income_support": [0.0, 
def _weighted_values(
    values: list[float],
    multiplier: float,
    row_count: int,
) -> list[float]:
    """Repeat ``values`` cyclically to ``row_count`` rows, scaled by ``multiplier``.

    Args:
        values: Pattern of per-row values to cycle through.
        multiplier: Factor applied to every produced value.
        row_count: Number of rows to produce; zero or negative yields ``[]``.

    Raises:
        ValueError: If ``values`` is empty while rows are requested (this
            previously surfaced as a ``ZeroDivisionError`` from the modulo).
    """
    period = len(values)
    if period == 0 and row_count > 0:
        raise ValueError("values must not be empty when row_count is positive")
    return [values[index % period] * multiplier for index in range(row_count)]


def _weights(row_count: int) -> list[float]:
    """Return the per-row weights used by the mocked datasets.

    The two-row case uses unequal weights ``[1.0, 2.0]``; every other size
    uses a weight of ``1.0`` per row.
    """
    if row_count == 2:
        return [1.0, 2.0]
    return [1.0] * row_count


def _program_columns_by_entity(
    multiplier: float,
    row_count: int,
) -> dict[str, dict[str, list[float]]]:
    """Group the ``PROGRAM_VALUES`` columns by the entity of each variable.

    The entity of every program is looked up on ``uk_latest``; the result has
    one (possibly empty) column mapping per key of
    ``uk_latest.entity_variables``.
    """
    program_columns_by_entity = {entity: {} for entity in uk_latest.entity_variables}
    for program, values in PROGRAM_VALUES.items():
        entity = uk_latest.get_variable(program).entity
        program_columns_by_entity[entity][program] = _weighted_values(
            values,
            multiplier,
            row_count,
        )
    return program_columns_by_entity
def test_uk_program_statistics_config_runs_against_mocked_outputs(tmp_path):
    """Run every configured program on mocked outputs and spot-check results."""
    baseline = _make_uk_output_simulation(tmp_path, "baseline", 1.0)
    reform = _make_uk_output_simulation(tmp_path, "reform", 2.0)

    _validate_program_statistics_config(baseline, reform)

    model_version = baseline.tax_benefit_model_version

    def run_program(name, info):
        # Entity comes from the model metadata, mirroring the analysis code.
        statistics = ProgramStatistics(
            baseline_simulation=baseline,
            reform_simulation=reform,
            program_name=name,
            entity=model_version.get_variable(name).entity,
            is_tax=info["is_tax"],
        )
        statistics.run()
        return statistics

    results = {name: run_program(name, info) for name, info in UK_PROGRAMS.items()}

    assert set(results) == set(UK_PROGRAMS)

    # program name -> (expected baseline total, expected is_tax flag)
    expected = {
        "fuel_duty": (500.0, True),
        "state_pension": (2_800.0, False),
        "ni_employer": (1_200.0, True),
        "tax_credits": (1_100.0, False),
    }
    for name, (baseline_total, _) in expected.items():
        assert results[name].baseline_total == baseline_total
    for name, (_, is_tax) in expected.items():
        assert results[name].is_tax is is_tax
def test_uk_program_statistics_config_fails_before_simulation_run(
    tmp_path, monkeypatch
):
    """Validation raises when a program variable is not an entity variable."""
    baseline = _make_uk_output_simulation(tmp_path, "baseline", 1.0)
    reform = _make_uk_output_simulation(tmp_path, "reform", 2.0)

    # Copy each per-entity list so the shared model config is not mutated,
    # then drop fuel_duty from the household variables.
    patched_entity_variables = {}
    for entity, variables in uk_latest.entity_variables.items():
        patched_entity_variables[entity] = list(variables)
    patched_entity_variables["household"].remove("fuel_duty")

    monkeypatch.setattr(
        baseline.tax_benefit_model_version,
        "entity_variables",
        patched_entity_variables,
    )

    expected_header = "UK program statistics config is invalid"
    with pytest.raises(ValueError, match=expected_header) as exc_info:
        _validate_program_statistics_config(baseline, reform)

    message = str(exc_info.value)
    assert "fuel_duty" in message


def test_uk_programs_entities_match_model_metadata():
    """Configured programs exist in the model; new ones sit on known entities."""
    for program_name in UK_PROGRAMS:
        assert program_name in uk_latest.variables_by_name, (
            f"{program_name} is not defined in the UK model"
        )

    expected_entities = (
        ("fuel_duty", "household"),
        ("state_pension", "person"),
        ("ni_employer", "person"),
        ("tax_credits", "benunit"),
    )
    for program_name, entity in expected_entities:
        variable = uk_latest.get_variable(program_name)
        assert variable.entity == entity