Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
a9c3781
feat: Add default_value and value_type to Variable class
anth-volk Feb 6, 2026
748983a
test: Add Variable default_value integration tests
anth-volk Feb 7, 2026
34573ad
ci: Remove base branch filter from PR workflow
anth-volk Feb 16, 2026
e1fcac5
feat: Region filtering and calculations
anth-volk Feb 10, 2026
bf2021e
style: Fix linting and formatting issues
anth-volk Feb 10, 2026
0c4cd4b
fix: Add conftest.py for pytest fixture discovery
anth-volk Feb 10, 2026
3b93473
feat: Add regional dataset filtering support
anth-volk Feb 10, 2026
a193878
test: Add unit tests for dataset filtering functionality
anth-volk Feb 10, 2026
4dcb073
fix: Apply US reforms at Microsimulation construction time
anth-volk Feb 11, 2026
242341d
fix: Fix ruff lint errors
anth-volk Feb 16, 2026
05adf13
refactor: Extract shared entity utilities and decompose reform building
anth-volk Feb 16, 2026
2376353
feat: Add poverty_type field to Poverty output class
anth-volk Feb 19, 2026
a9f1908
feat: Add poverty by age group computation functions
anth-volk Feb 19, 2026
876cdf8
feat: Add gender poverty computation functions
anth-volk Feb 19, 2026
e1d3a6b
feat: Add racial poverty computation function (US only)
anth-volk Feb 19, 2026
64c7c99
feat: Add household_state_income_tax to US tax_unit entity_variables
anth-volk Feb 20, 2026
6823285
feat: Add household_income_decile and household_count_people to entit…
anth-volk Feb 20, 2026
646e594
test: Add tests for poverty-by-demographics convenience functions
anth-volk Feb 20, 2026
56510eb
style: Apply ruff and black formatting to test files
anth-volk Feb 20, 2026
cc426f6
feat: Add congressional district impact output class
anth-volk Feb 20, 2026
56d9261
feat: Add UK constituency impact output class
anth-volk Feb 20, 2026
552b4da
feat: Add UK local authority impact output class
anth-volk Feb 20, 2026
23d6846
feat: Add IntraDecileImpact output class and wealth decile support
anth-volk Feb 20, 2026
820c97e
test: Add unit tests for Phase 3 output classes
anth-volk Feb 20, 2026
120c7ab
style: Fix lint — sort imports, remove unused imports
anth-volk Feb 20, 2026
14b407c
fix: Address PR review findings — error handling, validation, exports
anth-volk Mar 3, 2026
60f7fdb
style: Fix lint — sort imports, remove IOError alias
anth-volk Mar 3, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .github/workflows/pr_code_changes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
name: Code changes
on:
pull_request:
branches:
- main

paths:
- src/**
- tests/**
Expand Down
3 changes: 3 additions & 0 deletions src/policyengine/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from .parameter import Parameter as Parameter
from .parameter_value import ParameterValue as ParameterValue
from .policy import Policy as Policy
from .region import Region as Region
from .region import RegionRegistry as RegionRegistry
from .region import RegionType as RegionType
from .simulation import Simulation as Simulation
from .tax_benefit_model import TaxBenefitModel as TaxBenefitModel
from .tax_benefit_model_version import (
Expand Down
210 changes: 210 additions & 0 deletions src/policyengine/core/region.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
"""Region definitions for geographic simulations.

This module provides the Region and RegionRegistry classes for defining
geographic regions that a tax-benefit model supports. A region either:
1. Has a dedicated dataset (e.g., US states, congressional districts), or
2. Is filtered from a parent region's dataset (e.g., US places/cities, UK countries)
"""

from typing import Literal

from pydantic import BaseModel, Field, PrivateAttr

# Region type literals for US and UK
USRegionType = Literal["national", "state", "congressional_district", "place"]
UKRegionType = Literal[
"national", "country", "constituency", "local_authority"
]
RegionType = USRegionType | UKRegionType


class Region(BaseModel):
    """Geographic region for tax-benefit simulations.

    A region obtains its data in one of two ways:
    1. From a dedicated dataset (dataset_path is set, requires_filter is False)
    2. By filtering a parent region's dataset (requires_filter is True)

    The unique identifier is the code field, which uses a prefixed format:
    - National: "us", "uk"
    - State: "state/ca", "state/ny"
    - Congressional District: "congressional_district/CA-01"
    - Place: "place/NJ-57000"
    - UK Country: "country/england"
    - Constituency: "constituency/Sheffield Central"
    - Local Authority: "local_authority/E09000001"

    Identity is defined by code alone: two Region instances with the same
    code compare equal and hash identically, whatever their other fields.
    """

    # Core identification
    code: str = Field(
        ...,
        description="Unique region code with type prefix (e.g., 'state/ca', 'place/NJ-57000')",
    )
    label: str = Field(
        ..., description="Human-readable label (e.g., 'California')"
    )
    region_type: RegionType = Field(
        ..., description="Type of region (e.g., 'state', 'place')"
    )

    # Hierarchy
    parent_code: str | None = Field(
        default=None,
        description="Code of parent region (e.g., 'us' for states, 'state/nj' for places in New Jersey)",
    )

    # Dataset configuration
    dataset_path: str | None = Field(
        default=None,
        description="GCS path to dedicated dataset (e.g., 'gs://policyengine-us-data/states/CA.h5')",
    )

    # Filtering configuration (for regions that filter from parent datasets)
    requires_filter: bool = Field(
        default=False,
        description="True if this region filters from a parent dataset rather than having its own",
    )
    filter_field: str | None = Field(
        default=None,
        description="Dataset field to filter on (e.g., 'place_fips', 'country')",
    )
    filter_value: str | None = Field(
        default=None,
        description="Value to match when filtering (defaults to code suffix if not set)",
    )

    # Metadata (primarily for US congressional districts)
    state_code: str | None = Field(
        default=None, description="Two-letter state code (e.g., 'CA', 'NJ')"
    )
    state_name: str | None = Field(
        default=None,
        description="Full state name (e.g., 'California', 'New Jersey')",
    )

    def __hash__(self) -> int:
        """Hash by code so regions can be used in sets and as dict keys."""
        return hash(self.code)

    def __eq__(self, other: object) -> bool:
        """Compare by code.

        Returns NotImplemented for non-Region operands so Python can try
        the other operand's comparison before falling back to inequality.
        """
        if not isinstance(other, Region):
            return NotImplemented
        return self.code == other.code


class RegionRegistry(BaseModel):
    """Registry of all regions for a country model.

    Provides indexed lookups for regions by code and by type. The indices
    are built automatically after initialization and are kept in sync by
    add_region().
    """

    country_id: str = Field(
        ..., description="Country identifier (e.g., 'us', 'uk')"
    )
    regions: list[Region] = Field(default_factory=list)

    # Private indexed lookups (excluded from serialization)
    _by_code: dict[str, Region] = PrivateAttr(default_factory=dict)
    _by_type: dict[str, list[Region]] = PrivateAttr(default_factory=dict)

    def model_post_init(self, __context: object) -> None:
        """Build lookup indices after initialization."""
        self._rebuild_indices()

    def _index_region(self, region: Region) -> None:
        """Record a single region in the by-code and by-type indices."""
        self._by_code[region.code] = region
        self._by_type.setdefault(region.region_type, []).append(region)

    def _rebuild_indices(self) -> None:
        """Rebuild all lookup indices from the regions list."""
        self._by_code = {}
        self._by_type = {}

        for region in self.regions:
            self._index_region(region)

    def add_region(self, region: Region) -> None:
        """Add a region to the registry and update indices.

        If a region with the same code is already registered, the new
        region replaces it, so that len(), iteration, get() and
        get_by_type() all agree on a single entry for that code.

        Args:
            region: Region to register
        """
        if region.code in self._by_code:
            # Drop every stale entry for this code. Slice assignment keeps
            # the same list object for anyone holding a reference to it.
            self.regions[:] = [
                r for r in self.regions if r.code != region.code
            ]
            self.regions.append(region)
            # The replaced region may have had a different type, so the
            # by-type index is rebuilt rather than patched.
            self._rebuild_indices()
            return

        self.regions.append(region)
        self._index_region(region)

    def get(self, code: str) -> Region | None:
        """Get a region by its code.

        Args:
            code: Region code (e.g., 'state/ca', 'place/NJ-57000')

        Returns:
            The Region if found, None otherwise
        """
        return self._by_code.get(code)

    def get_by_type(self, region_type: str) -> list[Region]:
        """Get all regions of a given type.

        Args:
            region_type: Type to filter by (e.g., 'state', 'place')

        Returns:
            A new list of regions with the given type (empty if none);
            mutating it does not affect the registry's index
        """
        return list(self._by_type.get(region_type, ()))

    def get_national(self) -> Region | None:
        """Get the national-level region.

        Returns:
            The first registered national Region if any, None otherwise
        """
        national = self._by_type.get("national")
        return national[0] if national else None

    def get_children(self, parent_code: str) -> list[Region]:
        """Get all regions with a given parent code.

        Args:
            parent_code: Parent region code to filter by

        Returns:
            List of regions with the given parent
        """
        return [r for r in self.regions if r.parent_code == parent_code]

    def get_dataset_regions(self) -> list[Region]:
        """Get all regions that have dedicated datasets.

        Returns:
            List of regions with dataset_path set and requires_filter False
        """
        return [
            r
            for r in self.regions
            if r.dataset_path is not None and not r.requires_filter
        ]

    def get_filter_regions(self) -> list[Region]:
        """Get all regions that require filtering from parent datasets.

        Returns:
            List of regions with requires_filter True
        """
        return [r for r in self.regions if r.requires_filter]

    def __len__(self) -> int:
        """Return the number of regions in the registry."""
        return len(self.regions)

    def __iter__(self):
        """Iterate over the registered Region objects.

        NOTE(review): this overrides BaseModel.__iter__, which in pydantic
        iterates (field_name, value) pairs — confirm nothing relies on that.
        """
        return iter(self.regions)

    def __contains__(self, code: str) -> bool:
        """Check if a region code exists in the registry."""
        return code in self._by_code
21 changes: 21 additions & 0 deletions src/policyengine/core/simulation.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from datetime import datetime
from uuid import uuid4

Expand All @@ -9,6 +10,8 @@
from .policy import Policy
from .tax_benefit_model_version import TaxBenefitModelVersion

logger = logging.getLogger(__name__)

_cache: LRUCache["Simulation"] = LRUCache(max_size=100)


Expand All @@ -21,6 +24,16 @@ class Simulation(BaseModel):
dynamic: Dynamic | None = None
dataset: Dataset = None

# Regional filtering parameters
filter_field: str | None = Field(
default=None,
description="Household-level variable to filter dataset by (e.g., 'place_fips', 'country')",
)
filter_value: str | None = Field(
default=None,
description="Value to match when filtering (e.g., '44000', 'ENGLAND')",
)

tax_benefit_model_version: TaxBenefitModelVersion = None
output_dataset: Dataset | None = None

Expand All @@ -34,7 +47,15 @@ def ensure(self):
return
try:
self.tax_benefit_model_version.load(self)
except FileNotFoundError:
self.run()
self.save()
except Exception:
logger.warning(
"Unexpected error loading simulation %s; falling back to run()",
self.id,
exc_info=True,
)
self.run()
self.save()

Expand Down
19 changes: 19 additions & 0 deletions src/policyengine/core/tax_benefit_model_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
if TYPE_CHECKING:
from .parameter import Parameter
from .parameter_value import ParameterValue
from .region import Region, RegionRegistry
from .simulation import Simulation
from .variable import Variable

Expand All @@ -25,6 +26,11 @@ class TaxBenefitModelVersion(BaseModel):
variables: list["Variable"] = Field(default_factory=list)
parameters: list["Parameter"] = Field(default_factory=list)

# Region registry for geographic simulations
region_registry: "RegionRegistry | None" = Field(
default=None, description="Registry of supported geographic regions"
)

@property
def parameter_values(self) -> list["ParameterValue"]:
"""Aggregate all parameter values from all parameters."""
Expand Down Expand Up @@ -83,6 +89,19 @@ def get_variable(self, name: str) -> "Variable":
f"Variable '{name}' not found in {self.model.id} version {self.version}"
)

def get_region(self, code: str) -> "Region | None":
    """Look up a region by code in this version's region registry.

    Args:
        code: Region code (e.g., 'state/ca', 'place/NJ-57000')

    Returns:
        The matching Region, or None when the code is unknown or this
        model version has no region registry
    """
    registry = self.region_registry
    return registry.get(code) if registry is not None else None

def __repr__(self) -> str:
    """Return a short summary: the id plus counts of variables, parameters and parameter values."""
    variable_count = len(self.variables)
    parameter_count = len(self.parameters)
    value_count = len(self.parameter_values)
    return (
        f"<TaxBenefitModelVersion id={self.id}"
        f" variables={variable_count}"
        f" parameters={parameter_count}"
        f" parameter_values={value_count}>"
    )
2 changes: 2 additions & 0 deletions src/policyengine/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@ class Variable(BaseModel):
description: str | None = None
data_type: type = None
possible_values: list[Any] | None = None
default_value: Any = None
value_type: type | None = None
9 changes: 9 additions & 0 deletions src/policyengine/countries/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"""Country-specific region definitions.

This package contains region registries for each supported country.
"""

from .uk.regions import uk_region_registry
from .us.regions import us_region_registry

__all__ = ["us_region_registry", "uk_region_registry"]
5 changes: 5 additions & 0 deletions src/policyengine/countries/uk/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""UK country-specific region definitions."""

from .regions import uk_region_registry

__all__ = ["uk_region_registry"]
Loading