From c89a08867aaf5f4e57d1467e2059fda145b34464 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Tue, 10 Mar 2026 16:02:20 -0600 Subject: [PATCH] Upgrade breakdown enum mismatch from WARNING to ERROR Parameters with keys not in the breakdown variable's possible values now raise ValueError instead of silently logging a warning. This prevents data loss when parameter YAML files use keys that don't match the breakdown enum (e.g., using snap_utility_region keys with a state_code breakdown). Partial coverage (enum values missing from YAML) remains allowed. Closes #444 Co-Authored-By: Claude Opus 4.6 --- .../fix-breakdown-mismatch-error.changed.md | 1 + .../operations/homogenize_parameters.py | 27 ++++-- .../parameters/operations/test_nesting.py | 97 +++++++++++++++++++ 3 files changed, 115 insertions(+), 10 deletions(-) create mode 100644 changelog.d/fix-breakdown-mismatch-error.changed.md diff --git a/changelog.d/fix-breakdown-mismatch-error.changed.md b/changelog.d/fix-breakdown-mismatch-error.changed.md new file mode 100644 index 00000000..2d9a8a0b --- /dev/null +++ b/changelog.d/fix-breakdown-mismatch-error.changed.md @@ -0,0 +1 @@ +Upgrade breakdown enum mismatch from WARNING to ValueError for early detection of parameter key errors. diff --git a/policyengine_core/parameters/operations/homogenize_parameters.py b/policyengine_core/parameters/operations/homogenize_parameters.py index 6d2e830a..04888300 100644 --- a/policyengine_core/parameters/operations/homogenize_parameters.py +++ b/policyengine_core/parameters/operations/homogenize_parameters.py @@ -96,17 +96,24 @@ def homogenize_parameter_node( {"0000-01-01": default_value, "2040-01-01": default_value}, ), ) + possible_values_str = {str(v) for v in possible_values} + extra_children = [] + for child in node.children: + child_key = child.split(".")[-1] + if ( + child_key not in possible_values_str + and str(child_key) not in possible_values_str + ): + extra_children.append(child_key) + if extra_children: + raise ValueError( + f"Parameter {node.name} has children {extra_children} " + f"that are not in the possible values of the breakdown " + f"variable '{first_breakdown}'. Check that the breakdown " + f"metadata references the correct variable and that all " + f"parameter keys are valid enum values." + ) for child in node.children: - if child.split(".")[-1] not in possible_values: - try: - int(child) - is_int = True - except: - is_int = False - if not is_int or str(child) not in node.children: - logging.warning( - f"Parameter {node.name} has a child {child} that is not in the possible values of {first_breakdown}, ignoring." - ) if further_breakdown: node.children[child] = homogenize_parameter_node( node.children[child], breakdown[1:], variables, default_value diff --git a/tests/core/parameters/operations/test_nesting.py b/tests/core/parameters/operations/test_nesting.py index 8a1090ac..97d3dcdc 100644 --- a/tests/core/parameters/operations/test_nesting.py +++ b/tests/core/parameters/operations/test_nesting.py @@ -1,3 +1,6 @@ +import pytest + + def test_parameter_homogenization(): import numpy as np @@ -90,3 +93,97 @@ class family_size(Variable): ] == [1, 0, 0] ).all() + + +def test_breakdown_mismatch_raises_error(): + """Extra parameter keys not in the breakdown enum should raise ValueError.""" + from policyengine_core.entities import Entity + from policyengine_core.model_api import ETERNITY, Enum, Variable + from policyengine_core.parameters import ( + ParameterNode, + homogenize_parameter_structures, + ) + from policyengine_core.taxbenefitsystems import TaxBenefitSystem + + Person = Entity("person", "people", "Person", "A person") + + class Color(Enum): + RED = "Red" + BLUE = "Blue" + + class color(Variable): + value_type = Enum + entity = Person + definition_period = ETERNITY + possible_values = Color + default_value = Color.RED + label = "color" + + # "GREEN" is not in the Color enum — should raise ValueError + root = ParameterNode( + data={ + "value_by_color": { + "RED": {"2021-01-01": 1}, + "GREEN": {"2021-01-01": 2}, + "metadata": { + "breakdown": ["color"], + }, + } + } + ) + + system = TaxBenefitSystem([Person]) + system.add_variables(color) + system.parameters = root + + with pytest.raises(ValueError, match="GREEN"): + homogenize_parameter_structures( + system.parameters, system.variables, default_value=0 + ) + + +def test_breakdown_partial_coverage_is_ok(): + """Missing enum values in parameter YAML should NOT raise an error.""" + from policyengine_core.entities import Entity + from policyengine_core.model_api import ETERNITY, Enum, Variable + from policyengine_core.parameters import ( + ParameterNode, + homogenize_parameter_structures, + ) + from policyengine_core.taxbenefitsystems import TaxBenefitSystem + + Person = Entity("person", "people", "Person", "A person") + + class Color(Enum): + RED = "Red" + BLUE = "Blue" + GREEN = "Green" + + class color(Variable): + value_type = Enum + entity = Person + definition_period = ETERNITY + possible_values = Color + default_value = Color.RED + label = "color" + + # Only RED is present — BLUE and GREEN are missing but that's fine + root = ParameterNode( + data={ + "value_by_color": { + "RED": {"2021-01-01": 1}, + "metadata": { + "breakdown": ["color"], + }, + } + } + ) + + system = TaxBenefitSystem([Person]) + system.add_variables(color) + system.parameters = root + + # Should not raise — partial coverage is allowed + homogenize_parameter_structures( + system.parameters, system.variables, default_value=0 + )