From 77076e27cdda532e8a83c272bc3a27dfea44d5ec Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sat, 14 Mar 2026 06:52:15 -0700 Subject: [PATCH 1/2] Fix SOI dtype error and ensure non-negative retirement/SS values 1. soi.py: Cast SOI "Value" column to float before uprating multiplication, fixing TypeError on newer pandas that reject float-into-int64 assignment. 2. extended_cps.py: Add defensive np.maximum(pred_values, 0) clip in _splice_cps_only_predictions for retirement contribution and SS sub-component variables, preventing negative values from surviving entity-level aggregation. Co-Authored-By: Claude Opus 4.6 --- changelog.d/fix-soi-dtype-and-se-pension.fixed.md | 1 + policyengine_us_data/datasets/cps/extended_cps.py | 3 +++ policyengine_us_data/utils/soi.py | 1 + 3 files changed, 5 insertions(+) create mode 100644 changelog.d/fix-soi-dtype-and-se-pension.fixed.md diff --git a/changelog.d/fix-soi-dtype-and-se-pension.fixed.md b/changelog.d/fix-soi-dtype-and-se-pension.fixed.md new file mode 100644 index 00000000..feb96d4c --- /dev/null +++ b/changelog.d/fix-soi-dtype-and-se-pension.fixed.md @@ -0,0 +1 @@ +Fix SOI uprating dtype error on newer pandas and add defensive non-negativity clip for retirement/SS variables in splice step. diff --git a/policyengine_us_data/datasets/cps/extended_cps.py b/policyengine_us_data/datasets/cps/extended_cps.py index f38d5746..f0b4b82c 100644 --- a/policyengine_us_data/datasets/cps/extended_cps.py +++ b/policyengine_us_data/datasets/cps/extended_cps.py @@ -380,6 +380,9 @@ def _splice_cps_only_predictions( values = data[var][time_period] # First half: keep original CPS values. # Second half: replace with QRF predictions. + # Defensive clip: contributions and benefits cannot be negative. + if var in _RETIREMENT_VARS or var in _SS_SUBCOMPONENT_VARS: + pred_values = np.maximum(pred_values, 0) cps_half = values[:n_half] new_values = np.concatenate([cps_half, pred_values]) data[var] = {time_period: new_values} diff --git a/policyengine_us_data/utils/soi.py b/policyengine_us_data/utils/soi.py index b9755c30..997a8078 100644 --- a/policyengine_us_data/utils/soi.py +++ b/policyengine_us_data/utils/soi.py @@ -164,6 +164,7 @@ def get_soi(year: int) -> pd.DataFrame: } soi = pd.read_csv(CALIBRATION_FOLDER / "soi_targets.csv") soi = soi[soi.Year == soi.Year.max()] + soi["Value"] = soi["Value"].astype(float) uprating_factors = {} for variable in uprating_map: From 518c6e637ee572be08b6ced18cc42c88a19c027f Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sat, 14 Mar 2026 06:56:15 -0700 Subject: [PATCH 2/2] =?UTF-8?q?Remove=20defensive=20clip=20=E2=80=94=20app?= =?UTF-8?q?ly=5Fretirement=5Fconstraints=20already=20handles=20this?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- policyengine_us_data/datasets/cps/extended_cps.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/policyengine_us_data/datasets/cps/extended_cps.py b/policyengine_us_data/datasets/cps/extended_cps.py index f0b4b82c..f38d5746 100644 --- a/policyengine_us_data/datasets/cps/extended_cps.py +++ b/policyengine_us_data/datasets/cps/extended_cps.py @@ -380,9 +380,6 @@ def _splice_cps_only_predictions( values = data[var][time_period] # First half: keep original CPS values. # Second half: replace with QRF predictions. - # Defensive clip: contributions and benefits cannot be negative. - if var in _RETIREMENT_VARS or var in _SS_SUBCOMPONENT_VARS: - pred_values = np.maximum(pred_values, 0) cps_half = values[:n_half] new_values = np.concatenate([cps_half, pred_values]) data[var] = {time_period: new_values}