From f32b8056a5de433c848e2fa7652e918a6dc2f8d9 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Sat, 16 Apr 2022 10:33:09 +0100
Subject: [PATCH 01/27] Add RawACS

---
 openfisca_us/__init__.py                   |  1 +
 openfisca_us/data/__init__.py              |  2 +-
 openfisca_us/data/datasets/__init__.py     |  5 +-
 openfisca_us/data/datasets/acs/__init__.py |  2 +
 openfisca_us/data/datasets/acs/acs.py      | 82 +++++++++++++++++++
 openfisca_us/data/datasets/acs/raw_acs.py  | 92 ++++++++++++++++++++++
 6 files changed, 181 insertions(+), 3 deletions(-)
 create mode 100644 openfisca_us/data/datasets/acs/__init__.py
 create mode 100644 openfisca_us/data/datasets/acs/acs.py
 create mode 100644 openfisca_us/data/datasets/acs/raw_acs.py

diff --git a/openfisca_us/__init__.py b/openfisca_us/__init__.py
index c50e1db6b1c..7c638e14845 100644
--- a/openfisca_us/__init__.py
+++ b/openfisca_us/__init__.py
@@ -9,4 +9,5 @@
 """
 from openfisca_us.system import CountryTaxBenefitSystem
 from openfisca_us.api import Microsimulation, IndividualSim
+from openfisca_us.data import ACS, CPS
 from openfisca_us import reforms
diff --git a/openfisca_us/data/__init__.py b/openfisca_us/data/__init__.py
index 34604780c73..44252f109ee 100644
--- a/openfisca_us/data/__init__.py
+++ b/openfisca_us/data/__init__.py
@@ -1 +1 @@
-from openfisca_us.data.datasets import CPS, RawCPS
+from openfisca_us.data.datasets import CPS, RawCPS, ACS, RawACS
diff --git a/openfisca_us/data/datasets/__init__.py b/openfisca_us/data/datasets/__init__.py
index cfef16c0268..030227a241e 100644
--- a/openfisca_us/data/datasets/__init__.py
+++ b/openfisca_us/data/datasets/__init__.py
@@ -1,3 +1,4 @@
-from openfisca_us.data.datasets.cps import CPS, RawCPS
+from .cps import CPS, RawCPS
+from .acs import ACS, RawACS
 
-DATASETS = [CPS, RawCPS]
+DATASETS = [CPS, RawCPS, ACS, RawACS]
diff --git a/openfisca_us/data/datasets/acs/__init__.py b/openfisca_us/data/datasets/acs/__init__.py
new file mode 100644
index 00000000000..6ed1f62bcf5
--- /dev/null
+++ b/openfisca_us/data/datasets/acs/__init__.py
@@ -0,0 +1,2 @@
+from openfisca_us_data.datasets.acs.raw_acs import RawACS
+from openfisca_us_data.datasets.acs.acs import ACS
diff --git a/openfisca_us/data/datasets/acs/acs.py b/openfisca_us/data/datasets/acs/acs.py
new file mode 100644
index 00000000000..169ab3eeea2
--- /dev/null
+++ b/openfisca_us/data/datasets/acs/acs.py
@@ -0,0 +1,82 @@
+import logging
+from openfisca_tools.data import PublicDataset
+import h5py
+from openfisca_us.data.datasets.acs.raw_acs import RawACS
+from openfisca_us.data.datasets.cps.raw_cps import RawCPS
+from openfisca_us.data.storage import OPENFISCA_US_MICRODATA_FOLDER
+from pandas import DataFrame, Series
+import numpy as np
+
+
+class ACS(PublicDataset):
+    name = "acs"
+    is_openfisca_compatible = True
+
+    def generate(self, year: int) -> None:
+        """Generates the ACS dataset.
+
+        Args:
+            year (int): The year of the raw ACS to use.
+        """
+
+        # Prepare raw ACS tables
+        year = int(year)
+        if year in self.years:
+            self.remove(year)
+        if year not in RawACS.years:
+            RawACS.generate(year)
+
+        raw_data = RawACS.load(year)
+        acs = h5py.File(ACS.file(year), mode="w")
+
+        person, spm_unit, household = [
+            raw_data[entity] for entity in ("person", "spm_unit", "household")
+        ]
+
+        add_ID_variables(acs, person, spm_unit, household)
+        add_SPM_variables(acs, spm_unit)
+
+        raw_data.close()
+        acs.close()
+
+ACS = ACS()
+
+
+def add_ID_variables(
+    acs: h5py.File,
+    person: DataFrame,
+    spm_unit: DataFrame,
+    household: DataFrame,
+):
+    """Add basic ID and weight variables.
+
+    Args:
+        acs (h5py.File): The ACS dataset file.
+        person (DataFrame): The person table of the ACS.
+        spm_unit (DataFrame): The SPM unit table created from the person table
+            of the ACS.
+        household (DataFrame): The household table of the ACS.
+    """
+    # Add primary and foreign keys
+    acs["person_id"] = person.SERIALNO * 1e2 + person.SPORDER
+    acs["person_spm_unit_id"] = person.SPM_ID
+    acs["spm_unit_id"] = spm_unit.SPM_ID
+    # ACS doesn't have tax units.
+    acs["tax_unit_id"] = spm_unit.SPM_ID
+    # Until we add a family table, we'll use the person table.
+    acs["family_id"] = spm_unit.SPM_ID
+    acs["person_household_id"] = person.SERIALNO
+    acs["person_tax_unit_id"] = person.SPM_ID
+    acs["person_family_id"] = person.SPM_ID
+    acs["household_id"] = household.SERIALNO
+
+    # Add weights
+    acs["person_weight"] = person.WT
+
+def add_person_variables(acs: h5py.File, person: DataFrame):
+    pass
+
+
+def add_spm_variables(acs: h5py.File, spm_unit: DataFrame):
+    acs["spm_unit_net_income"] = spm_unit.SPM_RESOURCES
+    acs["poverty_threshold"] = spm_unit.SPM_POVTHRESHOLD
diff --git a/openfisca_us/data/datasets/acs/raw_acs.py b/openfisca_us/data/datasets/acs/raw_acs.py
new file mode 100644
index 00000000000..ed9457649f1
--- /dev/null
+++ b/openfisca_us/data/datasets/acs/raw_acs.py
@@ -0,0 +1,92 @@
+from io import BytesIO
+import logging
+from zipfile import ZipFile
+import pandas as pd
+from openfisca_tools.data import PublicDataset
+import h5py
+import requests
+from openfisca_us.data.datasets.cps.raw_cps import RawCPS
+from openfisca_us.data.storage import OPENFISCA_US_MICRODATA_FOLDER
+from pandas import DataFrame, Series
+import numpy as np
+
+
+class RawACS(PublicDataset):
+    name = "raw_acs"
+    label = "Raw ACS"
+
+    def generate(self, year: int) -> None:
+        year = int(year)
+        if year in self.years:
+            self.remove(year)
+
+        
+        spm_url = f"https://www2.census.gov/programs-surveys/supplemental-poverty-measure/datasets/spm/spm_{year}_pu.dta"
+        person_url = f"https://www2.census.gov/programs-surveys/acs/data/pums/{year}/1-Year/csv_pus.zip"
+        household_url = f"https://www2.census.gov/programs-surveys/acs/data/pums/{year}/1-Year/csv_hus.zip"
+        try:
+            with pd.HDFStore(RawACS.file(year)) as storage:
+                # Person file
+                logging.info(f"Downloading person file")
+                storage["person"] = concat_zipped_csvs(person_url, "psam_pus")
+                # Household file
+                logging.info(f"Downloading household file")
+                storage["household"] = concat_zipped_csvs(household_url, "psam_hus")
+                # SPM unit file
+                logging.info(f"Downloading SPM unit file")
+                spm_person = pd.read_stata(spm_url).fillna(0)
+                spm_person.columns = spm_person.columns.str.upper()
+                storage["spm_unit"] = create_spm_unit_table(spm_person)
+        except Exception as e:
+            RawACS.remove(year)
+            raise ValueError(
+                f"Attempted to extract and save the CSV files, but encountered an error: {e}"
+            )
+
+RawACS = RawACS()
+
+def concat_zipped_csvs(url: str, prefix: str) -> pd.DataFrame:
+    # Creates a DataFrame with the two csvs inside a zip file from a URL.
+    zf = ZipFile(BytesIO(requests.get(url)))
+    a = pd.read_csv(zf.open(prefix + "a.csv"))
+    b = pd.read_csv(zf.open(prefix + "b.csv"))
+    res = pd.concat([a, b]).fillna(0)
+    res.columns = res.columns.str.upper()
+    return res
+
+
+def create_spm_unit_table(person: pd.DataFrame) -> pd.DataFrame:
+    SPM_UNIT_COLUMNS = [
+        "CAPHOUSESUB",
+        "CAPWKCCXPNS",
+        "CHILDCAREXPNS",
+        "EITC",
+        "ENGVAL",
+        "EQUIVSCALE",
+        "FEDTAX",
+        "FEDTAXBC",
+        "FICA",
+        "GEOADJ",
+        "MEDXPNS",
+        "NUMADULTS",
+        "NUMKIDS",
+        "NUMPER",
+        "POOR",
+        "POVTHRESHOLD",
+        "RESOURCES",
+        "SCHLUNCH",
+        "SNAPSUB",
+        "STTAX",
+        "TENMORTSTATUS",
+        "TOTVAL",
+        "WCOHABIT",
+        "WICVAL",
+        "WKXPNS",
+        "WUI_LT15",
+        "ID",
+    ]
+    return (
+        person[["SPM_" + column for column in SPM_UNIT_COLUMNS]]
+        .groupby(person.SPM_ID)
+        .first()
+    )

From 3ae098f8bd168c2c89e4a2b13c838ed1e71060f6 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Sat, 16 Apr 2022 11:09:51 +0100
Subject: [PATCH 02/27] Add person, household files to RawACS

---
 openfisca_us/data/datasets/acs/__init__.py |  4 ++--
 openfisca_us/data/datasets/acs/acs.py      |  8 ++++---
 openfisca_us/data/datasets/acs/raw_acs.py  | 25 ++++++++++++++++------
 3 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/openfisca_us/data/datasets/acs/__init__.py b/openfisca_us/data/datasets/acs/__init__.py
index 6ed1f62bcf5..881fc59af8a 100644
--- a/openfisca_us/data/datasets/acs/__init__.py
+++ b/openfisca_us/data/datasets/acs/__init__.py
@@ -1,2 +1,2 @@
-from openfisca_us_data.datasets.acs.raw_acs import RawACS
-from openfisca_us_data.datasets.acs.acs import ACS
+from openfisca_us.data.datasets.acs.raw_acs import RawACS
+from openfisca_us.data.datasets.acs.acs import ACS
diff --git a/openfisca_us/data/datasets/acs/acs.py b/openfisca_us/data/datasets/acs/acs.py
index 169ab3eeea2..57a0b0cd040 100644
--- a/openfisca_us/data/datasets/acs/acs.py
+++ b/openfisca_us/data/datasets/acs/acs.py
@@ -11,6 +11,8 @@
 class ACS(PublicDataset):
     name = "acs"
     is_openfisca_compatible = True
+    label = "ACS"
+    folder_path = OPENFISCA_US_MICRODATA_FOLDER
 
     def generate(self, year: int) -> None:
         """Generates the ACS dataset.
@@ -33,8 +35,8 @@ def generate(self, year: int) -> None:
             raw_data[entity] for entity in ("person", "spm_unit", "household")
         ]
 
-        add_ID_variables(acs, person, spm_unit, household)
-        add_SPM_variables(acs, spm_unit)
+        add_id_variables(acs, person, spm_unit, household)
+        add_spm_variables(acs, spm_unit)
 
         raw_data.close()
         acs.close()
@@ -42,7 +44,7 @@ def generate(self, year: int) -> None:
 ACS = ACS()
 
 
-def add_ID_variables(
+def add_id_variables(
     acs: h5py.File,
     person: DataFrame,
     spm_unit: DataFrame,
diff --git a/openfisca_us/data/datasets/acs/raw_acs.py b/openfisca_us/data/datasets/acs/raw_acs.py
index ed9457649f1..93ceb6edc7e 100644
--- a/openfisca_us/data/datasets/acs/raw_acs.py
+++ b/openfisca_us/data/datasets/acs/raw_acs.py
@@ -5,15 +5,20 @@
 from openfisca_tools.data import PublicDataset
 import h5py
 import requests
+from tqdm import tqdm
 from openfisca_us.data.datasets.cps.raw_cps import RawCPS
 from openfisca_us.data.storage import OPENFISCA_US_MICRODATA_FOLDER
 from pandas import DataFrame, Series
 import numpy as np
 
+logging.getLogger().setLevel(logging.INFO)
+
 
 class RawACS(PublicDataset):
     name = "raw_acs"
     label = "Raw ACS"
+    is_openfisca_compatible = False
+    folder_path = OPENFISCA_US_MICRODATA_FOLDER
 
     def generate(self, year: int) -> None:
         year = int(year)
@@ -26,12 +31,12 @@ def generate(self, year: int) -> None:
         household_url = f"https://www2.census.gov/programs-surveys/acs/data/pums/{year}/1-Year/csv_hus.zip"
         try:
             with pd.HDFStore(RawACS.file(year)) as storage:
-                # Person file
-                logging.info(f"Downloading person file")
-                storage["person"] = concat_zipped_csvs(person_url, "psam_pus")
                 # Household file
                 logging.info(f"Downloading household file")
                 storage["household"] = concat_zipped_csvs(household_url, "psam_hus")
+                # Person file
+                logging.info(f"Downloading person file")
+                storage["person"] = concat_zipped_csvs(person_url, "psam_pus")
                 # SPM unit file
                 logging.info(f"Downloading SPM unit file")
                 spm_person = pd.read_stata(spm_url).fillna(0)
@@ -47,9 +52,17 @@ def generate(self, year: int) -> None:
 
 def concat_zipped_csvs(url: str, prefix: str) -> pd.DataFrame:
     # Creates a DataFrame with the two csvs inside a zip file from a URL.
-    zf = ZipFile(BytesIO(requests.get(url)))
-    a = pd.read_csv(zf.open(prefix + "a.csv"))
-    b = pd.read_csv(zf.open(prefix + "b.csv"))
+    req = requests.get(url, stream=True)
+    with BytesIO() as f:
+        pbar = tqdm()
+        for chunk in req.iter_content(chunk_size=1024): 
+            if chunk: # filter out keep-alive new chunks
+                pbar.update (len(chunk))
+                f.write(chunk)
+        f.seek(0)
+        zf = ZipFile(f)
+        a = pd.read_csv(zf.open(prefix + "a.csv"))
+        b = pd.read_csv(zf.open(prefix + "b.csv"))
     res = pd.concat([a, b]).fillna(0)
     res.columns = res.columns.str.upper()
     return res

From 5c79d694836ea3914ea689bc218aaf030deb98a1 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Sat, 16 Apr 2022 11:39:41 +0100
Subject: [PATCH 03/27] Add columns

---
 openfisca_us/data/datasets/acs/raw_acs.py | 70 ++++++++++++++++++++---
 1 file changed, 63 insertions(+), 7 deletions(-)

diff --git a/openfisca_us/data/datasets/acs/raw_acs.py b/openfisca_us/data/datasets/acs/raw_acs.py
index 93ceb6edc7e..99f78f214e8 100644
--- a/openfisca_us/data/datasets/acs/raw_acs.py
+++ b/openfisca_us/data/datasets/acs/raw_acs.py
@@ -1,5 +1,6 @@
 from io import BytesIO
 import logging
+from typing import List
 from zipfile import ZipFile
 import pandas as pd
 from openfisca_tools.data import PublicDataset
@@ -13,6 +14,48 @@
 
 logging.getLogger().setLevel(logging.INFO)
 
+PERSON_COLUMNS = [
+    "SERIALNO", # Household ID
+    "SPORDER", # Person number within household
+    "PWGTP", # Person weight
+    "AGEP", # Age
+    "CIT", # Citizenship
+    "MAR", # Marital status
+    "WAGP", # Wage/salary
+    "SSP", # Social security income
+    "SSIP", # Supplemental security income
+    "SEX", # Sex
+    "SEMP", # Self-employment income
+    "SCHL", # Educational attainment
+    "RETP", # Retirement income
+    "PAP", # Public assistance income
+    "OIP", # Other income
+    "PERNP", # Total earnings
+    "PINCP", # Total income
+    "POVPIP", # Income-to-poverty line percentage
+    "RAC1P", # Race
+]
+
+HOUSEHOLD_COLUMNS = [
+    "SERIALNO", # Household ID
+    "PUMA", # PUMA area code
+    "ST", # State code
+    "ADJHSG", # Adjustment factor for housing dollar amounts
+    "ADJINC", # Adjustment factor for income
+    "WGTP", # Household weight
+    "NP", # Number of persons in household
+    "BDSP", # Number of bedrooms
+    "ELEP", # Electricity monthly cost
+    "FULP", # Fuel monthly cost
+    "GASP", # Gas monthly cost
+    "RMSP", # Number of rooms
+    "RNTP", # Monthly rent
+    "TEN", # Tenure
+    "VEH", # Number of vehicles
+    "FINCP", # Total income
+    "GRNTP", # Gross rent
+]
+
 
 class RawACS(PublicDataset):
     name = "raw_acs"
@@ -25,18 +68,19 @@ def generate(self, year: int) -> None:
         if year in self.years:
             self.remove(year)
 
-        
         spm_url = f"https://www2.census.gov/programs-surveys/supplemental-poverty-measure/datasets/spm/spm_{year}_pu.dta"
         person_url = f"https://www2.census.gov/programs-surveys/acs/data/pums/{year}/1-Year/csv_pus.zip"
         household_url = f"https://www2.census.gov/programs-surveys/acs/data/pums/{year}/1-Year/csv_hus.zip"
+
+        # The data dictionary for 2019 can be found here: https://www2.census.gov/programs-surveys/acs/tech_docs/pums/data_dict/PUMS_Data_Dictionary_2019.pdf
         try:
             with pd.HDFStore(RawACS.file(year)) as storage:
                 # Household file
                 logging.info(f"Downloading household file")
-                storage["household"] = concat_zipped_csvs(household_url, "psam_hus")
+                storage["household"] = concat_zipped_csvs(household_url, "psam_hus", HOUSEHOLD_COLUMNS)
                 # Person file
                 logging.info(f"Downloading person file")
-                storage["person"] = concat_zipped_csvs(person_url, "psam_pus")
+                storage["person"] = concat_zipped_csvs(person_url, "psam_pus", PERSON_COLUMNS)
                 # SPM unit file
                 logging.info(f"Downloading SPM unit file")
                 spm_person = pd.read_stata(spm_url).fillna(0)
@@ -50,8 +94,17 @@ def generate(self, year: int) -> None:
 
 RawACS = RawACS()
 
-def concat_zipped_csvs(url: str, prefix: str) -> pd.DataFrame:
-    # Creates a DataFrame with the two csvs inside a zip file from a URL.
+def concat_zipped_csvs(url: str, prefix: str, columns: List[str]) -> pd.DataFrame:
+    """Downloads the ACS microdata, which is a zip file containing two halves in CSV format.
+
+    Args:
+        url (str): The URL of the data server.
+        prefix (str): The prefix of the filenames, before a/b.csv.
+        columns (List[str]): The columns to filter (avoids hitting memory limits).
+
+    Returns:
+        pd.DataFrame: The concatenated DataFrame.
+    """
     req = requests.get(url, stream=True)
     with BytesIO() as f:
         pbar = tqdm()
@@ -61,8 +114,11 @@ def concat_zipped_csvs(url: str, prefix: str) -> pd.DataFrame:
                 f.write(chunk)
         f.seek(0)
         zf = ZipFile(f)
-        a = pd.read_csv(zf.open(prefix + "a.csv"))
-        b = pd.read_csv(zf.open(prefix + "b.csv"))
+        logging.info(f"Loading the first half of the dataset")
+        a = pd.read_csv(zf.open(prefix + "a.csv"))[columns]
+        logging.info(f"Loading the second half of the dataset")
+        b = pd.read_csv(zf.open(prefix + "b.csv"))[columns]
+    logging.info(f"Concatenating datasets")
     res = pd.concat([a, b]).fillna(0)
     res.columns = res.columns.str.upper()
     return res

From 8d7292a0093cd7c8448702c1cca62399288474b9 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Sat, 16 Apr 2022 11:55:04 +0100
Subject: [PATCH 04/27] Fix pandas misuse

---
 openfisca_us/data/datasets/acs/raw_acs.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/openfisca_us/data/datasets/acs/raw_acs.py b/openfisca_us/data/datasets/acs/raw_acs.py
index 99f78f214e8..a26e1f3e96d 100644
--- a/openfisca_us/data/datasets/acs/raw_acs.py
+++ b/openfisca_us/data/datasets/acs/raw_acs.py
@@ -73,6 +73,7 @@ def generate(self, year: int) -> None:
         household_url = f"https://www2.census.gov/programs-surveys/acs/data/pums/{year}/1-Year/csv_hus.zip"
 
         # The data dictionary for 2019 can be found here: https://www2.census.gov/programs-surveys/acs/tech_docs/pums/data_dict/PUMS_Data_Dictionary_2019.pdf
+
         try:
             with pd.HDFStore(RawACS.file(year)) as storage:
                 # Household file
@@ -115,9 +116,9 @@ def concat_zipped_csvs(url: str, prefix: str, columns: List[str]) -> pd.DataFram
         f.seek(0)
         zf = ZipFile(f)
         logging.info(f"Loading the first half of the dataset")
-        a = pd.read_csv(zf.open(prefix + "a.csv"))[columns]
+        a = pd.read_csv(zf.open(prefix + "a.csv"), usecols=columns)
         logging.info(f"Loading the second half of the dataset")
-        b = pd.read_csv(zf.open(prefix + "b.csv"))[columns]
+        b = pd.read_csv(zf.open(prefix + "b.csv"), usecols=columns)
     logging.info(f"Concatenating datasets")
     res = pd.concat([a, b]).fillna(0)
     res.columns = res.columns.str.upper()

From cf048e7036f679a5df4628ec87ce9db540ca50b1 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Sat, 16 Apr 2022 14:53:23 +0100
Subject: [PATCH 05/27] Add vehicle formula

---
 openfisca_us/data/datasets/acs/acs.py         | 22 +++++++++------
 openfisca_us/data/datasets/acs/raw_acs.py     | 12 ++++++---
 .../demographic/household/vehicles_owned.py   | 11 ++++++++
 .../variables/demographic/person/is_adult.py  | 12 +++++++++
 .../demographic/person/vehicles_owned.py      | 27 +++++++++++++++++++
 5 files changed, 73 insertions(+), 11 deletions(-)
 create mode 100644 openfisca_us/variables/demographic/household/vehicles_owned.py
 create mode 100644 openfisca_us/variables/demographic/person/is_adult.py
 create mode 100644 openfisca_us/variables/demographic/person/vehicles_owned.py

diff --git a/openfisca_us/data/datasets/acs/acs.py b/openfisca_us/data/datasets/acs/acs.py
index 57a0b0cd040..ad4a8edb589 100644
--- a/openfisca_us/data/datasets/acs/acs.py
+++ b/openfisca_us/data/datasets/acs/acs.py
@@ -34,9 +34,15 @@ def generate(self, year: int) -> None:
         person, spm_unit, household = [
             raw_data[entity] for entity in ("person", "spm_unit", "household")
         ]
+        # Add primary and foreign keys
+        household["household_id"] = household.index
+        person["household_id"] = household[["household_id", "SERIALNO"]].set_index("SERIALNO").loc[person.SERIALNO.values].values
+        household = household[household.household_id.isin(person.household_id)]
+        person = person[person.household_id.isin(household.household_id)]
 
         add_id_variables(acs, person, spm_unit, household)
         add_spm_variables(acs, spm_unit)
+        add_household_variables(acs, household)
 
         raw_data.close()
         acs.close()
@@ -59,26 +65,26 @@ def add_id_variables(
             of the ACS.
         household (DataFrame): The household table of the ACS.
     """
-    # Add primary and foreign keys
-    acs["person_id"] = person.SERIALNO * 1e2 + person.SPORDER
+    acs["person_id"] = person.household_id * 1e2 + person.SPORDER
     acs["person_spm_unit_id"] = person.SPM_ID
     acs["spm_unit_id"] = spm_unit.SPM_ID
     # ACS doesn't have tax units.
     acs["tax_unit_id"] = spm_unit.SPM_ID
     # Until we add a family table, we'll use the person table.
     acs["family_id"] = spm_unit.SPM_ID
-    acs["person_household_id"] = person.SERIALNO
+    acs["person_household_id"] = person.household_id
     acs["person_tax_unit_id"] = person.SPM_ID
     acs["person_family_id"] = person.SPM_ID
-    acs["household_id"] = household.SERIALNO
+    acs["household_id"] = household.household_id
 
     # Add weights
-    acs["person_weight"] = person.WT
-
-def add_person_variables(acs: h5py.File, person: DataFrame):
-    pass
+    acs["person_weight"] = person.PWGTP
+    acs["household_weight"] = household.WGTP
 
 
 def add_spm_variables(acs: h5py.File, spm_unit: DataFrame):
     acs["spm_unit_net_income"] = spm_unit.SPM_RESOURCES
     acs["poverty_threshold"] = spm_unit.SPM_POVTHRESHOLD
+
+def add_household_variables(acs: h5py.File, household: DataFrame):
+    acs["household_vehicles_owned"] = household.VEH
diff --git a/openfisca_us/data/datasets/acs/raw_acs.py b/openfisca_us/data/datasets/acs/raw_acs.py
index a26e1f3e96d..d9c8acb5437 100644
--- a/openfisca_us/data/datasets/acs/raw_acs.py
+++ b/openfisca_us/data/datasets/acs/raw_acs.py
@@ -86,7 +86,7 @@ def generate(self, year: int) -> None:
                 logging.info(f"Downloading SPM unit file")
                 spm_person = pd.read_stata(spm_url).fillna(0)
                 spm_person.columns = spm_person.columns.str.upper()
-                storage["spm_unit"] = create_spm_unit_table(spm_person)
+                create_spm_unit_table(storage, spm_person)
         except Exception as e:
             RawACS.remove(year)
             raise ValueError(
@@ -125,7 +125,7 @@ def concat_zipped_csvs(url: str, prefix: str, columns: List[str]) -> pd.DataFram
     return res
 
 
-def create_spm_unit_table(person: pd.DataFrame) -> pd.DataFrame:
+def create_spm_unit_table(storage: pd.HDFStore, person: pd.DataFrame) -> pd.DataFrame:
     SPM_UNIT_COLUMNS = [
         "CAPHOUSESUB",
         "CAPWKCCXPNS",
@@ -155,8 +155,14 @@ def create_spm_unit_table(person: pd.DataFrame) -> pd.DataFrame:
         "WUI_LT15",
         "ID",
     ]
-    return (
+    spm_table = (
         person[["SPM_" + column for column in SPM_UNIT_COLUMNS]]
         .groupby(person.SPM_ID)
         .first()
     )
+
+    person_table = storage["person"]
+    person_table["SPM_ID"] = person.SPM_ID
+
+    storage["person"] = person_table
+    storage["spm_unit"] = spm_table
diff --git a/openfisca_us/variables/demographic/household/vehicles_owned.py b/openfisca_us/variables/demographic/household/vehicles_owned.py
new file mode 100644
index 00000000000..712d2e0cdc5
--- /dev/null
+++ b/openfisca_us/variables/demographic/household/vehicles_owned.py
@@ -0,0 +1,11 @@
+from openfisca_us.model_api import *
+
+
+class household_vehicles_owned(Variable):
+    value_type = float
+    entity = Household
+    label = "Vehicles owned"
+    unit = USD
+    documentation = "Number of vehicles owned by the household"
+    definition_period = YEAR
+
diff --git a/openfisca_us/variables/demographic/person/is_adult.py b/openfisca_us/variables/demographic/person/is_adult.py
new file mode 100644
index 00000000000..993e3b22ec0
--- /dev/null
+++ b/openfisca_us/variables/demographic/person/is_adult.py
@@ -0,0 +1,12 @@
+from openfisca_us.model_api import *
+
+
+class is_adult(Variable):
+    value_type = bool
+    entity = Person
+    label = "Is an adult"
+    documentation = "Whether this person is over 18"
+    definition_period = YEAR
+
+    def formula(person, period, parameters):
+        return person("age", period) >= 18
diff --git a/openfisca_us/variables/demographic/person/vehicles_owned.py b/openfisca_us/variables/demographic/person/vehicles_owned.py
new file mode 100644
index 00000000000..d67057a34bd
--- /dev/null
+++ b/openfisca_us/variables/demographic/person/vehicles_owned.py
@@ -0,0 +1,27 @@
+from numpy import maximum
+from openfisca_us.model_api import *
+from random import randint
+from openfisca_core.populations import Population
+
+class vehicles_owned(Variable):
+    value_type = float
+    entity = Person
+    label = "Vehicles owned"
+    unit = USD
+    documentation = "Number of vehicles owned by this person"
+    definition_period = YEAR
+
+    def formula(person, period, parameters):
+        household = person.household
+        household_vehicles = household("household_vehicles_owned", period)
+        is_adult = person("is_adult", period)
+        max_vehicles = household_vehicles.max()
+        adult_rank = where(is_adult, household.members_position, 100)
+        vehicles = np.zeros_like(is_adult)
+        for i in range(int(max_vehicles)):
+            # Pick a random adult in each household
+            selected_adult = randint(0, adult_rank[is_adult].max())
+            maximum_reached = household.sum(vehicles) >= household_vehicles
+            vehicles += where(maximum_reached.project(), adult_rank == selected_adult, 0)
+        return vehicles
+

From ed4bed58365d32941af0262e0053b995817c7941 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Sat, 16 Apr 2022 15:28:23 +0100
Subject: [PATCH 06/27] Fix formula

---
 openfisca_us/data/datasets/acs/acs.py            | 16 +++++++++-------
 .../demographic/person/vehicles_owned.py         |  7 ++++---
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/openfisca_us/data/datasets/acs/acs.py b/openfisca_us/data/datasets/acs/acs.py
index ad4a8edb589..0f690f35bf5 100644
--- a/openfisca_us/data/datasets/acs/acs.py
+++ b/openfisca_us/data/datasets/acs/acs.py
@@ -35,10 +35,12 @@ def generate(self, year: int) -> None:
             raw_data[entity] for entity in ("person", "spm_unit", "household")
         ]
         # Add primary and foreign keys
-        household["household_id"] = household.index
-        person["household_id"] = household[["household_id", "SERIALNO"]].set_index("SERIALNO").loc[person.SERIALNO.values].values
-        household = household[household.household_id.isin(person.household_id)]
-        person = person[person.household_id.isin(household.household_id)]
+        make_numeric = lambda x: int(x.replace("2019GQ", "0").replace("2019HU", "1"))
+        household.SERIALNO = household.SERIALNO.apply(make_numeric)
+        person.SERIALNO = person.SERIALNO.apply(make_numeric)
+    
+        person = person[person.SERIALNO.isin(household.SERIALNO)]
+        household = household[household.SERIALNO.isin(person.SERIALNO)]
 
         add_id_variables(acs, person, spm_unit, household)
         add_spm_variables(acs, spm_unit)
@@ -65,17 +67,17 @@ def add_id_variables(
             of the ACS.
         household (DataFrame): The household table of the ACS.
     """
-    acs["person_id"] = person.household_id * 1e2 + person.SPORDER
+    acs["person_id"] = person.SERIALNO * 1e2 + person.SPORDER
     acs["person_spm_unit_id"] = person.SPM_ID
     acs["spm_unit_id"] = spm_unit.SPM_ID
     # ACS doesn't have tax units.
     acs["tax_unit_id"] = spm_unit.SPM_ID
     # Until we add a family table, we'll use the person table.
     acs["family_id"] = spm_unit.SPM_ID
-    acs["person_household_id"] = person.household_id
+    acs["person_household_id"] = person.SERIALNO
     acs["person_tax_unit_id"] = person.SPM_ID
     acs["person_family_id"] = person.SPM_ID
-    acs["household_id"] = household.household_id
+    acs["household_id"] = household.SERIALNO
 
     # Add weights
     acs["person_weight"] = person.PWGTP
diff --git a/openfisca_us/variables/demographic/person/vehicles_owned.py b/openfisca_us/variables/demographic/person/vehicles_owned.py
index d67057a34bd..66aab782cb5 100644
--- a/openfisca_us/variables/demographic/person/vehicles_owned.py
+++ b/openfisca_us/variables/demographic/person/vehicles_owned.py
@@ -17,11 +17,12 @@ def formula(person, period, parameters):
         is_adult = person("is_adult", period)
         max_vehicles = household_vehicles.max()
         adult_rank = where(is_adult, household.members_position, 100)
-        vehicles = np.zeros_like(is_adult)
-        for i in range(int(max_vehicles)):
+        vehicles = is_adult * 0
+        for _ in range(int(max_vehicles)):
             # Pick a random adult in each household
             selected_adult = randint(0, adult_rank[is_adult].max())
             maximum_reached = household.sum(vehicles) >= household_vehicles
-            vehicles += where(maximum_reached.project(), adult_rank == selected_adult, 0)
+            should_add_vehicle = ~maximum_reached & (adult_rank == selected_adult)
+            vehicles += where(should_add_vehicle, 1, 0)
         return vehicles
 

From d04e66497a47c68b9dcdc2b2b9171d700d4529b4 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Sat, 16 Apr 2022 15:32:28 +0100
Subject: [PATCH 07/27] Fix formula (working!)

---
 openfisca_us/variables/demographic/person/vehicles_owned.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/openfisca_us/variables/demographic/person/vehicles_owned.py b/openfisca_us/variables/demographic/person/vehicles_owned.py
index 66aab782cb5..8fdc69addbb 100644
--- a/openfisca_us/variables/demographic/person/vehicles_owned.py
+++ b/openfisca_us/variables/demographic/person/vehicles_owned.py
@@ -15,12 +15,13 @@ def formula(person, period, parameters):
         household = person.household
         household_vehicles = household("household_vehicles_owned", period)
         is_adult = person("is_adult", period)
+        num_adults_in_household = household.sum(is_adult)
         max_vehicles = household_vehicles.max()
         adult_rank = where(is_adult, household.members_position, 100)
         vehicles = is_adult * 0
         for _ in range(int(max_vehicles)):
             # Pick a random adult in each household
-            selected_adult = randint(0, adult_rank[is_adult].max())
+            selected_adult = randint(0, adult_rank[is_adult].max()) % num_adults_in_household
             maximum_reached = household.sum(vehicles) >= household_vehicles
             should_add_vehicle = ~maximum_reached & (adult_rank == selected_adult)
             vehicles += where(should_add_vehicle, 1, 0)

From b6bb14e6476ef4f8758bff2291d0e3e95b17fc12 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Sat, 16 Apr 2022 17:13:39 +0100
Subject: [PATCH 08/27] Push recent changes

---
 openfisca_us/data/datasets/acs/acs.py                 |  4 +++-
 openfisca_us/data/datasets/acs/raw_acs.py             |  6 +++---
 .../variables/demographic/person/vehicles_owned.py    |  1 +
 .../variables/income/spm_unit/spm_unit_fips.py        | 11 +++++++++++
 openfisca_us/variables/irs/income/sources.py          |  2 +-
 5 files changed, 19 insertions(+), 5 deletions(-)
 create mode 100644 openfisca_us/variables/income/spm_unit/spm_unit_fips.py

diff --git a/openfisca_us/data/datasets/acs/acs.py b/openfisca_us/data/datasets/acs/acs.py
index 0f690f35bf5..3bfc1640d9d 100644
--- a/openfisca_us/data/datasets/acs/acs.py
+++ b/openfisca_us/data/datasets/acs/acs.py
@@ -41,6 +41,7 @@ def generate(self, year: int) -> None:
     
         person = person[person.SERIALNO.isin(household.SERIALNO)]
         household = household[household.SERIALNO.isin(person.SERIALNO)]
+        spm_unit = spm_unit[spm_unit.SPM_ID.isin(person.SPM_ID)]
 
         add_id_variables(acs, person, spm_unit, household)
         add_spm_variables(acs, spm_unit)
@@ -86,7 +87,8 @@ def add_id_variables(
 
 def add_spm_variables(acs: h5py.File, spm_unit: DataFrame):
     acs["spm_unit_net_income"] = spm_unit.SPM_RESOURCES
-    acs["poverty_threshold"] = spm_unit.SPM_POVTHRESHOLD
+    acs["spm_unit_spm_threshold"] = spm_unit.SPM_POVTHRESHOLD
 
 def add_household_variables(acs: h5py.File, household: DataFrame):
     acs["household_vehicles_owned"] = household.VEH
+    acs["fips"] = household.ST
diff --git a/openfisca_us/data/datasets/acs/raw_acs.py b/openfisca_us/data/datasets/acs/raw_acs.py
index d9c8acb5437..8e0219599fb 100644
--- a/openfisca_us/data/datasets/acs/raw_acs.py
+++ b/openfisca_us/data/datasets/acs/raw_acs.py
@@ -161,8 +161,8 @@ def create_spm_unit_table(storage: pd.HDFStore, person: pd.DataFrame) -> pd.Data
         .first()
     )
 
-    person_table = storage["person"]
-    person_table["SPM_ID"] = person.SPM_ID
+    original_person_table = storage["person"]
+    combined_person_table = pd.merge(original_person_table, person, on=["SERIALNO", "SPORDER"])
 
-    storage["person"] = person_table
+    storage["person"] = combined_person_table
     storage["spm_unit"] = spm_table
diff --git a/openfisca_us/variables/demographic/person/vehicles_owned.py b/openfisca_us/variables/demographic/person/vehicles_owned.py
index 8fdc69addbb..c7d0a826e4c 100644
--- a/openfisca_us/variables/demographic/person/vehicles_owned.py
+++ b/openfisca_us/variables/demographic/person/vehicles_owned.py
@@ -12,6 +12,7 @@ class vehicles_owned(Variable):
     definition_period = YEAR
 
     def formula(person, period, parameters):
+        # We randomly split the household's vehicles between its adults
         household = person.household
         household_vehicles = household("household_vehicles_owned", period)
         is_adult = person("is_adult", period)
diff --git a/openfisca_us/variables/income/spm_unit/spm_unit_fips.py b/openfisca_us/variables/income/spm_unit/spm_unit_fips.py
new file mode 100644
index 00000000000..26e9a99b35a
--- /dev/null
+++ b/openfisca_us/variables/income/spm_unit/spm_unit_fips.py
@@ -0,0 +1,11 @@
+from openfisca_us.model_api import *
+
+
+class spm_unit_fips(Variable):
+    value_type = float
+    entity = SPMUnit
+    label = "SPM unit FIPS code"
+    definition_period = YEAR
+
+    def formula(spm_unit, period, parameters):
+        return spm_unit.household("fips", period)
\ No newline at end of file
diff --git a/openfisca_us/variables/irs/income/sources.py b/openfisca_us/variables/irs/income/sources.py
index 3c2f093194e..55f14b4fcde 100644
--- a/openfisca_us/variables/irs/income/sources.py
+++ b/openfisca_us/variables/irs/income/sources.py
@@ -1009,7 +1009,7 @@ class ffpos(Variable):
 
 class fips(Variable):
     value_type = int
-    entity = TaxUnit
+    entity = Household
     definition_period = YEAR
     documentation = "FIPS state code (not used in tax-calculation logic)"
 

From a2f826347143fe1ae8350b79c1dc573e62558313 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sat, 16 Apr 2022 13:06:26 -0700
Subject: [PATCH 09/27] Remove unused imports

---
 openfisca_us/data/datasets/acs/acs.py     |  13 +--
 openfisca_us/data/datasets/acs/raw_acs.py | 105 ++++++++++++----------
 2 files changed, 64 insertions(+), 54 deletions(-)

diff --git a/openfisca_us/data/datasets/acs/acs.py b/openfisca_us/data/datasets/acs/acs.py
index 3bfc1640d9d..15c976313d2 100644
--- a/openfisca_us/data/datasets/acs/acs.py
+++ b/openfisca_us/data/datasets/acs/acs.py
@@ -1,11 +1,8 @@
-import logging
 from openfisca_tools.data import PublicDataset
 import h5py
 from openfisca_us.data.datasets.acs.raw_acs import RawACS
-from openfisca_us.data.datasets.cps.raw_cps import RawCPS
 from openfisca_us.data.storage import OPENFISCA_US_MICRODATA_FOLDER
-from pandas import DataFrame, Series
-import numpy as np
+from pandas import DataFrame
 
 
 class ACS(PublicDataset):
@@ -35,10 +32,12 @@ def generate(self, year: int) -> None:
             raw_data[entity] for entity in ("person", "spm_unit", "household")
         ]
         # Add primary and foreign keys
-        make_numeric = lambda x: int(x.replace("2019GQ", "0").replace("2019HU", "1"))
+        make_numeric = lambda x: int(
+            x.replace("2019GQ", "0").replace("2019HU", "1")
+        )
         household.SERIALNO = household.SERIALNO.apply(make_numeric)
         person.SERIALNO = person.SERIALNO.apply(make_numeric)
-    
+
         person = person[person.SERIALNO.isin(household.SERIALNO)]
         household = household[household.SERIALNO.isin(person.SERIALNO)]
         spm_unit = spm_unit[spm_unit.SPM_ID.isin(person.SPM_ID)]
@@ -50,6 +49,7 @@ def generate(self, year: int) -> None:
         raw_data.close()
         acs.close()
 
+
 ACS = ACS()
 
 
@@ -89,6 +89,7 @@ def add_spm_variables(acs: h5py.File, spm_unit: DataFrame):
     acs["spm_unit_net_income"] = spm_unit.SPM_RESOURCES
     acs["spm_unit_spm_threshold"] = spm_unit.SPM_POVTHRESHOLD
 
+
 def add_household_variables(acs: h5py.File, household: DataFrame):
     acs["household_vehicles_owned"] = household.VEH
     acs["fips"] = household.ST
diff --git a/openfisca_us/data/datasets/acs/raw_acs.py b/openfisca_us/data/datasets/acs/raw_acs.py
index 8e0219599fb..87ce467b987 100644
--- a/openfisca_us/data/datasets/acs/raw_acs.py
+++ b/openfisca_us/data/datasets/acs/raw_acs.py
@@ -4,56 +4,53 @@
 from zipfile import ZipFile
 import pandas as pd
 from openfisca_tools.data import PublicDataset
-import h5py
 import requests
 from tqdm import tqdm
-from openfisca_us.data.datasets.cps.raw_cps import RawCPS
 from openfisca_us.data.storage import OPENFISCA_US_MICRODATA_FOLDER
-from pandas import DataFrame, Series
-import numpy as np
+
 
 logging.getLogger().setLevel(logging.INFO)
 
 PERSON_COLUMNS = [
-    "SERIALNO", # Household ID
-    "SPORDER", # Person number within household
-    "PWGTP", # Person weight
-    "AGEP", # Age
-    "CIT", # Citizenship
-    "MAR", # Marital status
-    "WAGP", # Wage/salary
-    "SSP", # Social security income
-    "SSIP", # Supplemental security income
-    "SEX", # Sex
-    "SEMP", # Self-employment income
-    "SCHL", # Educational attainment
-    "RETP", # Retirement income
-    "PAP", # Public assistance income
-    "OIP", # Other income
-    "PERNP", # Total earnings
-    "PINCP", # Total income
-    "POVPIP", # Income-to-poverty line percentage
-    "RAC1P", # Race
+    "SERIALNO",  # Household ID
+    "SPORDER",  # Person number within household
+    "PWGTP",  # Person weight
+    "AGEP",  # Age
+    "CIT",  # Citizenship
+    "MAR",  # Marital status
+    "WAGP",  # Wage/salary
+    "SSP",  # Social security income
+    "SSIP",  # Supplemental security income
+    "SEX",  # Sex
+    "SEMP",  # Self-employment income
+    "SCHL",  # Educational attainment
+    "RETP",  # Retirement income
+    "PAP",  # Public assistance income
+    "OIP",  # Other income
+    "PERNP",  # Total earnings
+    "PINCP",  # Total income
+    "POVPIP",  # Income-to-poverty line percentage
+    "RAC1P",  # Race
 ]
 
 HOUSEHOLD_COLUMNS = [
-    "SERIALNO", # Household ID
-    "PUMA", # PUMA area code
-    "ST", # State code
-    "ADJHSG", # Adjustment factor for housing dollar amounts
-    "ADJINC", # Adjustment factor for income
-    "WGTP", # Household weight
-    "NP", # Number of persons in household
-    "BDSP", # Number of bedrooms
-    "ELEP", # Electricity monthly cost
-    "FULP", # Fuel monthly cost
-    "GASP", # Gas monthly cost
-    "RMSP", # Number of rooms
-    "RNTP", # Monthly rent
-    "TEN", # Tenure
-    "VEH", # Number of vehicles
-    "FINCP", # Total income
-    "GRNTP", # Gross rent
+    "SERIALNO",  # Household ID
+    "PUMA",  # PUMA area code
+    "ST",  # State code
+    "ADJHSG",  # Adjustment factor for housing dollar amounts
+    "ADJINC",  # Adjustment factor for income
+    "WGTP",  # Household weight
+    "NP",  # Number of persons in household
+    "BDSP",  # Number of bedrooms
+    "ELEP",  # Electricity monthly cost
+    "FULP",  # Fuel monthly cost
+    "GASP",  # Gas monthly cost
+    "RMSP",  # Number of rooms
+    "RNTP",  # Monthly rent
+    "TEN",  # Tenure
+    "VEH",  # Number of vehicles
+    "FINCP",  # Total income
+    "GRNTP",  # Gross rent
 ]
 
 
@@ -78,10 +75,14 @@ def generate(self, year: int) -> None:
             with pd.HDFStore(RawACS.file(year)) as storage:
                 # Household file
                 logging.info(f"Downloading household file")
-                storage["household"] = concat_zipped_csvs(household_url, "psam_hus", HOUSEHOLD_COLUMNS)
+                storage["household"] = concat_zipped_csvs(
+                    household_url, "psam_hus", HOUSEHOLD_COLUMNS
+                )
                 # Person file
                 logging.info(f"Downloading person file")
-                storage["person"] = concat_zipped_csvs(person_url, "psam_pus", PERSON_COLUMNS)
+                storage["person"] = concat_zipped_csvs(
+                    person_url, "psam_pus", PERSON_COLUMNS
+                )
                 # SPM unit file
                 logging.info(f"Downloading SPM unit file")
                 spm_person = pd.read_stata(spm_url).fillna(0)
@@ -93,9 +94,13 @@ def generate(self, year: int) -> None:
                 f"Attempted to extract and save the CSV files, but encountered an error: {e}"
             )
 
+
 RawACS = RawACS()
 
-def concat_zipped_csvs(url: str, prefix: str, columns: List[str]) -> pd.DataFrame:
+
+def concat_zipped_csvs(
+    url: str, prefix: str, columns: List[str]
+) -> pd.DataFrame:
     """Downloads the ACS microdata, which is a zip file containing two halves in CSV format.
 
     Args:
@@ -109,9 +114,9 @@ def concat_zipped_csvs(url: str, prefix: str, columns: List[str]) -> pd.DataFram
     req = requests.get(url, stream=True)
     with BytesIO() as f:
         pbar = tqdm()
-        for chunk in req.iter_content(chunk_size=1024): 
-            if chunk: # filter out keep-alive new chunks
-                pbar.update (len(chunk))
+        for chunk in req.iter_content(chunk_size=1024):
+            if chunk:  # filter out keep-alive new chunks
+                pbar.update(len(chunk))
                 f.write(chunk)
         f.seek(0)
         zf = ZipFile(f)
@@ -125,7 +130,9 @@ def concat_zipped_csvs(url: str, prefix: str, columns: List[str]) -> pd.DataFram
     return res
 
 
-def create_spm_unit_table(storage: pd.HDFStore, person: pd.DataFrame) -> pd.DataFrame:
+def create_spm_unit_table(
+    storage: pd.HDFStore, person: pd.DataFrame
+) -> pd.DataFrame:
     SPM_UNIT_COLUMNS = [
         "CAPHOUSESUB",
         "CAPWKCCXPNS",
@@ -162,7 +169,9 @@ def create_spm_unit_table(storage: pd.HDFStore, person: pd.DataFrame) -> pd.Data
     )
 
     original_person_table = storage["person"]
-    combined_person_table = pd.merge(original_person_table, person, on=["SERIALNO", "SPORDER"])
+    combined_person_table = pd.merge(
+        original_person_table, person, on=["SERIALNO", "SPORDER"]
+    )
 
     storage["person"] = combined_person_table
     storage["spm_unit"] = spm_table

From b1199b29146d4b3ea799dc9848605bb59fb19c63 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sat, 16 Apr 2022 13:11:15 -0700
Subject: [PATCH 10/27] Match key types

---
 openfisca_us/data/datasets/acs/raw_acs.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/openfisca_us/data/datasets/acs/raw_acs.py b/openfisca_us/data/datasets/acs/raw_acs.py
index 87ce467b987..5c145037ed9 100644
--- a/openfisca_us/data/datasets/acs/raw_acs.py
+++ b/openfisca_us/data/datasets/acs/raw_acs.py
@@ -169,8 +169,17 @@ def create_spm_unit_table(
     )
 
     original_person_table = storage["person"]
+    # Ensure that join keys are the same type.
+    JOIN_COLUMNS = ["SERIALNO", "SPORDER"]
+    original_person_table[JOIN_COLUMNS] = original_person_table[
+        JOIN_COLUMNS
+    ].astype(int)
+    person[JOIN_COLUMNS] = person[JOIN_COLUMNS].astype(int)
+    # Add SPM_ID from the SPM person table to the original person table.
     combined_person_table = pd.merge(
-        original_person_table, person, on=["SERIALNO", "SPORDER"]
+        original_person_table,
+        person[JOIN_COLUMNS + ["SPM_ID"]],
+        on=JOIN_COLUMNS,
     )
 
     storage["person"] = combined_person_table

From 1769ebe877ed0b221a23e2deb8442ce37f288b11 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sat, 16 Apr 2022 13:12:41 -0700
Subject: [PATCH 11/27] Bump changelog

---
 changelog.yaml | 320 +++++++++++++++++++++++++------------------------
 1 file changed, 162 insertions(+), 158 deletions(-)

diff --git a/changelog.yaml b/changelog.yaml
index cf10abacb76..2159c7fc3b4 100644
--- a/changelog.yaml
+++ b/changelog.yaml
@@ -1,524 +1,528 @@
 - changes:
     added:
-    - First prototype version with a standard deduction variable.
+      - First prototype version with a standard deduction variable.
   date: 2021-06-28 00:00:00
   version: 0.0.1
 - bump: minor
   changes:
     added:
-    - Prototype with some tax implementations.
+      - Prototype with some tax implementations.
   date: 2021-12-25 00:00:00
 - bump: minor
   changes:
     added:
-    - Tax variables, some benefit variables.
+      - Tax variables, some benefit variables.
   date: 2021-12-25 00:00:01
 - bump: minor
   changes:
     added:
-    - Lifeline benefit.
+      - Lifeline benefit.
   date: 2021-12-25 00:00:02
 - bump: patch
   changes:
     added:
-    - Automated tests.
+      - Automated tests.
   date: 2021-12-25 00:00:03
 - bump: minor
   changes:
     added:
-    - TANF eligibility, broken down into demographic and financial variables, with
-      financial separated by current enrollment in program.
-    - Demographic TANF eligibility per IL rules.
+      - TANF eligibility, broken down into demographic and financial variables, with
+        financial separated by current enrollment in program.
+      - Demographic TANF eligibility per IL rules.
   date: 2021-12-26 00:00:00
 - bump: minor
   changes:
     added:
-    - Medicaid income thresholds for California.
+      - Medicaid income thresholds for California.
   date: 2021-12-27 00:00:00
 - bump: minor
   changes:
     added:
-    - Alternative Minimum Tax (AMT) income and liability logic.
-    - Development tools for auto-generating unit tests for Tax-Calculator functions.
+      - Alternative Minimum Tax (AMT) income and liability logic.
+      - Development tools for auto-generating unit tests for Tax-Calculator functions.
   date: 2021-12-28 00:00:00
 - bump: minor
   changes:
     added:
-    - Gains Tax (capital gains treatment) logic and parameters.
+      - Gains Tax (capital gains treatment) logic and parameters.
   date: 2021-12-28 00:00:01
 - bump: minor
   changes:
     added:
-    - Minimum benefit logic for SNAP.
+      - Minimum benefit logic for SNAP.
   date: 2021-12-28 00:00:02
 - bump: minor
   changes:
     added:
-    - Social Security taxation logic.
+      - Social Security taxation logic.
   date: 2021-12-28 00:00:03
 - bump: minor
   changes:
     added:
-    - Income-to-SMI (state median income) ratio.
+      - Income-to-SMI (state median income) ratio.
   date: 2021-12-28 00:00:04
 - bump: minor
   changes:
     added:
-    - American Opportunity (tax) Credit.
-    - Lifetime Learning (tax) Credit.
+      - American Opportunity (tax) Credit.
+      - Lifetime Learning (tax) Credit.
   date: 2021-12-30 00:00:00
 - bump: minor
   changes:
     added:
-    - Elderly and Disabled (tax) Credit.
+      - Elderly and Disabled (tax) Credit.
   date: 2021-12-30 00:00:01
 - bump: minor
   changes:
     added:
-    - Formula for Medicaid person type, based on age and dependents.
-    - Variable for whether a person meets their Medicaid income eligibility requirement.
+      - Formula for Medicaid person type, based on age and dependents.
+      - Variable for whether a person meets their Medicaid income eligibility requirement.
   date: 2021-12-31 00:00:00
 - bump: minor
   changes:
     added:
-    - SNAP eligibility based on federal net and gross income limits.
-    - Unit and integration tests for SNAP variables.
+      - SNAP eligibility based on federal net and gross income limits.
+      - Unit and integration tests for SNAP variables.
   date: 2022-01-03 00:00:00
 - bump: minor
   changes:
     added:
-    - Federal SNAP asset tests logic
+      - Federal SNAP asset tests logic
   date: 2022-01-03 00:00:01
 - bump: minor
   changes:
     added:
-    - CCDF subsidy top-level logic
+      - CCDF subsidy top-level logic
   date: 2022-01-03 00:00:02
 - bump: minor
   changes:
     added:
-    - Categorical eligibility for SNAP, including broad-based categorical eligibility
-      via low-cost TANF programs that effectively extend SNAP's asset and income limits.
+      - Categorical eligibility for SNAP, including broad-based categorical eligibility
+        via low-cost TANF programs that effectively extend SNAP's asset and income limits.
     changed:
-    - Refactored SNAP code.
+      - Refactored SNAP code.
   date: 2022-01-04 00:00:00
 - bump: patch
   changes:
     changed:
-    - Use USDA elderly and disabled definitions in SNAP calculations.
+      - Use USDA elderly and disabled definitions in SNAP calculations.
   date: 2022-01-06 00:00:00
 - bump: minor
   changes:
     added:
-    - Total child care market rate.
+      - Total child care market rate.
   date: 2022-01-06 00:00:01
 - bump: minor
   changes:
     added:
-    - Update child care market rate to annual.
+      - Update child care market rate to annual.
   date: 2022-01-06 00:00:02
 - bump: patch
   changes:
     added:
-    - Formulas for `childcare_hours_per_week` and `spm_unit_size`.
-    - Unit tests and units for some variables.
+      - Formulas for `childcare_hours_per_week` and `spm_unit_size`.
+      - Unit tests and units for some variables.
     changed:
-    - Reorganized variables.
+      - Reorganized variables.
   date: 2022-01-07 00:00:00
 - bump: patch
   changes:
     changed:
-    - Removes the `u` prefix from all variable label strings.
+      - Removes the `u` prefix from all variable label strings.
   date: 2022-01-08 00:00:00
 - bump: patch
   changes:
     added:
-    - Units to all tax variables.
+      - Units to all tax variables.
     changed:
-    - Adds one line between tests in yaml files.
-    - Use consistent imports in variable Python files.
+      - Adds one line between tests in yaml files.
+      - Use consistent imports in variable Python files.
     removed:
-    - C-TAM benefit variables in tax Python files.
-    - Erroneous formula for `eic` variable.
+      - C-TAM benefit variables in tax Python files.
+      - Erroneous formula for `eic` variable.
   date: 2022-01-08 00:00:01
 - bump: minor
   changes:
     added:
-    - Formula for initial TANF eligibility.
-    - 'Two new variables: `tanf_gross_earned_income` and `tanf_gross_unearned_income`.'
-    - Variable & parameter for `initial_employment_deduction`.
-    - Integration tests for TANF cash aid from TANF IL website.
+      - Formula for initial TANF eligibility.
+      - "Two new variables: `tanf_gross_earned_income` and `tanf_gross_unearned_income`."
+      - Variable & parameter for `initial_employment_deduction`.
+      - Integration tests for TANF cash aid from TANF IL website.
     changed:
-    - '`tanf_countable_income` now includes unearned income and earned income deduction.'
+      - "`tanf_countable_income` now includes unearned income and earned income deduction."
   date: 2022-01-09 00:00:00
 - bump: patch
   changes:
     fixed:
-    - Test runner failed to test string values.
+      - Test runner failed to test string values.
   date: 2022-01-12 00:00:00
 - bump: patch
   changes:
     added:
-    - Metadata for SNAP eligibility parameters.
+      - Metadata for SNAP eligibility parameters.
     fixed:
-    - Parameter misname in SNAP formula.
+      - Parameter misname in SNAP formula.
   date: 2022-01-14 00:00:00
 - bump: minor
   changes:
     added:
-    - Add CCDF copay formula.
+      - Add CCDF copay formula.
   date: 2022-01-14 00:00:01
 - bump: minor
   changes:
     added:
-    - Formula for SSI based on eligibility and amount if eligible.
+      - Formula for SSI based on eligibility and amount if eligible.
   date: 2022-01-14 00:00:02
 - bump: minor
   changes:
     fixed:
-    - Update CCDF subsidy formula.
+      - Update CCDF subsidy formula.
   date: 2022-01-15 00:00:00
 - bump: patch
   changes:
     fixed:
-    - Added links to version tag diffs in changelog.
+      - Added links to version tag diffs in changelog.
   date: 2022-01-15 00:00:01
 - bump: minor
   changes:
     added:
-    - Logic for SNAP excess medical deduction and dependent care deduction.
-    - Limit SNAP earned income deduction to earned income.
-    - Jupyter Book documentation on SNAP.
-    - Updated SNAP parameters.
-    - 'Empty variables for calculating SNAP: `employment_income`, `self_employment_income`,
-      `dividend_income`, `interest_income`, `childcare_expenses`, and `medical_out_of_pocket_expenses`.'
+      - Logic for SNAP excess medical deduction and dependent care deduction.
+      - Limit SNAP earned income deduction to earned income.
+      - Jupyter Book documentation on SNAP.
+      - Updated SNAP parameters.
+      - "Empty variables for calculating SNAP: `employment_income`, `self_employment_income`,
+        `dividend_income`, `interest_income`, `childcare_expenses`, and `medical_out_of_pocket_expenses`."
     changed:
-    - Significant refactoring of SNAP code.
-    - Use openfisca-tools for `add` and `aggr` functions, and pass lists of variables
-      to these function.
-    - Rename min/max SNAP benefit parameters and variables to use `allotment`.
+      - Significant refactoring of SNAP code.
+      - Use openfisca-tools for `add` and `aggr` functions, and pass lists of variables
+        to these function.
+      - Rename min/max SNAP benefit parameters and variables to use `allotment`.
   date: 2022-01-17 00:00:00
 - bump: patch
   changes:
     changed:
-    - Add metadata for variables and parameters used in SNAP calculations.
-    - Renames two parameters involved in SNAP deductions from `threshold` to `disregard`.
+      - Add metadata for variables and parameters used in SNAP calculations.
+      - Renames two parameters involved in SNAP deductions from `threshold` to `disregard`.
   date: 2022-01-17 00:00:01
 - bump: minor
   changes:
     added:
-    - Child Tax Credit (including adult dependents) parameters, logic and tests.
+      - Child Tax Credit (including adult dependents) parameters, logic and tests.
   date: 2022-01-17 00:00:02
 - bump: minor
   changes:
     added:
-    - Categorical eligibility to school meal subsidies.
-    - Documentation notebook on school meal subsidies.
-    - Parameterized income sources for school meal subsidies.
+      - Categorical eligibility to school meal subsidies.
+      - Documentation notebook on school meal subsidies.
+      - Parameterized income sources for school meal subsidies.
     changed:
-    - Count school meal subsidies by school enrollment rather than age.
-    - Remove `spm_unit_` prefix from school meal variables.
+      - Count school meal subsidies by school enrollment rather than age.
+      - Remove `spm_unit_` prefix from school meal variables.
   date: 2022-01-25 00:00:00
 - bump: minor
   changes:
     added:
-    - Child Tax Credit (and historical policy).
-    - Non-refundable and refundable credit handling in tax logic.
-    - Metadata for education credits and the EITC.
+      - Child Tax Credit (and historical policy).
+      - Non-refundable and refundable credit handling in tax logic.
+      - Metadata for education credits and the EITC.
     fixed:
-    - Bugs in head/spouse detection and nonrefundable credits.
+      - Bugs in head/spouse detection and nonrefundable credits.
   date: 2022-01-28 00:00:00
 - bump: patch
   changes:
     added:
-    - Metadata and variable aliases for key tax variables.
-    - Employment, self-employment, interest and dividend income as inputs to tax logic.
+      - Metadata and variable aliases for key tax variables.
+      - Employment, self-employment, interest and dividend income as inputs to tax logic.
   date: 2022-02-02 00:00:00
 - bump: patch
   changes:
     added:
-    - Added formula for TANF variable `continuous_tanf_eligibility`
-    - Added integration test for continuous TANF eligibility to `integration.yaml`
+      - Added formula for TANF variable `continuous_tanf_eligibility`
+      - Added integration test for continuous TANF eligibility to `integration.yaml`
   date: 2022-02-06 00:00:00
 - bump: minor
   changes:
     added:
-    - SNAP emergency allotments for California.
-    - SNAP unearned income example in JupyterBook docs.
+      - SNAP emergency allotments for California.
+      - SNAP unearned income example in JupyterBook docs.
   date: 2022-02-06 00:00:01
 - bump: minor
   changes:
     added:
-    - California Clean Vehicle Rebate Project.
+      - California Clean Vehicle Rebate Project.
   date: 2022-02-07 00:00:00
 - bump: minor
   changes:
     added:
-    - Guaranteed income / cash assistance pilot income variable. This counts as unearned
-      income for SNAP, uncounted for taxes and other benefits.
+      - Guaranteed income / cash assistance pilot income variable. This counts as unearned
+        income for SNAP, uncounted for taxes and other benefits.
   date: 2022-02-07 00:00:01
 - bump: patch
   changes:
     fixed:
-    - EITC logic and parameters for non-3-child tax units.
+      - EITC logic and parameters for non-3-child tax units.
   date: 2022-02-08 00:00:00
 - bump: patch
   changes:
     added:
-    - PolicyEngine metadata and notebook for Lifeline program.
-    - Formula for `irs_gross_income`, which Lifeline uses to calculate income-based
-      eligibility.
+      - PolicyEngine metadata and notebook for Lifeline program.
+      - Formula for `irs_gross_income`, which Lifeline uses to calculate income-based
+        eligibility.
   date: 2022-02-08 00:00:01
 - bump: patch
   changes:
     fixed:
-    - Add Lifeline notebook to table of contents.
+      - Add Lifeline notebook to table of contents.
   date: 2022-02-08 00:00:02
 - bump: minor
   changes:
     added:
-    - Income limits for 5 Maryland Medicaid coverage groups.
+      - Income limits for 5 Maryland Medicaid coverage groups.
   date: 2022-02-09 00:00:00
 - bump: minor
   changes:
     added:
-    - WIC program.
+      - WIC program.
     fixed:
-    - Include guaranteed income / cash assistance in market income.
+      - Include guaranteed income / cash assistance in market income.
   date: 2022-02-09 00:00:01
 - bump: patch
   changes:
     fixed:
-    - Change WIC display name from `WIC benefit value` to `WIC`.
+      - Change WIC display name from `WIC benefit value` to `WIC`.
   date: 2022-02-09 00:00:02
 - bump: patch
   changes:
     fixed:
-    - Specify WIC's unit as USD.
+      - Specify WIC's unit as USD.
   date: 2022-02-09 00:00:03
 - bump: patch
   changes:
     fixed:
-    - Remove guaranteed income / cash assistance from benefits.
+      - Remove guaranteed income / cash assistance from benefits.
   date: 2022-02-09 00:00:04
 - bump: patch
   changes:
     added:
-    - Categorical breakdown metadata infrastructure from OpenFisca-Tools.
+      - Categorical breakdown metadata infrastructure from OpenFisca-Tools.
   date: 2022-02-10 00:00:00
 - bump: patch
   changes:
     added:
-    - Chained CPI-U (monthly and August-only) parameters.
-    - Metadata for SNAP max allotment.
+      - Chained CPI-U (monthly and August-only) parameters.
+      - Metadata for SNAP max allotment.
   date: 2022-02-13 00:00:00
 - bump: patch
   changes:
     changed:
-    - OpenFisca-Tools constraint widened to the current major version.
+      - OpenFisca-Tools constraint widened to the current major version.
   date: 2022-02-16 00:00:00
 - bump: minor
   changes:
     added:
-    - Uprated tax parameters for federal income tax.
+      - Uprated tax parameters for federal income tax.
   date: 2022-02-21 00:00:00
 - bump: minor
   changes:
     added:
-    - Affordable Connectivity Program.
+      - Affordable Connectivity Program.
     changed:
-    - Split school meal subsidies into free and reduced-price.
+      - Split school meal subsidies into free and reduced-price.
   date: 2022-02-21 00:00:01
 - bump: minor
   changes:
     added:
-    - Rural Tribal supplement for Lifeline.
+      - Rural Tribal supplement for Lifeline.
     changed:
-    - Restructure ACP and EBB Tribal amounts to work with PolicyEngine.
+      - Restructure ACP and EBB Tribal amounts to work with PolicyEngine.
   date: 2022-02-21 00:00:02
 - bump: patch
   changes:
     changed:
-    - Edited labels for ACP and SNAP normal allotment.
+      - Edited labels for ACP and SNAP normal allotment.
   date: 2022-02-21 00:00:03
 - bump: patch
   changes:
     fixed:
-    - Subtract Lifeline from broadband cost before calculating ACP and EBB.
+      - Subtract Lifeline from broadband cost before calculating ACP and EBB.
   date: 2022-02-27 00:00:00
 - bump: patch
   changes:
     added:
-    - Code coverage badge to README.md.
-    - Reminder for pull requests to run `make format && make documentation`.
-    - CPI-uprated values for WIC average payments.
+      - Code coverage badge to README.md.
+      - Reminder for pull requests to run `make format && make documentation`.
+      - CPI-uprated values for WIC average payments.
     changed:
-    - Child Tax Credit names renamed to `ctc`.
-    - Child and Dependent Care Credit names renamed to `cdcc`.
+      - Child Tax Credit names renamed to `ctc`.
+      - Child and Dependent Care Credit names renamed to `cdcc`.
     fixed:
-    - EITC maximum age in 2021 changed from 125 to infinity.
+      - EITC maximum age in 2021 changed from 125 to infinity.
   date: 2022-02-28 00:00:00
 - bump: minor
   changes:
     added:
-    - Supplemental Security Income for individuals.
-    - Social Security input variables, counted as unearned income for several programs.
+      - Supplemental Security Income for individuals.
+      - Social Security input variables, counted as unearned income for several programs.
   date: 2022-03-04 00:00:00
 - bump: patch
   changes:
     changed:
-    - Adjust variable labels for consistency.
+      - Adjust variable labels for consistency.
   date: 2022-03-04 00:00:01
 - bump: minor
   changes:
     added:
-    - SNAP aggregate benefits and participation.
+      - SNAP aggregate benefits and participation.
   date: 2022-03-05 00:00:00
 - bump: patch
   changes:
     changed:
-    - Point `e02400` to `social_security` (for PolicyEngine).
+      - Point `e02400` to `social_security` (for PolicyEngine).
   date: 2022-03-07 00:00:00
 - bump: patch
   changes:
     added:
-    - '`spm_unit_weight` variable.'
+      - "`spm_unit_weight` variable."
     fixed:
-    - SNAP now uses the additional amounts where main rates are not available.
+      - SNAP now uses the additional amounts where main rates are not available.
   date: 2022-03-07 00:00:01
 - bump: patch
   changes:
     changed:
-    - '`is_married` moved from person-level to family-level, with a formula added.'
+      - "`is_married` moved from person-level to family-level, with a formula added."
   date: 2022-03-08 00:00:00
 - bump: patch
   changes:
     changed:
-    - IRS-published uprated income tax parameters for 2019-22.
+      - IRS-published uprated income tax parameters for 2019-22.
   date: 2022-03-09 00:00:00
 - bump: patch
   changes:
     added:
-    - February 2022 chained CPI-U.
+      - February 2022 chained CPI-U.
     changed:
-    - Simplified WIC uprating.
+      - Simplified WIC uprating.
   date: 2022-03-11 00:00:00
 - bump: patch
   changes:
     fixed:
-    - EITC uses the correct phase-in rate.
+      - EITC uses the correct phase-in rate.
   date: 2022-03-13 00:00:00
 - bump: patch
   changes:
     changed:
-    - Tax folder re-organised to improve modularity.
+      - Tax folder re-organised to improve modularity.
     fixed:
-    - A bug in AMT calculations.
+      - A bug in AMT calculations.
   date: 2022-03-16 21:22:44
 - bump: patch
   changes:
     fixed:
-    - Push action on GitHub correctly publishes.
+      - Push action on GitHub correctly publishes.
   date: 2022-03-16 20:29:58
 - bump: patch
   changes:
     fixed:
-    - Push action on GitHub correctly publishes.
+      - Push action on GitHub correctly publishes.
   date: 2022-03-16 21:22:44
 - bump: minor
   changes:
     changed:
-    - Added multiple parameters for California's TANF system.
-    - Refactored the TANF structure for easier implementation of other state TANF
-      programs.
+      - Added multiple parameters for California's TANF system.
+      - Refactored the TANF structure for easier implementation of other state TANF
+        programs.
   date: 2022-03-27 18:49:02
 - bump: patch
   changes:
     added:
-    - Page on TANF to documentation.
+      - Page on TANF to documentation.
   date: 2022-03-28 10:40:42
 - bump: patch
   changes:
     fixed:
-    - Versioning action didn't update `setup.py`.
+      - Versioning action didn't update `setup.py`.
   date: 2022-03-28 10:55:27
 - bump: minor
   changes:
     changed:
-    - Added `is_eitc_qualifying_child` variable to improve EITC child logic.
-    - Split `is_in_school` into `is_in_k12_school` and `is_full_time_student`.
+      - Added `is_eitc_qualifying_child` variable to improve EITC child logic.
+      - Split `is_in_school` into `is_in_k12_school` and `is_full_time_student`.
   date: 2022-03-28 11:34:53
 - bump: minor
   changes:
     added:
-    - Net income limits for SNAP BBCE (TANF) program.
-    - Legislative references for SNAP income limits.
+      - Net income limits for SNAP BBCE (TANF) program.
+      - Legislative references for SNAP income limits.
     removed:
-    - 165% SNAP gross income limit for separate elderly and disabled households (unused).
+      - 165% SNAP gross income limit for separate elderly and disabled households (unused).
   date: 2022-03-30 01:17:38
 - bump: minor
   changes:
     added:
-    - CDCC parameters for eligibility and metadata.
+      - CDCC parameters for eligibility and metadata.
     fixed:
-    - A bug where the CDCC would phase down too quickly.
+      - A bug where the CDCC would phase down too quickly.
   date: 2022-03-30 11:46:11
 - bump: patch
   changes:
     added:
-    - Parameter metadata for tax credits and payroll taxes.
+      - Parameter metadata for tax credits and payroll taxes.
   date: 2022-03-30 13:12:44
 - bump: patch
   changes:
     added:
-    - Added full-time college student variable.
+      - Added full-time college student variable.
   date: 2022-03-30 18:53:00
 - bump: minor
   changes:
     added:
-    - HUD adjusted income and dependent variables and logic.
+      - HUD adjusted income and dependent variables and logic.
   date: 2022-04-05 19:04:10
 - bump: patch
   changes:
     fixed:
-    - Point TANF parameter to state instead of region.
+      - Point TANF parameter to state instead of region.
   date: 2022-04-06 10:35:14
 - bump: minor
   changes:
     added:
-    - More recent Social Security payroll tax cap parameter values.
-    - Separate parameters for employer payroll taxes and self-employment taxes.
-    - Parameter for self-employment net earnings disregard.
-    - Unit tests and legislative references for payroll and self-employment tax variables.
+      - More recent Social Security payroll tax cap parameter values.
+      - Separate parameters for employer payroll taxes and self-employment taxes.
+      - Parameter for self-employment net earnings disregard.
+      - Unit tests and legislative references for payroll and self-employment tax variables.
     changed:
-    - Reorganized payroll and self-employment tax parameters and variables.
-    - Replaced large parameters with infinity and made number formatting consistent.
+      - Reorganized payroll and self-employment tax parameters and variables.
+      - Replaced large parameters with infinity and made number formatting consistent.
     removed:
-    - Reform-only `social_security.add_taxable_earnings` parameter.
-    - Unused `exact` variable.
-    - Variable for `social_security_taxes` (moved logic to `refundable_child_tax_credit`).
+      - Reform-only `social_security.add_taxable_earnings` parameter.
+      - Unused `exact` variable.
+      - Variable for `social_security_taxes` (moved logic to `refundable_child_tax_credit`).
   date: 2022-04-07 06:08:18
 - bump: patch
   changes:
     fixed:
-    - Refundable CTC formula works properly when phase-in rate increased (comments
-      added).
+      - Refundable CTC formula works properly when phase-in rate increased (comments
+        added).
   date: 2022-04-12 18:38:49
 - bump: minor
   changes:
     added:
-    - Capped non-refundable credits variable.
-    - Shortened labels for tax variables.
+      - Capped non-refundable credits variable.
+      - Shortened labels for tax variables.
   date: 2022-04-13 12:58:29
 - bump: minor
   changes:
     added:
-    - Microdata now handled entirely within OpenFisca-US.
+      - Microdata now handled entirely within OpenFisca-US.
   date: 2022-04-14 08:19:40
 - bump: patch
   changes:
     added:
-    - Legislative references for CDCC parameters.
+      - Legislative references for CDCC parameters.
     fixed:
-    - CDCC uses maximum dependent parameter.
+      - CDCC uses maximum dependent parameter.
   date: 2022-04-15 14:23:11
+- bump: minor
+  changes:
+    added:
+      - American Community Survey input.

From 9f276dacb55ad4e010528cff1b87f53f4abebfb3 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sat, 16 Apr 2022 13:28:25 -0700
Subject: [PATCH 12/27] make format and print types

---
 openfisca_us/data/datasets/acs/acs.py                 |  7 ++++---
 openfisca_us/data/datasets/acs/raw_acs.py             |  6 ++++--
 .../variables/demographic/household/vehicles_owned.py |  1 -
 .../variables/demographic/person/vehicles_owned.py    | 11 ++++++++---
 .../variables/income/spm_unit/spm_unit_fips.py        |  2 +-
 5 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/openfisca_us/data/datasets/acs/acs.py b/openfisca_us/data/datasets/acs/acs.py
index 15c976313d2..76e19dc2426 100644
--- a/openfisca_us/data/datasets/acs/acs.py
+++ b/openfisca_us/data/datasets/acs/acs.py
@@ -32,9 +32,10 @@ def generate(self, year: int) -> None:
             raw_data[entity] for entity in ("person", "spm_unit", "household")
         ]
         # Add primary and foreign keys
-        make_numeric = lambda x: int(
-            x.replace("2019GQ", "0").replace("2019HU", "1")
-        )
+
+        def make_numeric(x):
+            return int(x.replace("2019GQ", "0").replace("2019HU", "1"))
+
         household.SERIALNO = household.SERIALNO.apply(make_numeric)
         person.SERIALNO = person.SERIALNO.apply(make_numeric)
 
diff --git a/openfisca_us/data/datasets/acs/raw_acs.py b/openfisca_us/data/datasets/acs/raw_acs.py
index 5c145037ed9..69180f88850 100644
--- a/openfisca_us/data/datasets/acs/raw_acs.py
+++ b/openfisca_us/data/datasets/acs/raw_acs.py
@@ -171,10 +171,12 @@ def create_spm_unit_table(
     original_person_table = storage["person"]
     # Ensure that join keys are the same type.
     JOIN_COLUMNS = ["SERIALNO", "SPORDER"]
+    print(original_person_table[JOIN_COLUMNS].dtypes)
+    print(person[JOIN_COLUMNS].dtypes)
     original_person_table[JOIN_COLUMNS] = original_person_table[
         JOIN_COLUMNS
-    ].astype(int)
-    person[JOIN_COLUMNS] = person[JOIN_COLUMNS].astype(int)
+    ].astype(str)
+    person[JOIN_COLUMNS] = person[JOIN_COLUMNS].astype(str)
     # Add SPM_ID from the SPM person table to the original person table.
     combined_person_table = pd.merge(
         original_person_table,
diff --git a/openfisca_us/variables/demographic/household/vehicles_owned.py b/openfisca_us/variables/demographic/household/vehicles_owned.py
index 712d2e0cdc5..1dd8125571e 100644
--- a/openfisca_us/variables/demographic/household/vehicles_owned.py
+++ b/openfisca_us/variables/demographic/household/vehicles_owned.py
@@ -8,4 +8,3 @@ class household_vehicles_owned(Variable):
     unit = USD
     documentation = "Number of vehicles owned by the household"
     definition_period = YEAR
-
diff --git a/openfisca_us/variables/demographic/person/vehicles_owned.py b/openfisca_us/variables/demographic/person/vehicles_owned.py
index c7d0a826e4c..ce32ecf6e95 100644
--- a/openfisca_us/variables/demographic/person/vehicles_owned.py
+++ b/openfisca_us/variables/demographic/person/vehicles_owned.py
@@ -3,6 +3,7 @@
 from random import randint
 from openfisca_core.populations import Population
 
+
 class vehicles_owned(Variable):
     value_type = float
     entity = Person
@@ -22,9 +23,13 @@ def formula(person, period, parameters):
         vehicles = is_adult * 0
         for _ in range(int(max_vehicles)):
             # Pick a random adult in each household
-            selected_adult = randint(0, adult_rank[is_adult].max()) % num_adults_in_household
+            selected_adult = (
+                randint(0, adult_rank[is_adult].max())
+                % num_adults_in_household
+            )
             maximum_reached = household.sum(vehicles) >= household_vehicles
-            should_add_vehicle = ~maximum_reached & (adult_rank == selected_adult)
+            should_add_vehicle = ~maximum_reached & (
+                adult_rank == selected_adult
+            )
             vehicles += where(should_add_vehicle, 1, 0)
         return vehicles
-
diff --git a/openfisca_us/variables/income/spm_unit/spm_unit_fips.py b/openfisca_us/variables/income/spm_unit/spm_unit_fips.py
index 26e9a99b35a..5353f5014e3 100644
--- a/openfisca_us/variables/income/spm_unit/spm_unit_fips.py
+++ b/openfisca_us/variables/income/spm_unit/spm_unit_fips.py
@@ -8,4 +8,4 @@ class spm_unit_fips(Variable):
     definition_period = YEAR
 
     def formula(spm_unit, period, parameters):
-        return spm_unit.household("fips", period)
\ No newline at end of file
+        return spm_unit.household("fips", period)

From 2ae4c2718f21e5da4e1189d7debe34bd47aef07f Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Mon, 18 Apr 2022 16:13:51 +0100
Subject: [PATCH 13/27] Add FIPS handling

---
 .../demographic/geographic/state_name.py      | 57 +++++++++++++++++++
 openfisca_us/variables/irs/income/sources.py  |  1 +
 2 files changed, 58 insertions(+)

diff --git a/openfisca_us/variables/demographic/geographic/state_name.py b/openfisca_us/variables/demographic/geographic/state_name.py
index 4bf51168c97..9fea58de658 100644
--- a/openfisca_us/variables/demographic/geographic/state_name.py
+++ b/openfisca_us/variables/demographic/geographic/state_name.py
@@ -73,3 +73,60 @@ class state_name(Variable):
     entity = Household
     label = "State"
     definition_period = ETERNITY
+
+    def formula(household, period, parameters):
+        fips = household("fips", period)
+        return pd.Series(fips).map({
+            1: StateName.AL,
+            2: StateName.AK,
+            4: StateName.AZ,
+            5: StateName.AR,
+            6: StateName.CA,
+            8: StateName.CO,
+            9: StateName.CT,
+            10: StateName.DE,
+            11: StateName.DC,
+            12: StateName.FL,
+            13: StateName.GA,
+            15: StateName.HI,
+            16: StateName.ID,
+            17: StateName.IL,
+            18: StateName.IN,
+            19: StateName.IA,
+            20: StateName.KS,
+            21: StateName.KY,
+            22: StateName.LA,
+            23: StateName.ME,
+            24: StateName.MD,
+            25: StateName.MA,
+            26: StateName.MI,
+            27: StateName.MN,
+            28: StateName.MS,
+            29: StateName.MO,
+            30: StateName.MT,
+            31: StateName.NE,
+            32: StateName.NV,
+            33: StateName.NH,
+            34: StateName.NJ,
+            35: StateName.NM,
+            36: StateName.NY,
+            37: StateName.NC,
+            38: StateName.ND,
+            39: StateName.OH,
+            40: StateName.OK,
+            41: StateName.OR,
+            42: StateName.PA,
+            44: StateName.RI,
+            45: StateName.SC,
+            46: StateName.SD,
+            47: StateName.TN,
+            48: StateName.TX,
+            49: StateName.UT,
+            50: StateName.VT,
+            51: StateName.VA,
+            53: StateName.WA,
+            54: StateName.WV,
+            55: StateName.WI,
+            56: StateName.WY,
+            72: StateName.PR,
+        }).values
diff --git a/openfisca_us/variables/irs/income/sources.py b/openfisca_us/variables/irs/income/sources.py
index 55f14b4fcde..32f392b1d11 100644
--- a/openfisca_us/variables/irs/income/sources.py
+++ b/openfisca_us/variables/irs/income/sources.py
@@ -1012,6 +1012,7 @@ class fips(Variable):
     entity = Household
     definition_period = YEAR
     documentation = "FIPS state code (not used in tax-calculation logic)"
+    default_value = 1
 
 
 class h_seq(Variable):

From 8d7768d5e96e6351f834f53f31ea0462506b8f5b Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Mon, 18 Apr 2022 16:14:00 +0100
Subject: [PATCH 14/27] API fixes

---
 openfisca_us/api/microsimulation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openfisca_us/api/microsimulation.py b/openfisca_us/api/microsimulation.py
index 3645ab3c406..32b13d8710c 100644
--- a/openfisca_us/api/microsimulation.py
+++ b/openfisca_us/api/microsimulation.py
@@ -9,7 +9,7 @@ class Microsimulation(GeneralMicrosimulation):
     entities = entities
     default_dataset = CPS
 
-    def __init__(self, reform=(), dataset: type = CPS, year: int = None):
+    def __init__(self, reform=(), dataset: type = CPS, year: int = None, **kwargs):
         if dataset == CPS and len(CPS.years) == 0:
             CPS.generate(2020)
 

From be0cbac89586d127ff9d17842c48472b4499eaca Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Mon, 18 Apr 2022 16:14:08 +0100
Subject: [PATCH 15/27] Add unit test for FIPS in CA

---
 .../policy/baseline/demographic/geographic/state_name.yaml | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 openfisca_us/tests/policy/baseline/demographic/geographic/state_name.yaml

diff --git a/openfisca_us/tests/policy/baseline/demographic/geographic/state_name.yaml b/openfisca_us/tests/policy/baseline/demographic/geographic/state_name.yaml
new file mode 100644
index 00000000000..3a2311773ed
--- /dev/null
+++ b/openfisca_us/tests/policy/baseline/demographic/geographic/state_name.yaml
@@ -0,0 +1,7 @@
+- name: California is decoded correctly from FIPS code.
+  period: 2020
+  absolute_error_margin: 0
+  input:
+    fips: 6
+  output:
+    state_name: CA

From cb36e7b8b9997799683449606d7e9e48d5c94001 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Mon, 18 Apr 2022 16:14:15 +0100
Subject: [PATCH 16/27] Add age

---
 openfisca_us/data/datasets/acs/acs.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/openfisca_us/data/datasets/acs/acs.py b/openfisca_us/data/datasets/acs/acs.py
index 3bfc1640d9d..77b37f5dcaf 100644
--- a/openfisca_us/data/datasets/acs/acs.py
+++ b/openfisca_us/data/datasets/acs/acs.py
@@ -36,14 +36,18 @@ def generate(self, year: int) -> None:
         ]
         # Add primary and foreign keys
         make_numeric = lambda x: int(x.replace("2019GQ", "0").replace("2019HU", "1"))
-        household.SERIALNO = household.SERIALNO.apply(make_numeric)
-        person.SERIALNO = person.SERIALNO.apply(make_numeric)
+        household.SERIALNO = household.SERIALNO.apply(make_numeric).astype(int)
+        person.SERIALNO = person.SERIALNO.apply(make_numeric).astype(int)
+        person.SPORDER = person.SPORDER.astype(int)
+        person.SPM_ID = person.SPM_ID.astype(int)
+        spm_unit.SPM_ID = spm_unit.SPM_ID.astype(int)
     
         person = person[person.SERIALNO.isin(household.SERIALNO)]
         household = household[household.SERIALNO.isin(person.SERIALNO)]
         spm_unit = spm_unit[spm_unit.SPM_ID.isin(person.SPM_ID)]
 
         add_id_variables(acs, person, spm_unit, household)
+        add_person_variables(acs, person)
         add_spm_variables(acs, spm_unit)
         add_household_variables(acs, household)
 
@@ -84,6 +88,8 @@ def add_id_variables(
     acs["person_weight"] = person.PWGTP
     acs["household_weight"] = household.WGTP
 
+def add_person_variables(acs: h5py.File, person: DataFrame):
+    acs["age"] = person.AGEP
 
 def add_spm_variables(acs: h5py.File, spm_unit: DataFrame):
     acs["spm_unit_net_income"] = spm_unit.SPM_RESOURCES

From e2db30d862eafca5fff58070a8f61f042f72f5e6 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Mon, 18 Apr 2022 16:14:25 +0100
Subject: [PATCH 17/27] Add basic income parameters with metadata

---
 .../basic_income/amount_by_age.yaml           | 47 +++++++++++++++++++
 .../ubi_center/basic_income/basic_income.py   | 15 ++++++
 2 files changed, 62 insertions(+)
 create mode 100644 openfisca_us/parameters/contrib/ubi_center/basic_income/amount_by_age.yaml
 create mode 100644 openfisca_us/variables/contrib/ubi_center/basic_income/basic_income.py

diff --git a/openfisca_us/parameters/contrib/ubi_center/basic_income/amount_by_age.yaml b/openfisca_us/parameters/contrib/ubi_center/basic_income/amount_by_age.yaml
new file mode 100644
index 00000000000..7f9dc4f0f08
--- /dev/null
+++ b/openfisca_us/parameters/contrib/ubi_center/basic_income/amount_by_age.yaml
@@ -0,0 +1,47 @@
+description: Basic income amounts by age.
+brackets:
+  - threshold:
+      2010-01-01: 0
+    amount:
+      description: Unconditional payment to children.
+      values:
+        2010-01-01: 0
+      metadata:
+        label: Child basic income
+        unit: currency-USD
+        period: year
+        name: child_bi
+  - threshold:
+      description: Age at which individuals receive the working-age adult payment, rather than the child payment.
+      values:
+        2010-01-01: 18
+      metadata:
+        label: Basic income child age
+        unit: year
+        name: adult_bi_age
+    amount:
+      description: Unconditional payment to working-age adults.
+      values:
+        2010-01-01: 0
+      metadata:
+        label: Adult basic income
+        unit: currency-USD
+        period: year
+        name: adult_bi
+  - threshold:
+      description: Age at which individuals receive the senior citizen payment, rather than the working-age adult payment.
+      values:
+        2010-01-01: 65
+      metadata:
+        label: Basic income senior citizen age
+        unit: year
+        name: senior_bi_age
+    amount:
+      description: Unconditional payment to senior citizens.
+      values:
+        2010-01-01: 0
+      metadata:
+        label: Senior citizen basic income
+        unit: currency-USD
+        period: year
+        name: senior_bi
diff --git a/openfisca_us/variables/contrib/ubi_center/basic_income/basic_income.py b/openfisca_us/variables/contrib/ubi_center/basic_income/basic_income.py
new file mode 100644
index 00000000000..579bbbcc40e
--- /dev/null
+++ b/openfisca_us/variables/contrib/ubi_center/basic_income/basic_income.py
@@ -0,0 +1,15 @@
+from openfisca_us.model_api import *
+
+
+class basic_income(Variable):
+    value_type = float
+    entity = Person
+    label = "Basic income"
+    unit = USD
+    documentation = "Total basic income payments for this person."
+    definition_period = YEAR
+
+    def formula(person, period, parameters):
+        bi = parameters(period).contrib.ubi_center.basic_income
+        age = person("age", period)
+        return bi.amount_by_age.calc(age)
\ No newline at end of file

From f768a50c14297b81b7d8c7443cdba0857ce798da Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Mon, 18 Apr 2022 16:14:34 +0100
Subject: [PATCH 18/27] Fix weight and net income

---
 .../variables/demographic/weights/person_weight.py  |  3 +++
 .../demographic/weights/spm_unit_weight.py          |  4 ++++
 .../spm_unit/spm_unit_is_in_deep_spm_poverty.py     | 13 +++++++++++++
 .../income/spm_unit/spm_unit_net_income.py          |  5 +++++
 4 files changed, 25 insertions(+)
 create mode 100644 openfisca_us/variables/income/spm_unit/spm_unit_is_in_deep_spm_poverty.py

diff --git a/openfisca_us/variables/demographic/weights/person_weight.py b/openfisca_us/variables/demographic/weights/person_weight.py
index a2a9b276a73..86019fea0c0 100644
--- a/openfisca_us/variables/demographic/weights/person_weight.py
+++ b/openfisca_us/variables/demographic/weights/person_weight.py
@@ -6,3 +6,6 @@ class person_weight(Variable):
     entity = Person
     label = "Person weight"
     definition_period = YEAR
+
+    def formula(person, period, parameters):
+        return person.household("household_weight", period)
\ No newline at end of file
diff --git a/openfisca_us/variables/demographic/weights/spm_unit_weight.py b/openfisca_us/variables/demographic/weights/spm_unit_weight.py
index 44c1918fdc8..7f35cb95c60 100644
--- a/openfisca_us/variables/demographic/weights/spm_unit_weight.py
+++ b/openfisca_us/variables/demographic/weights/spm_unit_weight.py
@@ -6,3 +6,7 @@ class spm_unit_weight(Variable):
     entity = SPMUnit
     label = "SPM unit weight"
     definition_period = YEAR
+
+    def formula(spm_unit, period, parameters):
+        # Fall back to the household weight if no SPM unit weight is provided
+        return spm_unit.household("household_weight", period)
diff --git a/openfisca_us/variables/income/spm_unit/spm_unit_is_in_deep_spm_poverty.py b/openfisca_us/variables/income/spm_unit/spm_unit_is_in_deep_spm_poverty.py
new file mode 100644
index 00000000000..f3477012ac9
--- /dev/null
+++ b/openfisca_us/variables/income/spm_unit/spm_unit_is_in_deep_spm_poverty.py
@@ -0,0 +1,13 @@
+from openfisca_us.model_api import *
+
+
+class spm_unit_is_in_deep_spm_poverty(Variable):
+    value_type = bool
+    entity = SPMUnit
+    label = "SPM unit in deep SPM poverty"
+    definition_period = YEAR
+
+    def formula(spm_unit, period, parameters):
+        income = spm_unit("spm_unit_net_income", period)
+        poverty_threshold = spm_unit("spm_unit_spm_threshold", period) / 2
+        return income < poverty_threshold
diff --git a/openfisca_us/variables/income/spm_unit/spm_unit_net_income.py b/openfisca_us/variables/income/spm_unit/spm_unit_net_income.py
index f2802ff5137..992b321fdc5 100644
--- a/openfisca_us/variables/income/spm_unit/spm_unit_net_income.py
+++ b/openfisca_us/variables/income/spm_unit/spm_unit_net_income.py
@@ -9,6 +9,11 @@ class spm_unit_net_income(Variable):
     unit = USD
 
     def formula(spm_unit, period, parameters):
+        reported_net_income = spm_unit("spm_unit_net_income_reported", period)
+        if reported_net_income.sum() > 0:
+            # If we have reported net income, use that instead for now. This
+            # is only until the full microsimulation can be run.
+            return reported_net_income
         market_income = spm_unit("spm_unit_market_income", period)
         benefits = spm_unit("spm_unit_benefits", period)
         taxes = spm_unit("spm_unit_taxes", period)

From 3e360ed282f4c016599552e24a5278826f9637a3 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Mon, 18 Apr 2022 21:13:46 +0100
Subject: [PATCH 19/27] Default FIPS -> 6

---
 openfisca_us/variables/irs/income/sources.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openfisca_us/variables/irs/income/sources.py b/openfisca_us/variables/irs/income/sources.py
index 32f392b1d11..80bf0fedd5a 100644
--- a/openfisca_us/variables/irs/income/sources.py
+++ b/openfisca_us/variables/irs/income/sources.py
@@ -1012,7 +1012,7 @@ class fips(Variable):
     entity = Household
     definition_period = YEAR
     documentation = "FIPS state code (not used in tax-calculation logic)"
-    default_value = 1
+    default_value = 6
 
 
 class h_seq(Variable):

From 97610eb3bfd9e980bfb867ca4bdb16bd93dd1c4b Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Tue, 19 Apr 2022 09:33:08 +0100
Subject: [PATCH 20/27] Add state

---
 openfisca_us/data/datasets/acs/acs.py              |  2 +-
 openfisca_us/data/datasets/cps/cps.py              |  3 +++
 .../variables/demographic/spm_unit/spm_unit_id.py  | 14 ++++++++++++++
 setup.py                                           |  1 +
 4 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 openfisca_us/variables/demographic/spm_unit/spm_unit_id.py

diff --git a/openfisca_us/data/datasets/acs/acs.py b/openfisca_us/data/datasets/acs/acs.py
index 50f6cd36772..6502af325c4 100644
--- a/openfisca_us/data/datasets/acs/acs.py
+++ b/openfisca_us/data/datasets/acs/acs.py
@@ -99,4 +99,4 @@ def add_spm_variables(acs: h5py.File, spm_unit: DataFrame):
 
 def add_household_variables(acs: h5py.File, household: DataFrame):
     acs["household_vehicles_owned"] = household.VEH
-    acs["fips"] = household.ST
+    acs["fips"] = acs["household_fips"] = household.ST
diff --git a/openfisca_us/data/datasets/cps/cps.py b/openfisca_us/data/datasets/cps/cps.py
index 509c82163c8..0994026ca3e 100644
--- a/openfisca_us/data/datasets/cps/cps.py
+++ b/openfisca_us/data/datasets/cps/cps.py
@@ -46,6 +46,7 @@ def generate(self, year: int):
         add_personal_variables(cps, person)
         add_personal_income_variables(cps, person)
         add_spm_variables(cps, spm_unit)
+        add_household_variables(cps, household)
 
         raw_data.close()
         cps.close()
@@ -166,5 +167,7 @@ def add_spm_variables(cps: h5py.File, spm_unit: DataFrame):
 
     cps["reduced_price_school_meals"] = cps["free_school_meals"][...] * 0
 
+def add_household_variables(cps: h5py.File, household: DataFrame):
+    cps["fips"] = household.GESTFIPS
 
 CPS = CPS()
diff --git a/openfisca_us/variables/demographic/spm_unit/spm_unit_id.py b/openfisca_us/variables/demographic/spm_unit/spm_unit_id.py
new file mode 100644
index 00000000000..10f201f14a4
--- /dev/null
+++ b/openfisca_us/variables/demographic/spm_unit/spm_unit_id.py
@@ -0,0 +1,14 @@
+from openfisca_us.model_api import *
+
+class spm_unit_id(Variable):
+    value_type = int
+    entity = SPMUnit
+    label = "SPM unit ID"
+    definition_period = YEAR
+
+class person_spm_unit_id(Variable):
+    value_type = int
+    entity = Person
+    label = "SPM unit ID"
+    definition_period = YEAR
+
diff --git a/setup.py b/setup.py
index eaba6c1c931..4370dda64e3 100644
--- a/setup.py
+++ b/setup.py
@@ -53,6 +53,7 @@
             "coverage",
             "plotly",
             "yaml-changelog>=0.1.6",
+            "python-us",
         ],
     },
     python_requires=">=3.7,<3.8",

From 069f3667de8c53349336b3afbf997366be8557f1 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Tue, 19 Apr 2022 09:38:25 +0100
Subject: [PATCH 21/27] Add ACS tests

---
 .../tests/microsimulation/data/acs/test_acs.py  | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 openfisca_us/tests/microsimulation/data/acs/test_acs.py

diff --git a/openfisca_us/tests/microsimulation/data/acs/test_acs.py b/openfisca_us/tests/microsimulation/data/acs/test_acs.py
new file mode 100644
index 00000000000..fc120aa18dd
--- /dev/null
+++ b/openfisca_us/tests/microsimulation/data/acs/test_acs.py
@@ -0,0 +1,17 @@
+from openfisca_us.data import ACS
+import pytest
+from openfisca_us import Microsimulation
+
+ACS_YEARS = [2019]
+
+
+@pytest.mark.dependency(name="acs")
+@pytest.mark.parametrize("year", ACS_YEARS)
+def test_cps_dataset_generates(year):
+    ACS.generate(year)
+
+
+@pytest.mark.dependency(depends=["acs"])
+@pytest.mark.parametrize("year", ACS_YEARS)
+def test_cps_openfisca_us_compatible(year):
+    Microsimulation(dataset=ACS, year=year).calc("tax")

From aa051af332dd64fb5587193fba4edac1aa461827 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Tue, 19 Apr 2022 09:38:48 +0100
Subject: [PATCH 22/27] Format

---
 openfisca_us/api/microsimulation.py           |   4 +-
 openfisca_us/data/datasets/acs/acs.py         |   2 +
 openfisca_us/data/datasets/cps/cps.py         |   2 +
 .../ubi_center/basic_income/basic_income.py   |   2 +-
 .../demographic/geographic/state_name.py      | 114 +++++++++---------
 .../demographic/spm_unit/spm_unit_id.py       |   3 +-
 .../demographic/weights/person_weight.py      |   2 +-
 7 files changed, 71 insertions(+), 58 deletions(-)

diff --git a/openfisca_us/api/microsimulation.py b/openfisca_us/api/microsimulation.py
index 32b13d8710c..a821787206b 100644
--- a/openfisca_us/api/microsimulation.py
+++ b/openfisca_us/api/microsimulation.py
@@ -9,7 +9,9 @@ class Microsimulation(GeneralMicrosimulation):
     entities = entities
     default_dataset = CPS
 
-    def __init__(self, reform=(), dataset: type = CPS, year: int = None, **kwargs):
+    def __init__(
+        self, reform=(), dataset: type = CPS, year: int = None, **kwargs
+    ):
         if dataset == CPS and len(CPS.years) == 0:
             CPS.generate(2020)
 
diff --git a/openfisca_us/data/datasets/acs/acs.py b/openfisca_us/data/datasets/acs/acs.py
index 6502af325c4..52fac6bb861 100644
--- a/openfisca_us/data/datasets/acs/acs.py
+++ b/openfisca_us/data/datasets/acs/acs.py
@@ -89,9 +89,11 @@ def add_id_variables(
     acs["person_weight"] = person.PWGTP
     acs["household_weight"] = household.WGTP
 
+
 def add_person_variables(acs: h5py.File, person: DataFrame):
     acs["age"] = person.AGEP
 
+
 def add_spm_variables(acs: h5py.File, spm_unit: DataFrame):
     acs["spm_unit_net_income"] = spm_unit.SPM_RESOURCES
     acs["spm_unit_spm_threshold"] = spm_unit.SPM_POVTHRESHOLD
diff --git a/openfisca_us/data/datasets/cps/cps.py b/openfisca_us/data/datasets/cps/cps.py
index 0994026ca3e..81fcdd724e9 100644
--- a/openfisca_us/data/datasets/cps/cps.py
+++ b/openfisca_us/data/datasets/cps/cps.py
@@ -167,7 +167,9 @@ def add_spm_variables(cps: h5py.File, spm_unit: DataFrame):
 
     cps["reduced_price_school_meals"] = cps["free_school_meals"][...] * 0
 
+
 def add_household_variables(cps: h5py.File, household: DataFrame):
     cps["fips"] = household.GESTFIPS
 
+
 CPS = CPS()
diff --git a/openfisca_us/variables/contrib/ubi_center/basic_income/basic_income.py b/openfisca_us/variables/contrib/ubi_center/basic_income/basic_income.py
index 579bbbcc40e..e429154c7cc 100644
--- a/openfisca_us/variables/contrib/ubi_center/basic_income/basic_income.py
+++ b/openfisca_us/variables/contrib/ubi_center/basic_income/basic_income.py
@@ -12,4 +12,4 @@ class basic_income(Variable):
     def formula(person, period, parameters):
         bi = parameters(period).contrib.ubi_center.basic_income
         age = person("age", period)
-        return bi.amount_by_age.calc(age)
\ No newline at end of file
+        return bi.amount_by_age.calc(age)
diff --git a/openfisca_us/variables/demographic/geographic/state_name.py b/openfisca_us/variables/demographic/geographic/state_name.py
index 9fea58de658..516b37305ab 100644
--- a/openfisca_us/variables/demographic/geographic/state_name.py
+++ b/openfisca_us/variables/demographic/geographic/state_name.py
@@ -76,57 +76,63 @@ class state_name(Variable):
 
     def formula(household, period, parameters):
         fips = household("fips", period)
-        return pd.Series(fips).map({
-            1: StateName.AL,
-            2: StateName.AK,
-            4: StateName.AZ,
-            5: StateName.AR,
-            6: StateName.CA,
-            8: StateName.CO,
-            9: StateName.CT,
-            10: StateName.DE,
-            11: StateName.DC,
-            12: StateName.FL,
-            13: StateName.GA,
-            15: StateName.HI,
-            16: StateName.ID,
-            17: StateName.IL,
-            18: StateName.IN,
-            19: StateName.IA,
-            20: StateName.KS,
-            21: StateName.KY,
-            22: StateName.LA,
-            23: StateName.ME,
-            24: StateName.MD,
-            25: StateName.MA,
-            26: StateName.MI,
-            27: StateName.MN,
-            28: StateName.MS,
-            29: StateName.MO,
-            30: StateName.MT,
-            31: StateName.NE,
-            32: StateName.NV,
-            33: StateName.NH,
-            34: StateName.NJ,
-            35: StateName.NM,
-            36: StateName.NY,
-            37: StateName.NC,
-            38: StateName.ND,
-            39: StateName.OH,
-            40: StateName.OK,
-            41: StateName.OR,
-            42: StateName.PA,
-            44: StateName.RI,
-            45: StateName.SC,
-            46: StateName.SD,
-            47: StateName.TN,
-            48: StateName.TX,
-            49: StateName.UT,
-            50: StateName.VT,
-            51: StateName.VA,
-            53: StateName.WA,
-            54: StateName.WV,
-            55: StateName.WI,
-            56: StateName.WY,
-            72: StateName.PR,
-        }).values
+        return (
+            pd.Series(fips)
+            .map(
+                {
+                    1: StateName.AL,
+                    2: StateName.AK,
+                    4: StateName.AZ,
+                    5: StateName.AR,
+                    6: StateName.CA,
+                    8: StateName.CO,
+                    9: StateName.CT,
+                    10: StateName.DE,
+                    11: StateName.DC,
+                    12: StateName.FL,
+                    13: StateName.GA,
+                    15: StateName.HI,
+                    16: StateName.ID,
+                    17: StateName.IL,
+                    18: StateName.IN,
+                    19: StateName.IA,
+                    20: StateName.KS,
+                    21: StateName.KY,
+                    22: StateName.LA,
+                    23: StateName.ME,
+                    24: StateName.MD,
+                    25: StateName.MA,
+                    26: StateName.MI,
+                    27: StateName.MN,
+                    28: StateName.MS,
+                    29: StateName.MO,
+                    30: StateName.MT,
+                    31: StateName.NE,
+                    32: StateName.NV,
+                    33: StateName.NH,
+                    34: StateName.NJ,
+                    35: StateName.NM,
+                    36: StateName.NY,
+                    37: StateName.NC,
+                    38: StateName.ND,
+                    39: StateName.OH,
+                    40: StateName.OK,
+                    41: StateName.OR,
+                    42: StateName.PA,
+                    44: StateName.RI,
+                    45: StateName.SC,
+                    46: StateName.SD,
+                    47: StateName.TN,
+                    48: StateName.TX,
+                    49: StateName.UT,
+                    50: StateName.VT,
+                    51: StateName.VA,
+                    53: StateName.WA,
+                    54: StateName.WV,
+                    55: StateName.WI,
+                    56: StateName.WY,
+                    72: StateName.PR,
+                }
+            )
+            .values
+        )
diff --git a/openfisca_us/variables/demographic/spm_unit/spm_unit_id.py b/openfisca_us/variables/demographic/spm_unit/spm_unit_id.py
index 10f201f14a4..c516a6d643e 100644
--- a/openfisca_us/variables/demographic/spm_unit/spm_unit_id.py
+++ b/openfisca_us/variables/demographic/spm_unit/spm_unit_id.py
@@ -1,14 +1,15 @@
 from openfisca_us.model_api import *
 
+
 class spm_unit_id(Variable):
     value_type = int
     entity = SPMUnit
     label = "SPM unit ID"
     definition_period = YEAR
 
+
 class person_spm_unit_id(Variable):
     value_type = int
     entity = Person
     label = "SPM unit ID"
     definition_period = YEAR
-
diff --git a/openfisca_us/variables/demographic/weights/person_weight.py b/openfisca_us/variables/demographic/weights/person_weight.py
index 86019fea0c0..9ca2ae46d18 100644
--- a/openfisca_us/variables/demographic/weights/person_weight.py
+++ b/openfisca_us/variables/demographic/weights/person_weight.py
@@ -8,4 +8,4 @@ class person_weight(Variable):
     definition_period = YEAR
 
     def formula(person, period, parameters):
-        return person.household("household_weight", period)
\ No newline at end of file
+        return person.household("household_weight", period)

From 9f2f99f18fc90a940ee451beca56413601744d17 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Tue, 19 Apr 2022 09:49:27 +0100
Subject: [PATCH 23/27] Fix dependency name (python-us -> us)

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 4370dda64e3..b83e13ae9e5 100644
--- a/setup.py
+++ b/setup.py
@@ -53,7 +53,7 @@
             "coverage",
             "plotly",
             "yaml-changelog>=0.1.6",
-            "python-us",
+            "us",
         ],
     },
     python_requires=">=3.7,<3.8",

From 26b4ed2c506510cb4912c1a06c60c254aeebe6ae Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Tue, 19 Apr 2022 09:52:23 +0100
Subject: [PATCH 24/27] Add employment income

---
 openfisca_us/data/datasets/acs/acs.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/openfisca_us/data/datasets/acs/acs.py b/openfisca_us/data/datasets/acs/acs.py
index 52fac6bb861..d3ff13a23fd 100644
--- a/openfisca_us/data/datasets/acs/acs.py
+++ b/openfisca_us/data/datasets/acs/acs.py
@@ -92,6 +92,8 @@ def add_id_variables(
 
 def add_person_variables(acs: h5py.File, person: DataFrame):
     acs["age"] = person.AGEP
+    acs["employment_income"] = person.WAGP
+    acs["self_employment_income"] = person.SEMP
 
 
 def add_spm_variables(acs: h5py.File, spm_unit: DataFrame):

From 1291e033386d5affc4c084c68d82e16902861ad5 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Tue, 19 Apr 2022 10:09:54 +0100
Subject: [PATCH 25/27] Cast ACS SERIALNO columns to int

---
 openfisca_us/data/datasets/acs/acs.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/openfisca_us/data/datasets/acs/acs.py b/openfisca_us/data/datasets/acs/acs.py
index d3ff13a23fd..7177c3f701e 100644
--- a/openfisca_us/data/datasets/acs/acs.py
+++ b/openfisca_us/data/datasets/acs/acs.py
@@ -36,8 +36,8 @@ def generate(self, year: int) -> None:
         def make_numeric(x):
             return int(x.replace("2019GQ", "0").replace("2019HU", "1"))
 
-        household.SERIALNO = household.SERIALNO.apply(make_numeric)
-        person.SERIALNO = person.SERIALNO.apply(make_numeric)
+        household.SERIALNO = household.SERIALNO.apply(make_numeric).astype(int)
+        person.SERIALNO = person.SERIALNO.apply(make_numeric).astype(int)
         person.SPORDER = person.SPORDER.astype(int)
         person.SPM_ID = person.SPM_ID.astype(int)
         spm_unit.SPM_ID = spm_unit.SPM_ID.astype(int)

From 168d7e672b7595e1bb486edc312272ed8e871a3f Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Tue, 19 Apr 2022 11:03:19 +0100
Subject: [PATCH 26/27] Fix linking bugs

---
 openfisca_us/data/datasets/acs/acs.py     | 17 ++++++++++-----
 openfisca_us/data/datasets/acs/raw_acs.py | 26 +++++++++++++++++------
 2 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/openfisca_us/data/datasets/acs/acs.py b/openfisca_us/data/datasets/acs/acs.py
index 7177c3f701e..b0a0a80e4c5 100644
--- a/openfisca_us/data/datasets/acs/acs.py
+++ b/openfisca_us/data/datasets/acs/acs.py
@@ -1,3 +1,4 @@
+import logging
 from openfisca_tools.data import PublicDataset
 import h5py
 from openfisca_us.data.datasets.acs.raw_acs import RawACS
@@ -33,17 +34,23 @@ def generate(self, year: int) -> None:
         ]
         # Add primary and foreign keys
 
-        def make_numeric(x):
-            return int(x.replace("2019GQ", "0").replace("2019HU", "1"))
-
-        household.SERIALNO = household.SERIALNO.apply(make_numeric).astype(int)
-        person.SERIALNO = person.SERIALNO.apply(make_numeric).astype(int)
+        household.SERIALNO = household.SERIALNO.astype(int)
+        person.SERIALNO = person.SERIALNO.astype(int)
         person.SPORDER = person.SPORDER.astype(int)
         person.SPM_ID = person.SPM_ID.astype(int)
         spm_unit.SPM_ID = spm_unit.SPM_ID.astype(int)
 
+        logging.info(
+            f"Persons with a linked household {person.SERIALNO.isin(household.SERIALNO).mean():.1%}"
+        )
         person = person[person.SERIALNO.isin(household.SERIALNO)]
+        logging.info(
+            f"Households with a linked person {household.SERIALNO.isin(person.SERIALNO).mean():.1%}"
+        )
         household = household[household.SERIALNO.isin(person.SERIALNO)]
+        logging.info(
+            f"SPM units with a linked person {spm_unit.SPM_ID.isin(person.SPM_ID).mean():.1%}"
+        )
         spm_unit = spm_unit[spm_unit.SPM_ID.isin(person.SPM_ID)]
 
         add_id_variables(acs, person, spm_unit, household)
diff --git a/openfisca_us/data/datasets/acs/raw_acs.py b/openfisca_us/data/datasets/acs/raw_acs.py
index 69180f88850..0d14319715a 100644
--- a/openfisca_us/data/datasets/acs/raw_acs.py
+++ b/openfisca_us/data/datasets/acs/raw_acs.py
@@ -75,14 +75,27 @@ def generate(self, year: int) -> None:
             with pd.HDFStore(RawACS.file(year)) as storage:
                 # Household file
                 logging.info(f"Downloading household file")
-                storage["household"] = concat_zipped_csvs(
+                household = concat_zipped_csvs(
                     household_url, "psam_hus", HOUSEHOLD_COLUMNS
                 )
+                # Remove group quarters (zero weight)
+                household = household[
+                    ~household.SERIALNO.str.contains("2019GQ")
+                ]
+                household["SERIALNO"] = household["SERIALNO"].apply(
+                    lambda x: int(x.replace("2019HU", ""))
+                )
+                storage["household"] = household
                 # Person file
                 logging.info(f"Downloading person file")
-                storage["person"] = concat_zipped_csvs(
+                person = concat_zipped_csvs(
                     person_url, "psam_pus", PERSON_COLUMNS
                 )
+                person = person[~person.SERIALNO.str.contains("2019GQ")]
+                person["SERIALNO"] = person["SERIALNO"].apply(
+                    lambda x: int(x.replace("2019HU", ""))
+                )
+                storage["person"] = person
                 # SPM unit file
                 logging.info(f"Downloading SPM unit file")
                 spm_person = pd.read_stata(spm_url).fillna(0)
@@ -90,9 +103,10 @@ def generate(self, year: int) -> None:
                 create_spm_unit_table(storage, spm_person)
         except Exception as e:
             RawACS.remove(year)
-            raise ValueError(
+            logging.error(
                 f"Attempted to extract and save the CSV files, but encountered an error: {e}"
             )
+            raise e
 
 
 RawACS = RawACS()
@@ -171,12 +185,10 @@ def create_spm_unit_table(
     original_person_table = storage["person"]
     # Ensure that join keys are the same type.
     JOIN_COLUMNS = ["SERIALNO", "SPORDER"]
-    print(original_person_table[JOIN_COLUMNS].dtypes)
-    print(person[JOIN_COLUMNS].dtypes)
     original_person_table[JOIN_COLUMNS] = original_person_table[
         JOIN_COLUMNS
-    ].astype(str)
-    person[JOIN_COLUMNS] = person[JOIN_COLUMNS].astype(str)
+    ].astype(int)
+    person[JOIN_COLUMNS] = person[JOIN_COLUMNS].astype(int)
     # Add SPM_ID from the SPM person table to the original person table.
     combined_person_table = pd.merge(
         original_person_table,

From 3a24bbb225ca3a4d0caff8a6861dc4295abb488d Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Tue, 19 Apr 2022 11:12:28 +0100
Subject: [PATCH 27/27] Remove datasets before building

---
 Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Makefile b/Makefile
index 9ecfbcc6111..de644bf86b1 100644
--- a/Makefile
+++ b/Makefile
@@ -13,6 +13,7 @@ test:
 documentation:
 	jb build docs/book
 build:
+	rm -f openfisca_us/data/storage/*.h5
 	python setup.py sdist bdist_wheel
 
 changelog: