diff --git a/Makefile b/Makefile index 9ecfbcc6111..de644bf86b1 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,7 @@ test: documentation: jb build docs/book build: + rm openfisca_us/data/storage/*.h5 python setup.py sdist bdist_wheel changelog: diff --git a/changelog.yaml b/changelog.yaml index c546c6ccc61..98d4a7adaf6 100644 --- a/changelog.yaml +++ b/changelog.yaml @@ -1,531 +1,535 @@ - changes: added: - - First prototype version with a standard deduction variable. + - First prototype version with a standard deduction variable. date: 2021-06-28 00:00:00 version: 0.0.1 - bump: minor changes: added: - - Prototype with some tax implementations. + - Prototype with some tax implementations. date: 2021-12-25 00:00:00 - bump: minor changes: added: - - Tax variables, some benefit variables. + - Tax variables, some benefit variables. date: 2021-12-25 00:00:01 - bump: minor changes: added: - - Lifeline benefit. + - Lifeline benefit. date: 2021-12-25 00:00:02 - bump: patch changes: added: - - Automated tests. + - Automated tests. date: 2021-12-25 00:00:03 - bump: minor changes: added: - - TANF eligibility, broken down into demographic and financial variables, with - financial separated by current enrollment in program. - - Demographic TANF eligibility per IL rules. + - TANF eligibility, broken down into demographic and financial variables, with + financial separated by current enrollment in program. + - Demographic TANF eligibility per IL rules. date: 2021-12-26 00:00:00 - bump: minor changes: added: - - Medicaid income thresholds for California. + - Medicaid income thresholds for California. date: 2021-12-27 00:00:00 - bump: minor changes: added: - - Alternative Minimum Tax (AMT) income and liability logic. - - Development tools for auto-generating unit tests for Tax-Calculator functions. + - Alternative Minimum Tax (AMT) income and liability logic. + - Development tools for auto-generating unit tests for Tax-Calculator functions. 
date: 2021-12-28 00:00:00 - bump: minor changes: added: - - Gains Tax (capital gains treatment) logic and parameters. + - Gains Tax (capital gains treatment) logic and parameters. date: 2021-12-28 00:00:01 - bump: minor changes: added: - - Minimum benefit logic for SNAP. + - Minimum benefit logic for SNAP. date: 2021-12-28 00:00:02 - bump: minor changes: added: - - Social Security taxation logic. + - Social Security taxation logic. date: 2021-12-28 00:00:03 - bump: minor changes: added: - - Income-to-SMI (state median income) ratio. + - Income-to-SMI (state median income) ratio. date: 2021-12-28 00:00:04 - bump: minor changes: added: - - American Opportunity (tax) Credit. - - Lifetime Learning (tax) Credit. + - American Opportunity (tax) Credit. + - Lifetime Learning (tax) Credit. date: 2021-12-30 00:00:00 - bump: minor changes: added: - - Elderly and Disabled (tax) Credit. + - Elderly and Disabled (tax) Credit. date: 2021-12-30 00:00:01 - bump: minor changes: added: - - Formula for Medicaid person type, based on age and dependents. - - Variable for whether a person meets their Medicaid income eligibility requirement. + - Formula for Medicaid person type, based on age and dependents. + - Variable for whether a person meets their Medicaid income eligibility requirement. date: 2021-12-31 00:00:00 - bump: minor changes: added: - - SNAP eligibility based on federal net and gross income limits. - - Unit and integration tests for SNAP variables. + - SNAP eligibility based on federal net and gross income limits. + - Unit and integration tests for SNAP variables. 
date: 2022-01-03 00:00:00 - bump: minor changes: added: - - Federal SNAP asset tests logic + - Federal SNAP asset tests logic date: 2022-01-03 00:00:01 - bump: minor changes: added: - - CCDF subsidy top-level logic + - CCDF subsidy top-level logic date: 2022-01-03 00:00:02 - bump: minor changes: added: - - Categorical eligibility for SNAP, including broad-based categorical eligibility - via low-cost TANF programs that effectively extend SNAP's asset and income limits. + - Categorical eligibility for SNAP, including broad-based categorical eligibility + via low-cost TANF programs that effectively extend SNAP's asset and income limits. changed: - - Refactored SNAP code. + - Refactored SNAP code. date: 2022-01-04 00:00:00 - bump: patch changes: changed: - - Use USDA elderly and disabled definitions in SNAP calculations. + - Use USDA elderly and disabled definitions in SNAP calculations. date: 2022-01-06 00:00:00 - bump: minor changes: added: - - Total child care market rate. + - Total child care market rate. date: 2022-01-06 00:00:01 - bump: minor changes: added: - - Update child care market rate to annual. + - Update child care market rate to annual. date: 2022-01-06 00:00:02 - bump: patch changes: added: - - Formulas for `childcare_hours_per_week` and `spm_unit_size`. - - Unit tests and units for some variables. + - Formulas for `childcare_hours_per_week` and `spm_unit_size`. + - Unit tests and units for some variables. changed: - - Reorganized variables. + - Reorganized variables. date: 2022-01-07 00:00:00 - bump: patch changes: changed: - - Removes the `u` prefix from all variable label strings. + - Removes the `u` prefix from all variable label strings. date: 2022-01-08 00:00:00 - bump: patch changes: added: - - Units to all tax variables. + - Units to all tax variables. changed: - - Adds one line between tests in yaml files. - - Use consistent imports in variable Python files. + - Adds one line between tests in yaml files. 
+ - Use consistent imports in variable Python files. removed: - - C-TAM benefit variables in tax Python files. - - Erroneous formula for `eic` variable. + - C-TAM benefit variables in tax Python files. + - Erroneous formula for `eic` variable. date: 2022-01-08 00:00:01 - bump: minor changes: added: - - Formula for initial TANF eligibility. - - 'Two new variables: `tanf_gross_earned_income` and `tanf_gross_unearned_income`.' - - Variable & parameter for `initial_employment_deduction`. - - Integration tests for TANF cash aid from TANF IL website. + - Formula for initial TANF eligibility. + - "Two new variables: `tanf_gross_earned_income` and `tanf_gross_unearned_income`." + - Variable & parameter for `initial_employment_deduction`. + - Integration tests for TANF cash aid from TANF IL website. changed: - - '`tanf_countable_income` now includes unearned income and earned income deduction.' + - "`tanf_countable_income` now includes unearned income and earned income deduction." date: 2022-01-09 00:00:00 - bump: patch changes: fixed: - - Test runner failed to test string values. + - Test runner failed to test string values. date: 2022-01-12 00:00:00 - bump: patch changes: added: - - Metadata for SNAP eligibility parameters. + - Metadata for SNAP eligibility parameters. fixed: - - Parameter misname in SNAP formula. + - Parameter misname in SNAP formula. date: 2022-01-14 00:00:00 - bump: minor changes: added: - - Add CCDF copay formula. + - Add CCDF copay formula. date: 2022-01-14 00:00:01 - bump: minor changes: added: - - Formula for SSI based on eligibility and amount if eligible. + - Formula for SSI based on eligibility and amount if eligible. date: 2022-01-14 00:00:02 - bump: minor changes: fixed: - - Update CCDF subsidy formula. + - Update CCDF subsidy formula. date: 2022-01-15 00:00:00 - bump: patch changes: fixed: - - Added links to version tag diffs in changelog. + - Added links to version tag diffs in changelog. 
date: 2022-01-15 00:00:01 - bump: minor changes: added: - - Logic for SNAP excess medical deduction and dependent care deduction. - - Limit SNAP earned income deduction to earned income. - - Jupyter Book documentation on SNAP. - - Updated SNAP parameters. - - 'Empty variables for calculating SNAP: `employment_income`, `self_employment_income`, - `dividend_income`, `interest_income`, `childcare_expenses`, and `medical_out_of_pocket_expenses`.' + - Logic for SNAP excess medical deduction and dependent care deduction. + - Limit SNAP earned income deduction to earned income. + - Jupyter Book documentation on SNAP. + - Updated SNAP parameters. + - "Empty variables for calculating SNAP: `employment_income`, `self_employment_income`, + `dividend_income`, `interest_income`, `childcare_expenses`, and `medical_out_of_pocket_expenses`." changed: - - Significant refactoring of SNAP code. - - Use openfisca-tools for `add` and `aggr` functions, and pass lists of variables - to these function. - - Rename min/max SNAP benefit parameters and variables to use `allotment`. + - Significant refactoring of SNAP code. + - Use openfisca-tools for `add` and `aggr` functions, and pass lists of variables + to these function. + - Rename min/max SNAP benefit parameters and variables to use `allotment`. date: 2022-01-17 00:00:00 - bump: patch changes: changed: - - Add metadata for variables and parameters used in SNAP calculations. - - Renames two parameters involved in SNAP deductions from `threshold` to `disregard`. + - Add metadata for variables and parameters used in SNAP calculations. + - Renames two parameters involved in SNAP deductions from `threshold` to `disregard`. date: 2022-01-17 00:00:01 - bump: minor changes: added: - - Child Tax Credit (including adult dependents) parameters, logic and tests. + - Child Tax Credit (including adult dependents) parameters, logic and tests. date: 2022-01-17 00:00:02 - bump: minor changes: added: - - Categorical eligibility to school meal subsidies. 
- - Documentation notebook on school meal subsidies. - - Parameterized income sources for school meal subsidies. + - Categorical eligibility to school meal subsidies. + - Documentation notebook on school meal subsidies. + - Parameterized income sources for school meal subsidies. changed: - - Count school meal subsidies by school enrollment rather than age. - - Remove `spm_unit_` prefix from school meal variables. + - Count school meal subsidies by school enrollment rather than age. + - Remove `spm_unit_` prefix from school meal variables. date: 2022-01-25 00:00:00 - bump: minor changes: added: - - Child Tax Credit (and historical policy). - - Non-refundable and refundable credit handling in tax logic. - - Metadata for education credits and the EITC. + - Child Tax Credit (and historical policy). + - Non-refundable and refundable credit handling in tax logic. + - Metadata for education credits and the EITC. fixed: - - Bugs in head/spouse detection and nonrefundable credits. + - Bugs in head/spouse detection and nonrefundable credits. date: 2022-01-28 00:00:00 - bump: patch changes: added: - - Metadata and variable aliases for key tax variables. - - Employment, self-employment, interest and dividend income as inputs to tax logic. + - Metadata and variable aliases for key tax variables. + - Employment, self-employment, interest and dividend income as inputs to tax logic. date: 2022-02-02 00:00:00 - bump: patch changes: added: - - Added formula for TANF variable `continuous_tanf_eligibility` - - Added integration test for continuous TANF eligibility to `integration.yaml` + - Added formula for TANF variable `continuous_tanf_eligibility` + - Added integration test for continuous TANF eligibility to `integration.yaml` date: 2022-02-06 00:00:00 - bump: minor changes: added: - - SNAP emergency allotments for California. - - SNAP unearned income example in JupyterBook docs. + - SNAP emergency allotments for California. + - SNAP unearned income example in JupyterBook docs. 
date: 2022-02-06 00:00:01 - bump: minor changes: added: - - California Clean Vehicle Rebate Project. + - California Clean Vehicle Rebate Project. date: 2022-02-07 00:00:00 - bump: minor changes: added: - - Guaranteed income / cash assistance pilot income variable. This counts as unearned - income for SNAP, uncounted for taxes and other benefits. + - Guaranteed income / cash assistance pilot income variable. This counts as unearned + income for SNAP, uncounted for taxes and other benefits. date: 2022-02-07 00:00:01 - bump: patch changes: fixed: - - EITC logic and parameters for non-3-child tax units. + - EITC logic and parameters for non-3-child tax units. date: 2022-02-08 00:00:00 - bump: patch changes: added: - - PolicyEngine metadata and notebook for Lifeline program. - - Formula for `irs_gross_income`, which Lifeline uses to calculate income-based - eligibility. + - PolicyEngine metadata and notebook for Lifeline program. + - Formula for `irs_gross_income`, which Lifeline uses to calculate income-based + eligibility. date: 2022-02-08 00:00:01 - bump: patch changes: fixed: - - Add Lifeline notebook to table of contents. + - Add Lifeline notebook to table of contents. date: 2022-02-08 00:00:02 - bump: minor changes: added: - - Income limits for 5 Maryland Medicaid coverage groups. + - Income limits for 5 Maryland Medicaid coverage groups. date: 2022-02-09 00:00:00 - bump: minor changes: added: - - WIC program. + - WIC program. fixed: - - Include guaranteed income / cash assistance in market income. + - Include guaranteed income / cash assistance in market income. date: 2022-02-09 00:00:01 - bump: patch changes: fixed: - - Change WIC display name from `WIC benefit value` to `WIC`. + - Change WIC display name from `WIC benefit value` to `WIC`. date: 2022-02-09 00:00:02 - bump: patch changes: fixed: - - Specify WIC's unit as USD. + - Specify WIC's unit as USD. 
date: 2022-02-09 00:00:03 - bump: patch changes: fixed: - - Remove guaranteed income / cash assistance from benefits. + - Remove guaranteed income / cash assistance from benefits. date: 2022-02-09 00:00:04 - bump: patch changes: added: - - Categorical breakdown metadata infrastructure from OpenFisca-Tools. + - Categorical breakdown metadata infrastructure from OpenFisca-Tools. date: 2022-02-10 00:00:00 - bump: patch changes: added: - - Chained CPI-U (monthly and August-only) parameters. - - Metadata for SNAP max allotment. + - Chained CPI-U (monthly and August-only) parameters. + - Metadata for SNAP max allotment. date: 2022-02-13 00:00:00 - bump: patch changes: changed: - - OpenFisca-Tools constraint widened to the current major version. + - OpenFisca-Tools constraint widened to the current major version. date: 2022-02-16 00:00:00 - bump: minor changes: added: - - Uprated tax parameters for federal income tax. + - Uprated tax parameters for federal income tax. date: 2022-02-21 00:00:00 - bump: minor changes: added: - - Affordable Connectivity Program. + - Affordable Connectivity Program. changed: - - Split school meal subsidies into free and reduced-price. + - Split school meal subsidies into free and reduced-price. date: 2022-02-21 00:00:01 - bump: minor changes: added: - - Rural Tribal supplement for Lifeline. + - Rural Tribal supplement for Lifeline. changed: - - Restructure ACP and EBB Tribal amounts to work with PolicyEngine. + - Restructure ACP and EBB Tribal amounts to work with PolicyEngine. date: 2022-02-21 00:00:02 - bump: patch changes: changed: - - Edited labels for ACP and SNAP normal allotment. + - Edited labels for ACP and SNAP normal allotment. date: 2022-02-21 00:00:03 - bump: patch changes: fixed: - - Subtract Lifeline from broadband cost before calculating ACP and EBB. + - Subtract Lifeline from broadband cost before calculating ACP and EBB. date: 2022-02-27 00:00:00 - bump: patch changes: added: - - Code coverage badge to README.md. 
- - Reminder for pull requests to run `make format && make documentation`. - - CPI-uprated values for WIC average payments. + - Code coverage badge to README.md. + - Reminder for pull requests to run `make format && make documentation`. + - CPI-uprated values for WIC average payments. changed: - - Child Tax Credit names renamed to `ctc`. - - Child and Dependent Care Credit names renamed to `cdcc`. + - Child Tax Credit names renamed to `ctc`. + - Child and Dependent Care Credit names renamed to `cdcc`. fixed: - - EITC maximum age in 2021 changed from 125 to infinity. + - EITC maximum age in 2021 changed from 125 to infinity. date: 2022-02-28 00:00:00 - bump: minor changes: added: - - Supplemental Security Income for individuals. - - Social Security input variables, counted as unearned income for several programs. + - Supplemental Security Income for individuals. + - Social Security input variables, counted as unearned income for several programs. date: 2022-03-04 00:00:00 - bump: patch changes: changed: - - Adjust variable labels for consistency. + - Adjust variable labels for consistency. date: 2022-03-04 00:00:01 - bump: minor changes: added: - - SNAP aggregate benefits and participation. + - SNAP aggregate benefits and participation. date: 2022-03-05 00:00:00 - bump: patch changes: changed: - - Point `e02400` to `social_security` (for PolicyEngine). + - Point `e02400` to `social_security` (for PolicyEngine). date: 2022-03-07 00:00:00 - bump: patch changes: added: - - '`spm_unit_weight` variable.' + - "`spm_unit_weight` variable." fixed: - - SNAP now uses the additional amounts where main rates are not available. + - SNAP now uses the additional amounts where main rates are not available. date: 2022-03-07 00:00:01 - bump: patch changes: changed: - - '`is_married` moved from person-level to family-level, with a formula added.' + - "`is_married` moved from person-level to family-level, with a formula added." 
date: 2022-03-08 00:00:00 - bump: patch changes: changed: - - IRS-published uprated income tax parameters for 2019-22. + - IRS-published uprated income tax parameters for 2019-22. date: 2022-03-09 00:00:00 - bump: patch changes: added: - - February 2022 chained CPI-U. + - February 2022 chained CPI-U. changed: - - Simplified WIC uprating. + - Simplified WIC uprating. date: 2022-03-11 00:00:00 - bump: patch changes: fixed: - - EITC uses the correct phase-in rate. + - EITC uses the correct phase-in rate. date: 2022-03-13 00:00:00 - bump: patch changes: changed: - - Tax folder re-organised to improve modularity. + - Tax folder re-organised to improve modularity. fixed: - - A bug in AMT calculations. + - A bug in AMT calculations. date: 2022-03-16 21:22:44 - bump: patch changes: fixed: - - Push action on GitHub correctly publishes. + - Push action on GitHub correctly publishes. date: 2022-03-16 20:29:58 - bump: patch changes: fixed: - - Push action on GitHub correctly publishes. + - Push action on GitHub correctly publishes. date: 2022-03-16 21:22:44 - bump: minor changes: changed: - - Added multiple parameters for California's TANF system. - - Refactored the TANF structure for easier implementation of other state TANF - programs. + - Added multiple parameters for California's TANF system. + - Refactored the TANF structure for easier implementation of other state TANF + programs. date: 2022-03-27 18:49:02 - bump: patch changes: added: - - Page on TANF to documentation. + - Page on TANF to documentation. date: 2022-03-28 10:40:42 - bump: patch changes: fixed: - - Versioning action didn't update `setup.py`. + - Versioning action didn't update `setup.py`. date: 2022-03-28 10:55:27 - bump: minor changes: changed: - - Added `is_eitc_qualifying_child` variable to improve EITC child logic. - - Split `is_in_school` into `is_in_k12_school` and `is_full_time_student`. + - Added `is_eitc_qualifying_child` variable to improve EITC child logic. 
+ - Split `is_in_school` into `is_in_k12_school` and `is_full_time_student`. date: 2022-03-28 11:34:53 - bump: minor changes: added: - - Net income limits for SNAP BBCE (TANF) program. - - Legislative references for SNAP income limits. + - Net income limits for SNAP BBCE (TANF) program. + - Legislative references for SNAP income limits. removed: - - 165% SNAP gross income limit for separate elderly and disabled households (unused). + - 165% SNAP gross income limit for separate elderly and disabled households (unused). date: 2022-03-30 01:17:38 - bump: minor changes: added: - - CDCC parameters for eligibility and metadata. + - CDCC parameters for eligibility and metadata. fixed: - - A bug where the CDCC would phase down too quickly. + - A bug where the CDCC would phase down too quickly. date: 2022-03-30 11:46:11 - bump: patch changes: added: - - Parameter metadata for tax credits and payroll taxes. + - Parameter metadata for tax credits and payroll taxes. date: 2022-03-30 13:12:44 - bump: patch changes: added: - - Added full-time college student variable. + - Added full-time college student variable. date: 2022-03-30 18:53:00 - bump: minor changes: added: - - HUD adjusted income and dependent variables and logic. + - HUD adjusted income and dependent variables and logic. date: 2022-04-05 19:04:10 - bump: patch changes: fixed: - - Point TANF parameter to state instead of region. + - Point TANF parameter to state instead of region. date: 2022-04-06 10:35:14 - bump: minor changes: added: - - More recent Social Security payroll tax cap parameter values. - - Separate parameters for employer payroll taxes and self-employment taxes. - - Parameter for self-employment net earnings disregard. - - Unit tests and legislative references for payroll and self-employment tax variables. + - More recent Social Security payroll tax cap parameter values. + - Separate parameters for employer payroll taxes and self-employment taxes. 
+ - Parameter for self-employment net earnings disregard. + - Unit tests and legislative references for payroll and self-employment tax variables. changed: - - Reorganized payroll and self-employment tax parameters and variables. - - Replaced large parameters with infinity and made number formatting consistent. + - Reorganized payroll and self-employment tax parameters and variables. + - Replaced large parameters with infinity and made number formatting consistent. removed: - - Reform-only `social_security.add_taxable_earnings` parameter. - - Unused `exact` variable. - - Variable for `social_security_taxes` (moved logic to `refundable_child_tax_credit`). + - Reform-only `social_security.add_taxable_earnings` parameter. + - Unused `exact` variable. + - Variable for `social_security_taxes` (moved logic to `refundable_child_tax_credit`). date: 2022-04-07 06:08:18 - bump: patch changes: fixed: - - Refundable CTC formula works properly when phase-in rate increased (comments - added). + - Refundable CTC formula works properly when phase-in rate increased (comments + added). date: 2022-04-12 18:38:49 - bump: minor changes: added: - - Capped non-refundable credits variable. - - Shortened labels for tax variables. + - Capped non-refundable credits variable. + - Shortened labels for tax variables. date: 2022-04-13 12:58:29 - bump: minor changes: added: - - Microdata now handled entirely within OpenFisca-US. + - Microdata now handled entirely within OpenFisca-US. date: 2022-04-14 08:19:40 - bump: patch changes: added: - - Legislative references for CDCC parameters. + - Legislative references for CDCC parameters. fixed: - - CDCC uses maximum dependent parameter. + - CDCC uses maximum dependent parameter. date: 2022-04-15 14:23:11 - bump: patch changes: added: - - Unit tests for age variables. + - Unit tests for age variables. fixed: - - Tax unit head and spouse flag logic. + - Tax unit head and spouse flag logic. 
date: 2022-04-15 18:10:27 +- bump: minor + changes: + added: + - American Community Survey input. diff --git a/openfisca_us/__init__.py b/openfisca_us/__init__.py index c50e1db6b1c..7c638e14845 100644 --- a/openfisca_us/__init__.py +++ b/openfisca_us/__init__.py @@ -9,4 +9,5 @@ """ from openfisca_us.system import CountryTaxBenefitSystem from openfisca_us.api import Microsimulation, IndividualSim +from openfisca_us.data import ACS, CPS from openfisca_us import reforms diff --git a/openfisca_us/api/microsimulation.py b/openfisca_us/api/microsimulation.py index 3645ab3c406..a821787206b 100644 --- a/openfisca_us/api/microsimulation.py +++ b/openfisca_us/api/microsimulation.py @@ -9,7 +9,9 @@ class Microsimulation(GeneralMicrosimulation): entities = entities default_dataset = CPS - def __init__(self, reform=(), dataset: type = CPS, year: int = None): + def __init__( + self, reform=(), dataset: type = CPS, year: int = None, **kwargs + ): if dataset == CPS and len(CPS.years) == 0: CPS.generate(2020) diff --git a/openfisca_us/data/__init__.py b/openfisca_us/data/__init__.py index 34604780c73..44252f109ee 100644 --- a/openfisca_us/data/__init__.py +++ b/openfisca_us/data/__init__.py @@ -1 +1 @@ -from openfisca_us.data.datasets import CPS, RawCPS +from openfisca_us.data.datasets import CPS, RawCPS, ACS, RawACS diff --git a/openfisca_us/data/datasets/__init__.py b/openfisca_us/data/datasets/__init__.py index cfef16c0268..030227a241e 100644 --- a/openfisca_us/data/datasets/__init__.py +++ b/openfisca_us/data/datasets/__init__.py @@ -1,3 +1,4 @@ -from openfisca_us.data.datasets.cps import CPS, RawCPS +from .cps import CPS, RawCPS +from .acs import ACS, RawACS -DATASETS = [CPS, RawCPS] +DATASETS = [CPS, RawCPS, ACS, RawACS] diff --git a/openfisca_us/data/datasets/acs/__init__.py b/openfisca_us/data/datasets/acs/__init__.py new file mode 100644 index 00000000000..881fc59af8a --- /dev/null +++ b/openfisca_us/data/datasets/acs/__init__.py @@ -0,0 +1,2 @@ +from 
openfisca_us.data.datasets.acs.raw_acs import RawACS
+from openfisca_us.data.datasets.acs.acs import ACS
diff --git a/openfisca_us/data/datasets/acs/acs.py b/openfisca_us/data/datasets/acs/acs.py
new file mode 100644
index 00000000000..b0a0a80e4c5
--- /dev/null
+++ b/openfisca_us/data/datasets/acs/acs.py
@@ -0,0 +1,162 @@
+import logging
+from openfisca_tools.data import PublicDataset
+import h5py
+from openfisca_us.data.datasets.acs.raw_acs import RawACS
+from openfisca_us.data.storage import OPENFISCA_US_MICRODATA_FOLDER
+from pandas import DataFrame
+
+
+class ACS(PublicDataset):
+    """OpenFisca-compatible ACS dataset derived from the RawACS tables."""
+
+    name = "acs"
+    is_openfisca_compatible = True
+    label = "ACS"
+    folder_path = OPENFISCA_US_MICRODATA_FOLDER
+
+    def generate(self, year: int) -> None:
+        """Generates the ACS dataset.
+
+        Replaces any existing ACS file for the year and generates the raw
+        ACS tables first if they are missing. If a step fails, the partly
+        written file is removed and the error is re-raised.
+
+        Args:
+            year (int): The year of the raw ACS to use.
+        """
+
+        # Prepare raw ACS tables
+        year = int(year)
+        if year in self.years:
+            self.remove(year)
+        if year not in RawACS.years:
+            RawACS.generate(year)
+
+        raw_data = RawACS.load(year)
+        try:
+            # The context manager closes the output file even on failure.
+            with h5py.File(ACS.file(year), mode="w") as acs:
+                person, spm_unit, household = _link_tables(
+                    raw_data["person"],
+                    raw_data["spm_unit"],
+                    raw_data["household"],
+                )
+                add_id_variables(acs, person, spm_unit, household)
+                add_person_variables(acs, person)
+                add_spm_variables(acs, spm_unit)
+                add_household_variables(acs, household)
+        except Exception:
+            # As in RawACS.generate, don't leave a half-written file behind.
+            self.remove(year)
+            raise
+        finally:
+            raw_data.close()
+
+
+ACS = ACS()
+
+
+def _link_tables(
+    person: DataFrame,
+    spm_unit: DataFrame,
+    household: DataFrame,
+):
+    """Cast key columns to int and keep only rows with a linked record.
+
+    Args:
+        person (DataFrame): The person table of the raw ACS.
+        spm_unit (DataFrame): The SPM unit table of the raw ACS.
+        household (DataFrame): The household table of the raw ACS.
+
+    Returns:
+        tuple: The filtered (person, spm_unit, household) tables.
+    """
+    # Add primary and foreign keys
+    household.SERIALNO = household.SERIALNO.astype(int)
+    person.SERIALNO = person.SERIALNO.astype(int)
+    person.SPORDER = person.SPORDER.astype(int)
+    person.SPM_ID = person.SPM_ID.astype(int)
+    spm_unit.SPM_ID = spm_unit.SPM_ID.astype(int)
+
+    # Each filter uses the result of the previous one, so order matters.
+    logging.info(
+        f"Persons with a linked household {person.SERIALNO.isin(household.SERIALNO).mean():.1%}"
+    )
+    person = person[person.SERIALNO.isin(household.SERIALNO)]
+    logging.info(
+        f"Households with a linked person {household.SERIALNO.isin(person.SERIALNO).mean():.1%}"
+    )
+    household = household[household.SERIALNO.isin(person.SERIALNO)]
+    logging.info(
+        f"SPM units with a linked person {spm_unit.SPM_ID.isin(person.SPM_ID).mean():.1%}"
+    )
+    spm_unit = spm_unit[spm_unit.SPM_ID.isin(person.SPM_ID)]
+    return person, spm_unit, household
+
+
+def add_id_variables(
+    acs: h5py.File,
+    person: DataFrame,
+    spm_unit: DataFrame,
+    household: DataFrame,
+):
+    """Add basic ID and weight variables.
+
+    Args:
+        acs (h5py.File): The ACS dataset file.
+        person (DataFrame): The person table of the ACS.
+        spm_unit (DataFrame): The SPM unit table created from the person table
+            of the ACS.
+        household (DataFrame): The household table of the ACS.
+    """
+    # Combine the household ID and the person number within the household.
+    acs["person_id"] = person.SERIALNO * 1e2 + person.SPORDER
+    acs["person_spm_unit_id"] = person.SPM_ID
+    acs["spm_unit_id"] = spm_unit.SPM_ID
+    # ACS doesn't have tax units.
+    acs["tax_unit_id"] = spm_unit.SPM_ID
+    # Until we add a family table, we'll use the SPM unit table.
+    acs["family_id"] = spm_unit.SPM_ID
+    acs["person_household_id"] = person.SERIALNO
+    acs["person_tax_unit_id"] = person.SPM_ID
+    acs["person_family_id"] = person.SPM_ID
+    acs["household_id"] = household.SERIALNO
+
+    # Add weights
+    acs["person_weight"] = person.PWGTP
+    acs["household_weight"] = household.WGTP
+
+
+def add_person_variables(acs: h5py.File, person: DataFrame):
+    """Add person-level age and income variables.
+
+    Args:
+        acs (h5py.File): The ACS dataset file.
+        person (DataFrame): The person table of the ACS.
+    """
+    acs["age"] = person.AGEP
+    acs["employment_income"] = person.WAGP
+    acs["self_employment_income"] = person.SEMP
+
+
+def add_spm_variables(acs: h5py.File, spm_unit: DataFrame):
+    """Add SPM unit resources and poverty threshold.
+
+    Args:
+        acs (h5py.File): The ACS dataset file.
+        spm_unit (DataFrame): The SPM unit table of the ACS.
+    """
+    acs["spm_unit_net_income"] = spm_unit.SPM_RESOURCES
+    acs["spm_unit_spm_threshold"] = spm_unit.SPM_POVTHRESHOLD
+
+
+def add_household_variables(acs: h5py.File, household: DataFrame):
+    """Add household-level variables.
+
+    Args:
+        acs (h5py.File): The ACS dataset file.
+        household (DataFrame): The household table of the ACS.
+    """
+    acs["household_vehicles_owned"] = household.VEH
+    # ST is the state code; stored under both names.
+    acs["fips"] = acs["household_fips"] = household.ST
diff --git a/openfisca_us/data/datasets/acs/raw_acs.py b/openfisca_us/data/datasets/acs/raw_acs.py
new file mode 100644
index 00000000000..0d14319715a
--- /dev/null
+++ b/openfisca_us/data/datasets/acs/raw_acs.py
@@ -0,0 +1,200 @@
+from io import BytesIO
+import logging
+from typing
import List +from zipfile import ZipFile +import pandas as pd +from openfisca_tools.data import PublicDataset +import requests +from tqdm import tqdm +from openfisca_us.data.storage import OPENFISCA_US_MICRODATA_FOLDER + + +logging.getLogger().setLevel(logging.INFO) + +PERSON_COLUMNS = [ + "SERIALNO", # Household ID + "SPORDER", # Person number within household + "PWGTP", # Person weight + "AGEP", # Age + "CIT", # Citizenship + "MAR", # Marital status + "WAGP", # Wage/salary + "SSP", # Social security income + "SSIP", # Supplemental security income + "SEX", # Sex + "SEMP", # Self-employment income + "SCHL", # Educational attainment + "RETP", # Retirement income + "PAP", # Public assistance income + "OIP", # Other income + "PERNP", # Total earnings + "PINCP", # Total income + "POVPIP", # Income-to-poverty line percentage + "RAC1P", # Race +] + +HOUSEHOLD_COLUMNS = [ + "SERIALNO", # Household ID + "PUMA", # PUMA area code + "ST", # State code + "ADJHSG", # Adjustment factor for housing dollar amounts + "ADJINC", # Adjustment factor for income + "WGTP", # Household weight + "NP", # Number of persons in household + "BDSP", # Number of bedrooms + "ELEP", # Electricity monthly cost + "FULP", # Fuel monthly cost + "GASP", # Gas monthly cost + "RMSP", # Number of rooms + "RNTP", # Monthly rent + "TEN", # Tenure + "VEH", # Number of vehicles + "FINCP", # Total income + "GRNTP", # Gross rent +] + + +class RawACS(PublicDataset): + name = "raw_acs" + label = "Raw ACS" + is_openfisca_compatible = False + folder_path = OPENFISCA_US_MICRODATA_FOLDER + + def generate(self, year: int) -> None: + year = int(year) + if year in self.years: + self.remove(year) + + spm_url = f"https://www2.census.gov/programs-surveys/supplemental-poverty-measure/datasets/spm/spm_{year}_pu.dta" + person_url = f"https://www2.census.gov/programs-surveys/acs/data/pums/{year}/1-Year/csv_pus.zip" + household_url = f"https://www2.census.gov/programs-surveys/acs/data/pums/{year}/1-Year/csv_hus.zip" + + # The 
data dictionary for 2019 can be found here: https://www2.census.gov/programs-surveys/acs/tech_docs/pums/data_dict/PUMS_Data_Dictionary_2019.pdf + + try: + with pd.HDFStore(RawACS.file(year)) as storage: + # Household file + logging.info(f"Downloading household file") + household = concat_zipped_csvs( + household_url, "psam_hus", HOUSEHOLD_COLUMNS + ) + # Remove group quarters (zero weight); SERIALNO is prefixed with the survey year, so build the prefix from `year` + household = household[ + ~household.SERIALNO.str.contains(f"{year}GQ") + ] + household["SERIALNO"] = household["SERIALNO"].apply( + lambda x: int(x.replace(f"{year}HU", "")) + ) + storage["household"] = household + # Person file + logging.info(f"Downloading person file") + person = concat_zipped_csvs( + person_url, "psam_pus", PERSON_COLUMNS + ) + person = person[~person.SERIALNO.str.contains(f"{year}GQ")] + person["SERIALNO"] = person["SERIALNO"].apply( + lambda x: int(x.replace(f"{year}HU", "")) + ) + storage["person"] = person + # SPM unit file + logging.info(f"Downloading SPM unit file") + spm_person = pd.read_stata(spm_url).fillna(0) + spm_person.columns = spm_person.columns.str.upper() + create_spm_unit_table(storage, spm_person) + except Exception as e: + RawACS.remove(year) + logging.error( + f"Attempted to extract and save the CSV files, but encountered an error: {e}" + ) + raise e + + +RawACS = RawACS() + + +def concat_zipped_csvs( + url: str, prefix: str, columns: List[str] +) -> pd.DataFrame: + """Downloads the ACS microdata, which is a zip file containing two halves in CSV format. + + Args: + url (str): The URL of the data server. + prefix (str): The prefix of the filenames, before a/b.csv. + columns (List[str]): The columns to filter (avoids hitting memory limits). + + Returns: + pd.DataFrame: The concatenated DataFrame.
+ """ + req = requests.get(url, stream=True) + with BytesIO() as f: + pbar = tqdm() + for chunk in req.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + pbar.update(len(chunk)) + f.write(chunk) + f.seek(0) + zf = ZipFile(f) + logging.info(f"Loading the first half of the dataset") + a = pd.read_csv(zf.open(prefix + "a.csv"), usecols=columns) + logging.info(f"Loading the second half of the dataset") + b = pd.read_csv(zf.open(prefix + "b.csv"), usecols=columns) + logging.info(f"Concatenating datasets") + res = pd.concat([a, b]).fillna(0) + res.columns = res.columns.str.upper() + return res + + +def create_spm_unit_table( + storage: pd.HDFStore, person: pd.DataFrame +) -> pd.DataFrame: + SPM_UNIT_COLUMNS = [ + "CAPHOUSESUB", + "CAPWKCCXPNS", + "CHILDCAREXPNS", + "EITC", + "ENGVAL", + "EQUIVSCALE", + "FEDTAX", + "FEDTAXBC", + "FICA", + "GEOADJ", + "MEDXPNS", + "NUMADULTS", + "NUMKIDS", + "NUMPER", + "POOR", + "POVTHRESHOLD", + "RESOURCES", + "SCHLUNCH", + "SNAPSUB", + "STTAX", + "TENMORTSTATUS", + "TOTVAL", + "WCOHABIT", + "WICVAL", + "WKXPNS", + "WUI_LT15", + "ID", + ] + spm_table = ( + person[["SPM_" + column for column in SPM_UNIT_COLUMNS]] + .groupby(person.SPM_ID) + .first() + ) + + original_person_table = storage["person"] + # Ensure that join keys are the same type. + JOIN_COLUMNS = ["SERIALNO", "SPORDER"] + original_person_table[JOIN_COLUMNS] = original_person_table[ + JOIN_COLUMNS + ].astype(int) + person[JOIN_COLUMNS] = person[JOIN_COLUMNS].astype(int) + # Add SPM_ID from the SPM person table to the original person table. 
+ combined_person_table = pd.merge( + original_person_table, + person[JOIN_COLUMNS + ["SPM_ID"]], + on=JOIN_COLUMNS, + ) + + storage["person"] = combined_person_table + storage["spm_unit"] = spm_table diff --git a/openfisca_us/data/datasets/cps/cps.py b/openfisca_us/data/datasets/cps/cps.py index 509c82163c8..81fcdd724e9 100644 --- a/openfisca_us/data/datasets/cps/cps.py +++ b/openfisca_us/data/datasets/cps/cps.py @@ -46,6 +46,7 @@ def generate(self, year: int): add_personal_variables(cps, person) add_personal_income_variables(cps, person) add_spm_variables(cps, spm_unit) + add_household_variables(cps, household) raw_data.close() cps.close() @@ -167,4 +168,8 @@ def add_spm_variables(cps: h5py.File, spm_unit: DataFrame): cps["reduced_price_school_meals"] = cps["free_school_meals"][...] * 0 +def add_household_variables(cps: h5py.File, household: DataFrame): + cps["fips"] = household.GESTFIPS + + CPS = CPS() diff --git a/openfisca_us/parameters/contrib/ubi_center/basic_income/amount_by_age.yaml b/openfisca_us/parameters/contrib/ubi_center/basic_income/amount_by_age.yaml new file mode 100644 index 00000000000..7f9dc4f0f08 --- /dev/null +++ b/openfisca_us/parameters/contrib/ubi_center/basic_income/amount_by_age.yaml @@ -0,0 +1,47 @@ +description: Basic income amounts by age. +brackets: + - threshold: + 2010-01-01: 0 + amount: + description: Unconditional payment to children. + values: + 2010-01-01: 0 + metadata: + label: Child basic income + unit: currency-USD + period: year + name: child_bi + - threshold: + description: Age at which individuals receive the working-age adult payment, rather than the child payment. + values: + 2010-01-01: 18 + metadata: + label: Basic income child age + unit: year + name: adult_bi_age + amount: + description: Unconditional payment to working-age adults. 
+ values: + 2010-01-01: 0 + metadata: + label: Adult basic income + unit: currency-USD + period: year + name: adult_bi + - threshold: + description: Age at which individuals receive the senior citizen payment, rather than the working-age adult payment. + values: + 2010-01-01: 65 + metadata: + label: Senior citizen basic income age + unit: year + name: senior_bi_age + amount: + description: Unconditional payment to senior citizens. + values: + 2010-01-01: 0 + metadata: + label: Senior citizen basic income + unit: currency-USD + period: year + name: senior_bi diff --git a/openfisca_us/tests/microsimulation/data/acs/test_acs.py b/openfisca_us/tests/microsimulation/data/acs/test_acs.py new file mode 100644 index 00000000000..fc120aa18dd --- /dev/null +++ b/openfisca_us/tests/microsimulation/data/acs/test_acs.py @@ -0,0 +1,17 @@ +from openfisca_us.data import ACS +import pytest +from openfisca_us import Microsimulation + +ACS_YEARS = [2019] + + +@pytest.mark.dependency(name="acs") +@pytest.mark.parametrize("year", ACS_YEARS) +def test_acs_dataset_generates(year): + ACS.generate(year) + + +@pytest.mark.dependency(depends=["acs"]) +@pytest.mark.parametrize("year", ACS_YEARS) +def test_acs_openfisca_us_compatible(year): + Microsimulation(dataset=ACS, year=year).calc("tax") diff --git a/openfisca_us/tests/policy/baseline/demographic/geographic/state_name.yaml b/openfisca_us/tests/policy/baseline/demographic/geographic/state_name.yaml new file mode 100644 index 00000000000..3a2311773ed --- /dev/null +++ b/openfisca_us/tests/policy/baseline/demographic/geographic/state_name.yaml @@ -0,0 +1,7 @@ +- name: California is decoded correctly from FIPS code.
+ period: 2020 + absolute_error_margin: 0 + input: + fips: 6 + output: + state_name: CA diff --git a/openfisca_us/variables/contrib/ubi_center/basic_income/basic_income.py b/openfisca_us/variables/contrib/ubi_center/basic_income/basic_income.py new file mode 100644 index 00000000000..e429154c7cc --- /dev/null +++ b/openfisca_us/variables/contrib/ubi_center/basic_income/basic_income.py @@ -0,0 +1,15 @@ +from openfisca_us.model_api import * + + +class basic_income(Variable): + value_type = float + entity = Person + label = "Basic income" + unit = USD + documentation = "Total basic income payments for this person." + definition_period = YEAR + + def formula(person, period, parameters): + bi = parameters(period).contrib.ubi_center.basic_income + age = person("age", period) + return bi.amount_by_age.calc(age) diff --git a/openfisca_us/variables/demographic/geographic/state_name.py b/openfisca_us/variables/demographic/geographic/state_name.py index 4bf51168c97..516b37305ab 100644 --- a/openfisca_us/variables/demographic/geographic/state_name.py +++ b/openfisca_us/variables/demographic/geographic/state_name.py @@ -73,3 +73,66 @@ class state_name(Variable): entity = Household label = "State" definition_period = ETERNITY + + def formula(household, period, parameters): + fips = household("fips", period) + return ( + pd.Series(fips) + .map( + { + 1: StateName.AL, + 2: StateName.AK, + 4: StateName.AZ, + 5: StateName.AR, + 6: StateName.CA, + 8: StateName.CO, + 9: StateName.CT, + 10: StateName.DE, + 11: StateName.DC, + 12: StateName.FL, + 13: StateName.GA, + 15: StateName.HI, + 16: StateName.ID, + 17: StateName.IL, + 18: StateName.IN, + 19: StateName.IA, + 20: StateName.KS, + 21: StateName.KY, + 22: StateName.LA, + 23: StateName.ME, + 24: StateName.MD, + 25: StateName.MA, + 26: StateName.MI, + 27: StateName.MN, + 28: StateName.MS, + 29: StateName.MO, + 30: StateName.MT, + 31: StateName.NE, + 32: StateName.NV, + 33: StateName.NH, + 34: StateName.NJ, + 35: StateName.NM, + 36: 
StateName.NY, + 37: StateName.NC, + 38: StateName.ND, + 39: StateName.OH, + 40: StateName.OK, + 41: StateName.OR, + 42: StateName.PA, + 44: StateName.RI, + 45: StateName.SC, + 46: StateName.SD, + 47: StateName.TN, + 48: StateName.TX, + 49: StateName.UT, + 50: StateName.VT, + 51: StateName.VA, + 53: StateName.WA, + 54: StateName.WV, + 55: StateName.WI, + 56: StateName.WY, + 72: StateName.PR, + } + ) + .values + ) diff --git a/openfisca_us/variables/demographic/household/vehicles_owned.py b/openfisca_us/variables/demographic/household/vehicles_owned.py new file mode 100644 index 00000000000..1dd8125571e --- /dev/null +++ b/openfisca_us/variables/demographic/household/vehicles_owned.py @@ -0,0 +1,10 @@ +from openfisca_us.model_api import * + + +class household_vehicles_owned(Variable): + value_type = float + entity = Household + label = "Vehicles owned" + # No unit: this is a count of vehicles, not a dollar amount. + documentation = "Number of vehicles owned by the household" + definition_period = YEAR diff --git a/openfisca_us/variables/demographic/person/is_adult.py b/openfisca_us/variables/demographic/person/is_adult.py new file mode 100644 index 00000000000..993e3b22ec0 --- /dev/null +++ b/openfisca_us/variables/demographic/person/is_adult.py @@ -0,0 +1,12 @@ +from openfisca_us.model_api import * + + +class is_adult(Variable): + value_type = bool + entity = Person + label = "Is an adult" + documentation = "Whether this person is 18 or older" + definition_period = YEAR + + def formula(person, period, parameters): + return person("age", period) >= 18 diff --git a/openfisca_us/variables/demographic/person/vehicles_owned.py b/openfisca_us/variables/demographic/person/vehicles_owned.py new file mode 100644 index 00000000000..ce32ecf6e95 --- /dev/null +++ b/openfisca_us/variables/demographic/person/vehicles_owned.py @@ -0,0 +1,35 @@ +from numpy import maximum +from openfisca_us.model_api import * +from random import randint +from openfisca_core.populations import Population + + +class vehicles_owned(Variable): + value_type
= float + entity = Person + label = "Vehicles owned" + # No unit: this is a count of vehicles, not a dollar amount. + documentation = "Number of vehicles owned by this person" + definition_period = YEAR + + def formula(person, period, parameters): + # We randomly split the household's vehicles between its adults + household = person.household + household_vehicles = household("household_vehicles_owned", period) + is_adult = person("is_adult", period) + num_adults_in_household = household.sum(is_adult) + max_vehicles = household_vehicles.max() + adult_rank = where(is_adult, household.members_position, 100) + vehicles = is_adult * 0 + for _ in range(int(max_vehicles)): + # One random draw per iteration, shared by all households and reduced modulo each household's adult count. NOTE(review): a household with no adults makes this a modulo by zero - confirm that is harmless. + selected_adult = ( + randint(0, adult_rank[is_adult].max()) + % num_adults_in_household + ) + maximum_reached = household.sum(vehicles) >= household_vehicles + should_add_vehicle = ~maximum_reached & ( + adult_rank == selected_adult + ) + vehicles += where(should_add_vehicle, 1, 0) + return vehicles diff --git a/openfisca_us/variables/demographic/spm_unit/spm_unit_id.py b/openfisca_us/variables/demographic/spm_unit/spm_unit_id.py new file mode 100644 index 00000000000..c516a6d643e --- /dev/null +++ b/openfisca_us/variables/demographic/spm_unit/spm_unit_id.py @@ -0,0 +1,15 @@ +from openfisca_us.model_api import * + + +class spm_unit_id(Variable): + value_type = int + entity = SPMUnit + label = "SPM unit ID" + definition_period = YEAR + + +class person_spm_unit_id(Variable): + value_type = int + entity = Person + label = "SPM unit ID" + definition_period = YEAR diff --git a/openfisca_us/variables/demographic/weights/person_weight.py b/openfisca_us/variables/demographic/weights/person_weight.py index a2a9b276a73..9ca2ae46d18 100644 --- a/openfisca_us/variables/demographic/weights/person_weight.py +++ b/openfisca_us/variables/demographic/weights/person_weight.py @@ -6,3 +6,6 @@ class person_weight(Variable): entity = Person label = "Person weight" definition_period = YEAR + + def formula(person, period,
parameters): + return person.household("household_weight", period) diff --git a/openfisca_us/variables/demographic/weights/spm_unit_weight.py b/openfisca_us/variables/demographic/weights/spm_unit_weight.py index 44c1918fdc8..7f35cb95c60 100644 --- a/openfisca_us/variables/demographic/weights/spm_unit_weight.py +++ b/openfisca_us/variables/demographic/weights/spm_unit_weight.py @@ -6,3 +6,7 @@ class spm_unit_weight(Variable): entity = SPMUnit label = "SPM unit weight" definition_period = YEAR + + def formula(spm_unit, period, parameters): + # Use household weights if not provided + return spm_unit.household("household_weight", period) diff --git a/openfisca_us/variables/income/spm_unit/spm_unit_fips.py b/openfisca_us/variables/income/spm_unit/spm_unit_fips.py new file mode 100644 index 00000000000..5353f5014e3 --- /dev/null +++ b/openfisca_us/variables/income/spm_unit/spm_unit_fips.py @@ -0,0 +1,11 @@ +from openfisca_us.model_api import * + + +class spm_unit_fips(Variable): + value_type = float + entity = SPMUnit + label = "SPM unit FIPS code" + definition_period = YEAR + + def formula(spm_unit, period, parameters): + return spm_unit.household("fips", period) diff --git a/openfisca_us/variables/income/spm_unit/spm_unit_is_in_deep_spm_poverty.py b/openfisca_us/variables/income/spm_unit/spm_unit_is_in_deep_spm_poverty.py new file mode 100644 index 00000000000..f3477012ac9 --- /dev/null +++ b/openfisca_us/variables/income/spm_unit/spm_unit_is_in_deep_spm_poverty.py @@ -0,0 +1,13 @@ +from openfisca_us.model_api import * + + +class spm_unit_is_in_deep_spm_poverty(Variable): + value_type = bool + entity = SPMUnit + label = "SPM unit in deep SPM poverty" + definition_period = YEAR + + def formula(spm_unit, period, parameters): + income = spm_unit("spm_unit_net_income", period) + poverty_threshold = spm_unit("spm_unit_spm_threshold", period) / 2 + return income < poverty_threshold diff --git a/openfisca_us/variables/income/spm_unit/spm_unit_net_income.py 
b/openfisca_us/variables/income/spm_unit/spm_unit_net_income.py index f2802ff5137..992b321fdc5 100644 --- a/openfisca_us/variables/income/spm_unit/spm_unit_net_income.py +++ b/openfisca_us/variables/income/spm_unit/spm_unit_net_income.py @@ -9,6 +9,11 @@ class spm_unit_net_income(Variable): unit = USD def formula(spm_unit, period, parameters): + reported_net_income = spm_unit("spm_unit_net_income_reported", period) + if reported_net_income.sum() > 0: + # If we have reported net income, use that instead for now. This + # is only until the full microsimulation can be run. + return reported_net_income market_income = spm_unit("spm_unit_market_income", period) benefits = spm_unit("spm_unit_benefits", period) taxes = spm_unit("spm_unit_taxes", period) diff --git a/openfisca_us/variables/irs/income/sources.py b/openfisca_us/variables/irs/income/sources.py index 3c2f093194e..80bf0fedd5a 100644 --- a/openfisca_us/variables/irs/income/sources.py +++ b/openfisca_us/variables/irs/income/sources.py @@ -1009,9 +1009,10 @@ class ffpos(Variable): class fips(Variable): value_type = int - entity = TaxUnit + entity = Household definition_period = YEAR documentation = "FIPS state code (not used in tax-calculation logic)" + default_value = 6 class h_seq(Variable): diff --git a/setup.py b/setup.py index eaba6c1c931..b83e13ae9e5 100644 --- a/setup.py +++ b/setup.py @@ -53,6 +53,7 @@ "coverage", "plotly", "yaml-changelog>=0.1.6", + "us", ], }, python_requires=">=3.7,<3.8",