Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ data: install tmd_files test
format:
black . -l 79

PYLINT_DISABLE = duplicate-code,invalid-name,too-many-instance-attributes,too-many-locals,too-many-arguments,too-many-positional-arguments,too-many-statements,too-many-branches,too-many-nested-blocks,broad-exception-caught,missing-function-docstring,missing-module-docstring
PYLINT_DISABLE = duplicate-code,invalid-name,too-many-instance-attributes,too-many-locals,too-many-arguments,too-many-positional-arguments,too-many-statements,too-many-branches,too-many-nested-blocks,broad-exception-caught,missing-function-docstring,missing-module-docstring,missing-class-docstring

PYLINT_OPTIONS = --disable=$(PYLINT_DISABLE) --score=no --jobs=4 \
--check-quote-consistency=yes
Expand Down
57 changes: 29 additions & 28 deletions tmd/datasets/cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,12 @@ def generate(self) -> pd.DataFrame:
col for col in spm_unit_columns if col != "SPM_BBSUBVAL"
]

response = requests.get(url, stream=True, verify=False)
response = requests.get(
url,
stream=True,
verify=False,
timeout=(20, 600),
)
total_size_in_bytes = int(
response.headers.get("content-length", 200e6)
)
Expand Down Expand Up @@ -257,7 +262,7 @@ def generate(self) -> pd.DataFrame:
"Attempted to extract and save the CSV files, "
f"but encountered an error: {e} "
"(removed the intermediate dataset)."
)
) from e

@staticmethod
def _create_tax_unit_table(person: pd.DataFrame) -> pd.DataFrame:
Expand Down Expand Up @@ -309,7 +314,7 @@ def generate(self):

add_id_variables(cps, person, tax_unit, family, spm_unit, household)
add_personal_variables(cps, person)
add_personal_income_variables(cps, person, self.raw_cps.time_period)
add_personal_income_variables(cps, person)
add_previous_year_income(self, cps)
add_spm_variables(cps, spm_unit)
add_household_variables(cps, household)
Expand Down Expand Up @@ -461,15 +466,13 @@ def children_per_parent(col: str) -> pd.DataFrame:
cps["is_full_time_college_student"] = person.A_HSCOL == 2


def add_personal_income_variables(
cps: h5py.File, person: DataFrame, year: int
):
"""Add income variables.
def add_personal_income_variables(cps: h5py.File, person: DataFrame):
"""
Add income variables.

Args:
cps (h5py.File): The CPS dataset file.
person (DataFrame): The CPS person table.
year (int): The CPS year
"""
# Get income imputation parameters.
yamlfilename = os.path.join(
Expand Down Expand Up @@ -670,29 +673,27 @@ def add_personal_income_variables(


def add_spm_variables(cps: h5py.File, spm_unit: DataFrame) -> None:
SPM_RENAMES = dict(
spm_unit_total_income_reported="SPM_TOTVAL",
snap_reported="SPM_SNAPSUB",
spm_unit_capped_housing_subsidy_reported="SPM_CAPHOUSESUB",
free_school_meals_reported="SPM_SCHLUNCH",
spm_unit_energy_subsidy_reported="SPM_ENGVAL",
spm_unit_wic_reported="SPM_WICVAL",
spm_unit_broadband_subsidy_reported="SPM_BBSUBVAL",
spm_unit_payroll_tax_reported="SPM_FICA",
spm_unit_federal_tax_reported="SPM_FEDTAX",
# State tax includes refundable credits.
spm_unit_state_tax_reported="SPM_STTAX",
spm_unit_capped_work_childcare_expenses="SPM_CAPWKCCXPNS",
spm_unit_medical_expenses="SPM_MEDXPNS",
spm_unit_spm_threshold="SPM_POVTHRESHOLD",
spm_unit_net_income_reported="SPM_RESOURCES",
spm_unit_pre_subsidy_childcare_expenses="SPM_CHILDCAREXPNS",
)

SPM_RENAMES = {
"spm_unit_total_income_reported": "SPM_TOTVAL",
"snap_reported": "SPM_SNAPSUB",
"spm_unit_capped_housing_subsidy_reported": "SPM_CAPHOUSESUB",
"free_school_meals_reported": "SPM_SCHLUNCH",
"spm_unit_energy_subsidy_reported": "SPM_ENGVAL",
"spm_unit_wic_reported": "SPM_WICVAL",
"spm_unit_broadband_subsidy_reported": "SPM_BBSUBVAL",
"spm_unit_payroll_tax_reported": "SPM_FICA",
"spm_unit_federal_tax_reported": "SPM_FEDTAX",
# state tax includes refundable credits
"spm_unit_state_tax_reported": "SPM_STTAX",
"spm_unit_capped_work_childcare_expenses": "SPM_CAPWKCCXPNS",
"spm_unit_medical_expenses": "SPM_MEDXPNS",
"spm_unit_spm_threshold": "SPM_POVTHRESHOLD",
"spm_unit_net_income_reported": "SPM_RESOURCES",
"spm_unit_pre_subsidy_childcare_expenses": "SPM_CHILDCAREXPNS",
}
for openfisca_variable, asec_variable in SPM_RENAMES.items():
if asec_variable in spm_unit.columns:
cps[openfisca_variable] = spm_unit[asec_variable]

cps["reduced_price_school_meals_reported"] = (
cps["free_school_meals_reported"][...] * 0
)
Expand Down
11 changes: 3 additions & 8 deletions tmd/utils/is_tax_filer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from policyengine_us.system import system
"""
The is_tax_filer function is not currently called anywhere in the TMD code.
"""


def is_tax_filer(
Expand Down Expand Up @@ -58,10 +60,3 @@ def is_tax_filer(
# (a)(1)(D) is just definitions

return required_to_file or not_required_but_likely_filer


parameters = system.parameters.gov.irs

aged_blind_standard_deduction = (
parameters.deductions.standard.aged_or_blind.amount
)
7 changes: 4 additions & 3 deletions tmd/utils/soi_replication.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
def pe_to_soi(pe_dataset):
pe_sim = Microsimulation(dataset=pe_dataset)
df = pd.DataFrame()
pe = lambda variable: np.array(
pe_sim.calculate(variable, map_to="tax_unit")
)

def pe(variable):
return np.array(pe_sim.calculate(variable, map_to="tax_unit"))

df["adjusted_gross_income"] = pe("adjusted_gross_income")
df["exemption"] = pe("exemptions")
df["itemded"] = pe("itemized_taxable_income_deductions")
Expand Down
39 changes: 14 additions & 25 deletions tmd/utils/soi_targets.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,21 @@
import pandas as pd
import numpy as np
import pandas as pd
from tmd.storage import STORAGE_FOLDER

soi = pd.read_csv(STORAGE_FOLDER / "input" / "agi_targets.csv")


def clean_agi_bounds(
soi: pd.DataFrame,
) -> pd.DataFrame:
"""Adds cleaned AGI bounds to Don's scraped SOI statistics file.
"""
Adds cleaned AGI bounds to the scraped SOI statistics file.

Args:
soi (pd.DataFrame): DataFrame with AGI targets.

Returns:
pd.DataFrame: DataFrame with cleaned AGI bounds.
"""

soi = soi.copy()

agi_bound_map = {
"All returns": (-np.inf, np.inf),
"No adjusted gross income": (-np.inf, 0),
Expand Down Expand Up @@ -53,17 +50,16 @@ def clean_agi_bounds(
"$55,000 under $60,000": (55_000, 60_000),
"$60,000 under $75,000": (60_000, 75_000),
}

soi["agi_lower"] = soi["incrange"].map(lambda x: agi_bound_map[x][0])
soi["agi_upper"] = soi["incrange"].map(lambda x: agi_bound_map[x][1])

return soi


def clean_filing_status(
soi: pd.DataFrame,
) -> pd.DataFrame:
"""Adds cleaned filing status values to Don's scraped SOI statistics file.
"""
Adds cleaned filing status values to the scraped SOI statistics file.

Args:
soi (pd.DataFrame): DataFrame with AGI targets.
Expand All @@ -78,29 +74,24 @@ def clean_filing_status(
def get_filing_status(name):
if "single" in name:
return "Single"
elif "mfs" in name:
if "mfs" in name:
return "Married Filing Separately"
elif "mfjss" in name:
if "mfjss" in name:
return "Married Filing Jointly/Surviving Spouse"
elif "hoh" in name:
if "hoh" in name:
return "Head of Household"
else:
return "All"
return "All"

soi["filing_status"] = soi.vname.apply(get_filing_status)

return soi


def clean_vname(vname):
REMOVED = ["nret", "single", "mfs", "mfjss", "hoh", "_"]

for r in REMOVED:
vname = vname.replace(r, "")

if vname == "" or vname == "all":
if vname in ["", "all"]:
return "count"

VARIABLE_RENAMES = {
"agi": "adjusted gross income",
"exemption": "exemptions",
Expand Down Expand Up @@ -151,17 +142,15 @@ def clean_vname(vname):
"idsalt": "state and local tax deductions",
"idtaxpaid": "itemized taxes paid deductions", # federal tax payments
}

if vname in VARIABLE_RENAMES:
return VARIABLE_RENAMES[vname]

return vname


def clean_soi_file(soi):
soi.vname = soi.vname.replace(
"nret_partnerpinc", "nret_partnerinc"
) # Typo
) # fix typo

soi["Count"] = soi.vname.apply(lambda x: "nret" in x)
soi["Taxable only"] = soi.datatype == "taxable"
Expand Down Expand Up @@ -256,6 +245,6 @@ def clean_soi_file(soi):
return soi[columns]


soi = clean_soi_file(soi)

soi.to_csv(STORAGE_FOLDER / "input" / "soi.csv", index=False)
soi_raw = pd.read_csv(STORAGE_FOLDER / "input" / "agi_targets.csv")
soi_clean = clean_soi_file(soi_raw)
soi_clean.to_csv(STORAGE_FOLDER / "input" / "soi.csv", index=False)