PSLmodels · martinholmer · Feb 17, 2026 · Feb 17, 2026 · Feb 17, 2026 · Feb 17, 2026
diff --git a/Makefile b/Makefile
@@ -75,6 +75,18 @@ data: install tmd_files test
 format:
 	black . -l 79
 
+# Pylint message names passed to --disable when running the style target.
+PYLINT_DISABLE = duplicate-code,invalid-name,too-many-instance-attributes,too-many-locals,too-many-arguments,too-many-positional-arguments,too-many-statements,too-many-branches,too-many-nested-blocks,broad-exception-caught,missing-function-docstring,missing-module-docstring
+
+PYLINT_OPTIONS = --disable=$(PYLINT_DISABLE) --score=no --jobs=4 \
+                 --check-quote-consistency=yes
+
+# Run pycodestyle and then pylint from the repository root; declared phony with a colon because ".PHONY=style" would only assign a variable.
+.PHONY: style
+style:
+	@pycodestyle --ignore=E731,E712,W503 .
+	@pylint $(PYLINT_OPTIONS) .
+
 .PHONY=reweighting-visualisation
 reweighting-visualisation:
 	tensorboard --logdir=tmd/storage/output/reweighting
diff --git a/setup.py b/setup.py
@@ -16,6 +16,8 @@
         "scipy",
         "jax",
         "black>=26.1.0",
+        "pycodestyle>=2.14.0",
+        "pylint>=3.3.8",
         "pytest",
         "pytest-xdist",
         "jupyter-book",

diff --git a/tests/test_area_weights.py b/tests/test_area_weights.py
@@ -3,7 +3,6 @@
 """
 
 import yaml
-import pandas as pd
 import taxcalc as tc
 from tmd.storage import STORAGE_FOLDER
 from tmd.create_taxcalc_input_variables import TAXYEAR

diff --git a/tests/test_tax_revenue.py b/tests/test_tax_revenue.py
@@ -5,7 +5,6 @@
 import yaml
 import numpy as np
 import taxcalc as tc
-from tmd.storage import STORAGE_FOLDER
 from tmd.create_taxcalc_input_variables import TAXYEAR
 
 FIRST_CYR = 2023

diff --git a/tmd/areas/create_area_weights.py b/tmd/areas/create_area_weights.py
@@ -355,7 +355,7 @@ def target_rmse(wght, target_matrix, target_array, out, delta=None):
             continue
         line = (
             f">={interval.left:13.6f}, <{interval.right:13.6f}:"
-            f"  {num:6d}   {cum:6d}   {num/tot:7.2%}   {cum/tot:7.2%}\n"
+            f"  {num:6d}   {cum:6d}   {num / tot:7.2%}   {cum / tot:7.2%}\n"
         )
         out.write(line)
         if cum == tot:
@@ -429,7 +429,7 @@ def weight_ratio_distribution(ratio, delta, out):
             continue
         line = (
             f">={interval.left:13.6f}, <{interval.right:13.6f}:"
-            f"  {num:6d}   {cum:6d}   {num/tot:7.2%}   {cum/tot:7.2%}\n"
+            f"  {num:6d}   {cum:6d}   {num / tot:7.2%}   {cum / tot:7.2%}\n"
         )
         out.write(line)
         if cum == tot:
@@ -620,7 +620,7 @@ def create_area_weights_file(
         )
     else:
         res_summary = (
-            f">>> final delta loop exectime= {(time1-time0):.1f} secs"
+            f">>> final delta loop exectime= {(time1 - time0):.1f} secs"
             f"  iterations={res.nit}  success={res.success}\n"
             f">>> message: {res.message}\n"
             f">>> L-BFGS-B optimized objective function value: {res.fun:.9e}\n"

diff --git a/tmd/datasets/cps.py b/tmd/datasets/cps.py
@@ -1,8 +1,8 @@
 import os
-import yaml
 from io import BytesIO
 from typing import Type
 from zipfile import ZipFile
+import yaml
 import requests
 import numpy as np
 import pandas as pd
@@ -152,11 +152,26 @@ def generate(self) -> pd.DataFrame:
         file_year_code = str(file_year)[-2:]
 
         CPS_URL_BY_YEAR = {
-            2018: "https://www2.census.gov/programs-surveys/cps/datasets/2019/march/asecpub19csv.zip",
-            2019: "https://www2.census.gov/programs-surveys/cps/datasets/2020/march/asecpub20csv.zip",
-            2020: "https://www2.census.gov/programs-surveys/cps/datasets/2021/march/asecpub21csv.zip",
-            2021: "https://www2.census.gov/programs-surveys/cps/datasets/2022/march/asecpub22csv.zip",
-            2022: "https://www2.census.gov/programs-surveys/cps/datasets/2023/march/asecpub23csv.zip",
+            2018: (
+                "https://www2.census.gov/programs-surveys/cps/datasets/"
+                "2019/march/asecpub19csv.zip"
+            ),
+            2019: (
+                "https://www2.census.gov/programs-surveys/cps/datasets/"
+                "2020/march/asecpub20csv.zip"
+            ),
+            2020: (
+                "https://www2.census.gov/programs-surveys/cps/datasets/"
+                "2021/march/asecpub21csv.zip"
+            ),
+            2021: (
+                "https://www2.census.gov/programs-surveys/cps/datasets/"
+                "2022/march/asecpub22csv.zip"
+            ),
+            2022: (
+                "https://www2.census.gov/programs-surveys/cps/datasets/"
+                "2023/march/asecpub23csv.zip"
+            ),
         }
 
         if self.time_period not in CPS_URL_BY_YEAR:
@@ -239,7 +254,9 @@ def generate(self) -> pd.DataFrame:
                 )
         except Exception as e:
             raise ValueError(
-                f"Attempted to extract and save the CSV files, but encountered an error: {e} (removed the intermediate dataset)."
+                "Attempted to extract and save the CSV files, "
+                f"but encountered an error: {e} "
+                "(removed the intermediate dataset)."
             )
 
     @staticmethod
@@ -366,7 +383,9 @@ def add_id_variables(
 
     marital_unit_id = Series(marital_unit_id).rank(
         method="dense"
-    )  # Simplify to a natural number sequence with repetitions [0, 1, 1, 2, 3, ...]
+        # simplifies to a natural number sequence
+        # with repetitions [0, 1, 1, 2, 3, ...]
+    )
 
     cps["person_marital_unit_id"] = marital_unit_id.values
     cps["marital_unit_id"] = marital_unit_id.drop_duplicates().values
@@ -563,10 +582,11 @@ def add_personal_income_variables(
     # 2) If they report any wage and salary income, allocate in this order:
     #    a) Traditional 401(k) contributions up to to limit
     #    b) Roth 401(k) contributions up to the limit
-    #    c) IRA contributions up to the limit, split according to administrative fractions
+    #    c) IRA contributions up to the limit, split according
+    #       to administrative fractions
     #    d) Other retirement contributions
-    # Disregard reported pension contributions from people who report neither wage and salary
-    # nor self-employment income.
+    # Disregard reported pension contributions from people
+    #    who report neither wage and salary nor self-employment income.
     # Assume no 403(b) or 457 contributions for now.
     LIMIT_401K_2022 = 20_500
     LIMIT_401K_CATCH_UP_2022 = 6_500
@@ -620,7 +640,8 @@ def add_personal_income_variables(
         np.minimum(remaining_retirement_contributions, roth_ira_limit),
         0,
     )
-    # Allocate capital gains into long-term and short-term based on aggregate split.
+    # Allocate capital gains into long-term and short-term
+    # based on aggregate split.
     cps["long_term_capital_gains"] = person.CAP_VAL * (
         p["long_term_capgain_fraction"]
     )

diff --git a/tmd/datasets/puf.py b/tmd/datasets/puf.py
@@ -1,7 +1,9 @@
-import yaml
-from tqdm import tqdm
 import numpy as np
 import pandas as pd
+from tqdm import tqdm
+from microdf import MicroDataFrame
+from policyengine_core.data import Dataset
+from policyengine_us.system import system
 from tmd.storage import STORAGE_FOLDER
 from tmd.datasets.uprate_puf import uprate_puf
 from tmd.utils.imputation import Imputation
@@ -10,9 +12,6 @@
     IMPUTATION_BETA_RNG_SEED,
     W2_WAGES_SCALE,
 )
-from microdf import MicroDataFrame
-from policyengine_core.data import Dataset
-from policyengine_us.system import system
 
 FILER_AGE_RNG = np.random.default_rng(seed=64963751)
 SPOUSE_GENDER_RNG = np.random.default_rng(seed=83746519)
@@ -281,15 +280,15 @@ def generate(self, puf: pd.DataFrame, demographics: pd.DataFrame):
             IDVARS = ["E17500", "E18400", "E18500", "E19200", "E19800"]
             wght = puf.S006 / 100.0
             for var in IDVARS:
-                print(f"%%15:{var}= {(puf[var]*wght).sum()*1e-9:.3f}")
+                print(f"%%15:{var}= {(puf[var] * wght).sum() * 1e-9:.3f}")
 
         if self.time_period > 2015:
             puf = uprate_puf(puf, 2015, self.time_period)
 
         if itmded_dump:
             wght = puf.S006 / 100.0
             for var in IDVARS:
-                print(f"%%21:{var}= {(puf[var]*wght).sum()*1e-9:.3f}")
+                print(f"%%21:{var}= {(puf[var] * wght).sum() * 1e-9:.3f}")
 
         puf = puf[puf.MARS != 0]
 

diff --git a/tmd/datasets/taxcalc_dataset.py b/tmd/datasets/taxcalc_dataset.py
@@ -205,7 +205,11 @@ def map_to_tax_unit(var_array):
 
     # correct case of df variable names for Tax-Calculator
     tc_variable_metadata = yaml.safe_load(
-        open(STORAGE_FOLDER / "input" / "tc_variable_metadata.yaml", "r")
+        open(
+            STORAGE_FOLDER / "input" / "tc_variable_metadata.yaml",
+            "r",
+            encoding="utf-8",
+        )
     )
     renames = {}
     for variable in df.columns:

diff --git a/tmd/datasets/tmd.py b/tmd/datasets/tmd.py
@@ -8,7 +8,6 @@
 from tmd.utils.trace import trace1
 from tmd.utils.taxcalc_utils import add_taxcalc_outputs
 from tmd.utils.reweight import reweight
-from tmd.storage import STORAGE_FOLDER
 
 
 def create_tmd_2021():

diff --git a/tmd/examination/2022/bootstrap_sampling.py b/tmd/examination/2022/bootstrap_sampling.py
@@ -31,8 +31,10 @@ def bootstrap_sampling(outfile):
     else:
         fdf = gdf
     print(f"SS_FRAC = {SS_FRAC:.2f}")
-    print(f"SS:wght(#M) = {fdf['s006'].sum() * 1e-6:.3f}")
-    print(f"SS:itax($B) = {(fdf['s006'] * fdf['iitax']).sum() * 1e-9:.3f}")
+    wght = fdf["s006"]
+    print(f"SS:wght(#M) = {wght.sum() * 1e-6:.3f}")
+    itax = fdf["iitax"]
+    print(f"SS:itax($B) = {(wght * itax).sum() * 1e-9:.3f}")
 
     # compute sum of wght and wght*itax for each bootstrap sample
     xdf = pd.DataFrame({"wght": fdf["s006"], "itax": fdf["iitax"]})

diff --git a/tmd/storage/output/__init__.py b/tmd/storage/output/__init__.py
@@ -1,5 +1,5 @@
-import pandas as pd
 from pathlib import Path
+import pandas as pd
 
 output = Path(__file__).parent
 

diff --git a/tmd/utils/imputation.py b/tmd/utils/imputation.py
@@ -1,5 +1,6 @@
 from pathlib import Path
 from typing import List, Dict
+import pickle
 import numpy as np
 import pandas as pd
 from sklearn.ensemble import RandomForestRegressor
@@ -154,19 +155,16 @@ def save(self, path: str):
         Args:
             path (str): The path to save the model to.
         """
-
-        import pickle
-
         path = Path(path)
         path.parent.mkdir(parents=True, exist_ok=True)
         with open(path, "wb") as f:
             # Store the models only in a dictionary.
-            data = dict(
-                models=self.models,
-                X_columns=self.X_columns,
-                X_category_mappings=self.X_category_mappings,
-                Y_columns=self.Y_columns,
-            )
+            data = {
+                "models": self.models,
+                "X_columns": self.X_columns,
+                "X_category_mappings": self.X_category_mappings,
+                "Y_columns": self.Y_columns,
+            }
             pickle.dump(data, f)
 
     @staticmethod
@@ -180,9 +178,6 @@ def load(path: str) -> "Imputation":
         Returns:
             Imputation: The imputation model.
         """
-
-        import pickle
-
         imputation = Imputation()
         with open(path, "rb") as f:
             data = pickle.load(f)
@@ -234,6 +229,7 @@ class ManyToOneImputation:
     """Random number generator seed used by RandomForestRegressor."""
     beta_rng_seed: int = None
     """Random number generator seed used to generate Beta variates."""
+    encode_categories: pd.DataFrame = None
 
     def train(
         self,
@@ -264,7 +260,7 @@ def train(
             self.is_integer_coded = (
                 isinstance(y[0], str) or (y - y.round()).mean() < 1e-3
             )
-        except Exception as e:
+        except Exception:
             pass
         self.model.fit(X, y, sample_weight=sample_weight)
 
@@ -284,7 +280,7 @@ def predict(
             pd.Series: The predicted distribution of values for each input row.
         """
         if isinstance(X, pd.DataFrame) and any(
-            [X[column].dtype == "O" for column in X.columns]
+            X[column].dtype == "O" for column in X.columns
         ):
             X = self.encode_categories(X)
         X = to_array(X)
@@ -344,8 +340,8 @@ def loss(mean_quantile):
             pred_values = self.predict(input_df, mean_quantile)
             pred_aggregate = (pred_values * weights).sum()
             msg = (
-                f"PREDICTED: {pred_aggregate/1e9:.1f} "
-                f"(target: {target/1e9:.1f})"
+                f"PREDICTED: {pred_aggregate / 1e9:.1f} "
+                f"(target: {target / 1e9:.1f})"
             )
             print(msg)
             return (pred_aggregate - target) ** 2, pred_aggregate
@@ -364,8 +360,7 @@ def loss(mean_quantile):
                     f"(loss: {loss_value:.4f})"
                 )
                 print(msg)
-            if loss_value < best_loss:
-                best_loss = loss_value
+            best_loss = min(loss_value, best_loss)
             if pred_agg < target:
                 min_quantile = mean_quantile
             else:

diff --git a/tmd/utils/is_tax_filer.py b/tmd/utils/is_tax_filer.py
@@ -1,3 +1,6 @@
+from policyengine_us.system import system
+
+
 def is_tax_filer(
     irs_gross_income: float,
     filing_status: str,
@@ -16,9 +19,11 @@ def is_tax_filer(
         filing_status: Filing status of the tax unit.
         earned_income: Total earned income.
         total_income_tax: Total income tax liability.
-        aged_blind_count: Number of aged or blind individuals in the tax unit.
+        aged_blind_count: Number of aged or blind individuals
+                          in the tax unit.
         standard_deduction: Standard deduction for the tax unit.
-        aged_blind_standard_deduction: Additional standard deduction for aged or blind individuals.
+        aged_blind_standard_deduction: Additional standard deduction
+                                       for aged or blind individuals.
         exemption_amount: Exemption amount for the tax unit.
 
     Returns:
@@ -55,8 +60,6 @@ def is_tax_filer(
     return required_to_file or not_required_but_likely_filer
 
 
-from policyengine_us.system import system
-
 parameters = system.parameters.gov.irs
 
 aged_blind_standard_deduction = (

diff --git a/tmd/utils/pension_contributions.py b/tmd/utils/pension_contributions.py
@@ -1,4 +1,3 @@
-import numpy as np
 from policyengine_us import Microsimulation
 from tmd.datasets.cps import CPS_2021
 from tmd.utils.imputation import Imputation