Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
590586e
Revise tests for now, add new field names to create_db.sql
kyrasturgill Feb 27, 2026
6317e13
Update db version and base url
kyrasturgill Feb 27, 2026
2510bca
Remove failing checks, add pin_tif_distribution table
kyrasturgill Feb 28, 2026
7ad0774
Add ref to pin_tif_distribution
kyrasturgill Feb 28, 2026
eabc60c
Add pre-release tag to db url
kyrasturgill Mar 16, 2026
6061b9b
Add exe_vet_dis_100 to pin_dt
kyrasturgill Mar 17, 2026
644cdbf
Set unify_schemas to TRUE
kyrasturgill Mar 17, 2026
0e7cab3
Update pre-release package version
kyrasturgill Apr 3, 2026
babadff
Remove the collar county EAV fields in agency table
kyrasturgill Apr 3, 2026
f16d1cc
Remove reference to fields no longer present in agency table
kyrasturgill Apr 8, 2026
5cd0d4f
Some style fixes
kyrasturgill Apr 14, 2026
4a44423
Add lim_denominator test back in, fix pre-commit error
kyrasturgill Apr 14, 2026
397b290
Delete errant comma
kyrasturgill Apr 14, 2026
36990d6
Merge branch '2024-data-update' into kyrasturgill/update_create_db
kyrasturgill Apr 14, 2026
a8e0e8f
Update DESCRIPTION file
kyrasturgill Apr 14, 2026
881981d
Fix db test
kyrasturgill Apr 14, 2026
a3f7dc6
Fix DESCRIPTION typo
kyrasturgill Apr 15, 2026
bc44010
Another typo
kyrasturgill Apr 15, 2026
a8c2113
Yet another DESCRIPTION typo
kyrasturgill Apr 15, 2026
8409982
Test db version
kyrasturgill Apr 15, 2026
786919e
Remove alpha tag from require DB version config
kyrasturgill Apr 15, 2026
7034cad
Delete extra space
kyrasturgill Apr 15, 2026
d9fa6c9
Review revisions
kyrasturgill Apr 15, 2026
4084d5c
Apply suggestions from code review
kyrasturgill Apr 15, 2026
a49336f
Styler fixes
kyrasturgill Apr 15, 2026
c3f960b
Update data-raw/create_db.R
kyrasturgill Apr 16, 2026
6e0c3b2
Add NOT NULL checks
kyrasturgill Apr 16, 2026
f1a4cc2
Merge branch 'kyrasturgill/update_create_db' of github.com:ccao-data/…
kyrasturgill Apr 16, 2026
4dc1e0d
Bundle the NOT NULL checks
kyrasturgill Apr 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
Package: ptaxsim
Type: Package
Title: Calculate Cook County Property Tax Bills and Simulate Scenarios
Version: 1.0.0
Version: 1.1.0
Authors@R: c(
person(given = "Dan", family = "Snow", email = "daniel.snow@cookcountyil.gov", role = c("aut", "cre")),
person(given = "Rob", family = "Ross", role = c("aut", "ctb")),
person(given = "Kyra", family = "Sturgill", email = "kyra.sturgill@cookcountyil.gov", role = c("aut", "cre")),
person(given = "Dan", family = "Snow", role = c("aut")),
person(given = "Jean", family = "Cochrane", role = c("aut")),
person(given = "Rob", family = "Ross", role = c("aut")),
person(given = "Eric", family = "Langowski", role = c("ctb")),
person(given = "Mike", family = "Wu", role = c("ctb"))
)
Description: An R package to estimate property tax bills and simulate tax
Expand Down Expand Up @@ -63,5 +66,5 @@ Depends:
Remotes:
paleolimbot/geoarrow,
ropensci/tabulapdf
Config/Requires_DB_Version: 2021.0.4
Config/Wants_DB_Version: 2024.0.0-alpha.1
Config/Requires_DB_Version: 2024.0.0
Config/Wants_DB_Version: 2024.0.0-alpha.2
30 changes: 25 additions & 5 deletions data-raw/create_db.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,12 @@ db_send_queries <- function(conn, sql) {
# Set the database version. This gets incremented manually whenever the database
# changes. This is checked against Config/Requires_DB_Version in the DESCRIPTION
# file via check_db_version(). Schema is:
# "MAX_YEAR_OF_DATA.MAJOR_VERSION.MINOR_VERSION"
db_version <- "2023.0.0"
# "MAX_YEAR_OF_DATA.MAJOR_VERSION.MINOR_VERSION-PRE_RELEASE_VERSION"
Comment thread
kyrasturgill marked this conversation as resolved.
db_version <- "2024.0.0"
# Optional pre-release identifier. Informational only, not compared.
# Set this to an empty string for a public release, or to a string like
# "alpha.1" for a release candidate
db_pre_release_version <- "alpha.2"

# Set the package version required to use this database. This is checked against
# Version in the DESCRIPTION file. Basically, we have a two-way check so that
Expand Down Expand Up @@ -73,10 +77,19 @@ desc_url_package <- desc %>%
str_extract("(?<=URL: ).*(?=,)")

db_base_url <- "https://ccao-data-public-us-east-1.s3.amazonaws.com/ptaxsim/"
db_full_url <- paste0(db_base_url, "ptaxsim-", db_version, ".db.bz2")
db_full_url <- paste0(
db_base_url, "ptaxsim-", db_version,
if (nzchar(db_pre_release_version)) {
paste0("-", db_pre_release_version)
} else {
""
},
".db.bz2"
)
Comment thread
jeancochrane marked this conversation as resolved.

# Load agency files to get min and max year
agency_df <- read_parquet(file.path(remote_bucket, "agency", "part-0.parquet"))

min_year <- min(as.integer(agency_df$year))
max_year <- max(as.integer(agency_df$year))

Expand Down Expand Up @@ -112,7 +125,8 @@ DBI::dbAppendTable(conn, "metadata", metadata_df)
# Load tables contained in a single file
files <- c(
"agency", "agency_info", "agency_fund", "agency_fund_info",
"cpi", "eq_factor", "tif", "tif_crosswalk", "tif_distribution"
"cpi", "eq_factor", "tif", "tif_crosswalk", "tif_distribution",
"pin_tif_distribution"
)
for (file in files) {
message("Now loading: ", file)
Expand All @@ -124,7 +138,13 @@ for (file in files) {
datasets <- c("pin", "tax_code")
for (dataset in datasets) {
message("Now loading: ", dataset)
df <- collect(arrow::open_dataset(file.path(remote_bucket, dataset)))
df <- collect(arrow::open_dataset(file.path(remote_bucket, dataset),
# Starting in 2024, there are some major changes regarding the columns
# that are present in these data files. That means we need to unify the
# schemas across files, since otherwise arrow will take the schema from
# the first file it finds in the dataset
unify_schemas = TRUE
Comment thread
kyrasturgill marked this conversation as resolved.
))
DBI::dbAppendTable(conn, dataset, df)
}

Expand Down
78 changes: 54 additions & 24 deletions data-raw/create_db.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,39 +4,32 @@ PRAGMA foreign_keys = ON;
CREATE TABLE agency (
year int NOT NULL,
agency_num varchar(9) NOT NULL,
authority_num varchar(6) CHECK(year < 2024
OR authority_num IS NOT NULL) ,
Comment thread
jeancochrane marked this conversation as resolved.
home_rule_ind boolean NOT NULL,
agg_ext_base_year int CHECK(agg_ext_base_year >= 2003) ,
lim_numerator bigint CHECK(lim_numerator >= 0) ,
lim_denominator bigint CHECK(lim_denominator >= 0) ,
lim_rate double CHECK(lim_rate >= 0) ,
prior_eav bigint CHECK(prior_eav >= 0) NOT NULL,
curr_new_prop bigint CHECK(curr_new_prop >= 0) NOT NULL,
prior_eav bigint CHECK((prior_eav >= 0)
AND (year >= 2024 OR prior_eav IS NOT NULL)),
curr_new_prop bigint NOT NULL,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Thought, non-blocking] Clarifying this change for posterity: We learned that curr_new_prop can be negative if the value of disconnections (parcels that are no longer subject to the taxing authority of the agency) is greater than the value of new property.

cty_cook_eav bigint CHECK(cty_cook_eav >= 0) NOT NULL,
cty_dupage_eav bigint CHECK(cty_dupage_eav >= 0) NOT NULL,
cty_lake_eav bigint CHECK(cty_lake_eav >= 0) NOT NULL,
cty_will_eav bigint CHECK(cty_will_eav >= 0) NOT NULL,
cty_kane_eav bigint CHECK(cty_kane_eav >= 0) NOT NULL,
cty_mchenry_eav bigint CHECK(cty_mchenry_eav >= 0) NOT NULL,
cty_dekalb_eav bigint CHECK(cty_dekalb_eav >= 0) NOT NULL,
cty_grundy_eav bigint CHECK(cty_grundy_eav >= 0) NOT NULL,
cty_kankakee_eav bigint CHECK(cty_kankakee_eav >= 0) NOT NULL,
cty_kendall_eav bigint CHECK(cty_kendall_eav >= 0) NOT NULL,
cty_lasalle_eav bigint CHECK(cty_lasalle_eav >= 0) NOT NULL,
cty_livingston_eav bigint CHECK(cty_livingston_eav >= 0) NOT NULL,
cty_overlap_eav bigint CHECK(cty_overlap_eav >= 0) NOT NULL,
cty_total_eav bigint CHECK(cty_total_eav >= 0) NOT NULL,
pct_burden double CHECK(pct_burden >= 0
AND pct_burden <= 1) NOT NULL,
pct_burden double CHECK((pct_burden >= 0
AND pct_burden <= 1)
AND (year >= 2024 OR pct_burden IS NOT NULL)),
total_levy bigint CHECK(total_levy >= 0) NOT NULL,
total_max_levy bigint CHECK(total_max_levy >= 0) NOT NULL,
total_prelim_rate double CHECK(total_prelim_rate >= 0) NOT NULL,
total_reduced_levy bigint CHECK(total_reduced_levy >= 0) ,
total_final_levy bigint CHECK(total_final_levy >= 0) NOT NULL,
total_final_rate double CHECK(total_final_rate >= 0) NOT NULL,
reduction_type varchar ,
reduction_pct double CHECK(reduction_pct >= 0
AND reduction_pct <= 1) ,
total_non_cap_ext double CHECK(total_non_cap_ext >= 0) ,
total_ext double CHECK(total_ext >= 0) NOT NULL,

PRIMARY KEY (year, agency_num)
) WITHOUT ROWID;

Expand All @@ -53,6 +46,9 @@ CREATE TABLE agency_info (
agency_name_original varchar NOT NULL,
major_type varchar(21) NOT NULL,
minor_type varchar(10) NOT NULL,
agency_num_24 varchar(9) ,
agency_name_24 varchar ,
agency_change_24 boolean NOT NULL,
PRIMARY KEY (agency_num)
) WITHOUT ROWID;

Expand All @@ -66,14 +62,13 @@ CREATE TABLE agency_fund (
agency_num varchar(9) NOT NULL,
fund_num varchar(3) NOT NULL,
levy bigint CHECK(levy >= 0) NOT NULL,
loss_pct double CHECK(loss_pct >= 0
loss_pct double CHECK(loss_pct >= 0
AND loss_pct <= 1) NOT NULL,
levy_plus_loss bigint CHECK(levy_plus_loss >= 0) NOT NULL,
rate_ceiling double CHECK(rate_ceiling >= 0) NOT NULL,
rate_ceiling double CHECK(rate_ceiling >= 0) NOT NULL,
max_levy bigint CHECK(max_levy >= 0) NOT NULL,
prelim_rate double CHECK(prelim_rate >= 0) NOT NULL,
ptell_reduced_levy bigint CHECK(ptell_reduced_levy >= 0) ,
ptell_reduced_ind boolean NOT NULL,
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This field is no longer present in the 2024 reports and was removed from the agency_fund table. I'd forgotten to remove it from the table schema here.

final_levy bigint CHECK(final_levy >= 0) NOT NULL,
final_rate double CHECK(final_rate >= 0) NOT NULL,
PRIMARY KEY (year, agency_num, fund_num),
Expand All @@ -87,14 +82,17 @@ CREATE INDEX ix_agency_fund_fund_num ON agency_fund(fund_num);

/** agency_fund_info **/
CREATE TABLE agency_fund_info (
fund_num varchar(3) NOT NULL,
agency_num varchar(9) NOT NULL,
fund_type_num varchar(3) NOT NULL,
fund_type_name varchar NOT NULL,
fund_num varchar(6) NOT NULL,
fund_name varchar NOT NULL,
capped_ind boolean NOT NULL,
PRIMARY KEY (fund_num)
PRIMARY KEY (agency_num, fund_type_num, fund_num)
) WITHOUT ROWID;

CREATE INDEX ix_agency_fund_info_capped_ind ON agency_fund_info(capped_ind);


/** cpi **/
CREATE TABLE cpi (
Expand Down Expand Up @@ -155,6 +153,7 @@ CREATE TABLE pin (
exe_vet_dis_lt50 int CHECK(exe_vet_dis_lt50 >= 0) NOT NULL,
exe_vet_dis_50_69 int CHECK(exe_vet_dis_50_69 >= 0) NOT NULL,
exe_vet_dis_ge70 int CHECK(exe_vet_dis_ge70 >= 0) NOT NULL,
exe_vet_dis_100 int CHECK(exe_vet_dis_100 >= 0) NOT NULL,
exe_abate int CHECK(exe_abate >= 0) NOT NULL,
PRIMARY KEY (year, pin)
) WITHOUT ROWID;
Expand Down Expand Up @@ -224,7 +223,10 @@ CREATE TABLE tif_distribution (
tax_code_eav bigint CHECK(tax_code_eav >= 0) NOT NULL,
tax_code_frozen_eav bigint CHECK(tax_code_frozen_eav >= 0) NOT NULL,
tax_code_revenue bigint CHECK(tax_code_revenue >= 0) NOT NULL,
tax_code_distribution_pct double CHECK(tax_code_distribution_pct >= 0) NOT NULL,
tax_code_distribution_pct double CHECK((tax_code_distribution_pct >= 0)
AND (year >= 2024
OR tax_code_distribution_pct IS NOT NULL)),

PRIMARY KEY (year, agency_num, tax_code_num)
FOREIGN KEY (year, agency_num) REFERENCES tif_crosswalk(year, agency_num_dist)
) WITHOUT ROWID;
Expand All @@ -236,3 +238,31 @@ CREATE INDEX ix_tif_distribution_year_agency_num
ON tif_distribution(year, agency_num);
CREATE INDEX ix_tif_distribution_year_tax_code_num
ON tif_distribution(year, tax_code_num);

/** pin_tif_distribution **/
-- One row per (year, pin, agency_num), which is the table's primary key.
-- Every (year, agency_num) pair must also exist in
-- tif_crosswalk(year, agency_num_dist); that reference is only enforced while
-- foreign key support is switched on for the connection.
CREATE TABLE pin_tif_distribution (
    year                 int         NOT NULL,
    pin                  varchar(14) NOT NULL,
    agency_num           varchar(9)  NOT NULL,
    tax_code_num         varchar(5)  NOT NULL,
    tax_code_rate        double      CHECK(tax_code_rate >= 0)        NOT NULL,
    pin_eav              int         CHECK(pin_eav >= 0)              NOT NULL,
    pin_frozen_eav       int         CHECK(pin_frozen_eav >= 0)       NOT NULL,
    pin_revenue          double      CHECK(pin_revenue >= 0)          NOT NULL,
    pin_increment_eav    int         CHECK(pin_increment_eav >= 0)    NOT NULL,
    pin_distribution_pct double      CHECK(pin_distribution_pct >= 0) NOT NULL,
    -- The three transit_tif_to_* columns are the only nullable columns in this
    -- table. A CHECK evaluating to NULL is not a violation, so NULL is allowed.
    -- NOTE(review): presumably these are populated only when is_transit_tif
    -- is true -- confirm against the source data.
    transit_tif_to_cps   double      CHECK(transit_tif_to_cps >= 0)           ,
    transit_tif_to_tif   double      CHECK(transit_tif_to_tif >= 0)           ,
    transit_tif_to_dist  double      CHECK(transit_tif_to_dist >= 0)          ,
    is_transit_tif       boolean     NOT NULL,
    -- Table constraints are separated by a comma. SQLite's parser tolerates
    -- the comma being left out, but standard SQL requires it.
    PRIMARY KEY (year, pin, agency_num),
    FOREIGN KEY (year, agency_num) REFERENCES tif_crosswalk(year, agency_num_dist)
) WITHOUT ROWID;

-- Secondary indexes for lookups that do not follow the primary key's
-- (year, pin, agency_num) column order.
CREATE INDEX ix_pin_tif_distribution_year
    ON pin_tif_distribution(year);
CREATE INDEX ix_pin_tif_distribution_agency_num
    ON pin_tif_distribution(agency_num);
CREATE INDEX ix_pin_tif_distribution_tax_code_num
    ON pin_tif_distribution(tax_code_num);
CREATE INDEX ix_pin_tif_distribution_year_agency_num
    ON pin_tif_distribution(year, agency_num);
CREATE INDEX ix_pin_tif_distribution_year_tax_code_num
    ON pin_tif_distribution(year, tax_code_num);
Loading