Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
aed6791
Agency and tax code rate ingest code revisions to enable reading 2024…
kyrasturgill Dec 16, 2025
40383a6
Upload 2024 agency and tax code rate reports
kyrasturgill Dec 16, 2025
e0f6f03
Upload pin tif distribution data
kyrasturgill Jan 13, 2026
c13d197
Upload 2024 data for existing TIF reports
kyrasturgill Jan 13, 2026
90c0843
Ingest code adjustments to account for 2024 data changes
kyrasturgill Jan 13, 2026
5df954e
Update tif_agency_names.csv and add s3 bucket for pin_tif_dist
kyrasturgill Jan 14, 2026
a2ee000
Merge pre-commit test fixes
kyrasturgill Jan 15, 2026
738eb12
Pre-commit check fixing style errors
kyrasturgill Jan 15, 2026
76154b3
Fix the overlap eav fields, keep fund_type_num in agency_fund_info table
kyrasturgill Jan 21, 2026
42a6abd
Add updated 2024 agency_num to agency_info table to enable crosswalk …
kyrasturgill Jan 22, 2026
888e638
linting
kyrasturgill Jan 22, 2026
827ce0e
Styler fixes
kyrasturgill Jan 22, 2026
a1df266
Put cty_overall_eav back in until we understand what it is
kyrasturgill Jan 22, 2026
0e9e3d4
Replace ifelse() with coalesce()
kyrasturgill Feb 23, 2026
11fffce
Review edits
kyrasturgill Feb 23, 2026
3b25d7c
More review edits.
kyrasturgill Feb 23, 2026
d263e35
Replace cty_total_eav with cty_overall_eav for 2013 and then remove c…
kyrasturgill Feb 24, 2026
5c805e1
Fix duplicate issue generated by data errors in 2024 Clerk reports
kyrasturgill Feb 25, 2026
b500b4e
Fix linting problem
kyrasturgill Feb 25, 2026
19d305b
Add field rename to existing pipe
kyrasturgill Feb 26, 2026
1fd2d97
Temporary fix to ensure all TIF agency numbers are included in tif_cr…
kyrasturgill Mar 3, 2026
4de82f4
Account for transit tif PINs getting duped
kyrasturgill Mar 3, 2026
e08a921
Remove select agency_name
kyrasturgill Mar 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions data-raw/agency/2024-agency-rate-report.xlsx
Git LFS file not shown
171 changes: 119 additions & 52 deletions data-raw/agency/agency.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,21 +76,26 @@ agency_fund <- map_dfr(file_names, function(file) {
"ptell_levy", "fund_ptell_levy", "ptell_red_levy",
"fund_ptell_reduced_levy"
))) %>%
rename_with(~"ptell_reduced_ind", any_of(c(
"ptell_ind", "reduction_ind", "rate_reduction_indicator",
"reduction_indicator"
))) %>%
rename_with(~"final_levy", any_of(c(
"final_levy", "fund_final_levy"
))) %>%
rename_with(~"final_rate", any_of(c(
"fund_rate", "final_rate", "fund_final_rate", "final_fund_rate"
))) %>%
mutate(
fund_num =
ifelse(
str_length(fund) == 3,
paste0(fund, "000"),
fund
),
fund_type_num = substr(fund, 1, 3)
) %>%
select(
year,
agency_num = agency, fund_num = fund, fund_name, levy, loss_pct,
agency_num = agency, fund_type_num, fund_num, fund_name, levy, loss_pct,
levy_plus_loss, rate_ceiling, max_levy, prelim_rate, ptell_reduced_levy,
ptell_reduced_ind, final_levy, final_rate
final_levy, final_rate
) %>%
mutate(across(year, as.character))
}) %>%
Expand Down Expand Up @@ -120,8 +125,6 @@ agency_fund <- map_dfr(file_names, function(file) {
rate_ceiling = replace_na(rate_ceiling, 0),
rate_ceiling = ifelse(final_rate == 0 & final_levy == 0, 0, rate_ceiling),
ptell_reduced_levy = na_if(ptell_reduced_levy, 0),
ptell_reduced_ind = ptell_reduced_ind == "*",
ptell_reduced_ind = replace_na(ptell_reduced_ind, FALSE),
final_rate = ifelse(
agency_num == "050200000" & fund_num == "202" & year == 2006,
0,
Expand All @@ -139,20 +142,25 @@ agency_fund <- map_dfr(file_names, function(file) {
as.double
)
) %>%
arrange(year, agency_num, fund_num)
arrange(year, agency_num, fund_num) %>%
# Remove duplicate rows included in Clerk report in 2024 for certain
# bond funds where levy data is empty
filter(
!is.na(levy_plus_loss)
)


# agency_fund_info -------------------------------------------------------------

# Break out the fund names into their own table
agency_fund_info <- agency_fund %>%
group_by(fund_num) %>%
group_by(fund_type_num, fund_num) %>%
summarise(fund_name = calc_mode(fund_name)) %>%
ungroup() %>%
arrange(fund_num) %>%
mutate(
fund_name = str_trim(str_squish(fund_name)),
capped_ind = !fund_num %in% c(
capped_ind = !fund_type_num %in% c(
"003", "027", "054", "182", "202", "259", "261", "284", "286", "287",
"293", "294", "315", "320", "321", "322", "351", "400", "401", "402",
"404", "405", "406", "407"
Expand All @@ -161,7 +169,7 @@ agency_fund_info <- agency_fund %>%

# Drop names from the fund table since they're now stored separately
agency_fund <- agency_fund %>%
select(-fund_name)
select(-fund_name, -fund_type_num)

# Write the resulting datasets to S3
arrow::write_parquet(
Expand Down Expand Up @@ -191,8 +199,7 @@ agency <- map_dfr(file_names, function(file) {
),
across(
c(
contains("year"), contains("agency"),
contains("reduction_type"), contains("agg_ext_base")
contains("year"), contains("agency")
),
as.character
)
Expand All @@ -202,10 +209,6 @@ agency <- map_dfr(file_names, function(file) {
rename_with(~ str_remove(.x, "_18"), ends_with("_18")) %>%
rename_with(~ str_remove(.x, "_num"), starts_with("agency")) %>%
rename_with(~ str_replace(.x, "county", "cook"), any_of("county_eav")) %>%
rename_with(~"agg_ext_base_year", any_of(c(
"agg_ext_base_year", "agg_ext_base_yr", "agg_ext_base",
"prior_year", "agg_yr"
))) %>%
rename_with(~"lim_numerator", any_of(c(
"lim_numerator", "prior_agg_ext"
))) %>%
Expand All @@ -218,66 +221,76 @@ agency <- map_dfr(file_names, function(file) {
rename_with(~"curr_new_prop", any_of(c(
"current_new_prop", "new_prop", "curr_new_prop", "current_new_property"
))) %>%
rename_with(~"lasalle_eav", any_of(c("lasalle_eav", "la_salle_eav"))) %>%
rename_with(~"mchenry_eav", any_of(c("mc_henry_eav", "mchency_eav"))) %>%
rename_with(
~"reduction_type",
any_of(c("reduction_type", "reduction"))
) %>%
rename_with(~"reduction_pct", any_of(c(
"reduction_percent", "reduction_factor", "clerk_reduction_factor"
))) %>%
rename_with(~"total_non_cap_ext", any_of(c(
"total_non_cap_ext", "final_non_cap_ext", "total_non_cap_extension"
))) %>%
rename_with(~"total_ext", any_of(c(
"total_ext", "final_ext",
"grand_total_ext"
))) %>%
rename_with(~ rep("lasalle_eav", length(.x)), any_of(c(
"lasalle_eav",
"la_salle_eav"
))) %>%
rename_with(~ rep("mchenry_eav", length(.x)), any_of(c(
"mc_henry_eav",
"mchency_eav"
))) %>%
# Select, order, and rename columns
select(
year,
agency_num = agency, agency_name, home_rule_ind, agg_ext_base_year,
authority_num = any_of("authority"),
agency_num = agency, agency_name, home_rule_ind,
lim_numerator, lim_denominator, lim_rate, prior_eav, curr_new_prop,
ends_with("_eav"), percent_burden,
reduction_pct,
starts_with("grand_total_"),
reduction_type, reduction_pct, total_non_cap_ext,
any_of("total_ext")
) %>%
rename_with(~ paste0("cty_", .x), ends_with("_eav")) %>%
select(-any_of("cty_total_eav")) %>%
rename(
prior_eav = cty_prior_eav,
cty_total_eav = cty_overall_eav,
pct_burden = percent_burden
) %>%
rename_with(
~ gsub("grand_total_", "total_", .x),
starts_with("grand_total_")
) %>%
relocate(total_ext, .after = everything())
)
}) %>%
mutate(
# One row is missing a Cook EAV value. Fill manually from prior year
cty_cook_eav = ifelse(
agency_num == "030580002" & year == "2006",
0,
cty_cook_eav
),
across(starts_with("cty_"), ~ replace_na(.x, 0)),
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Question, non-blocking] No action necessary right now, but I'm wondering about this choice to coerce these nulls to 0 in the context of 2024 data, where these County fields are always null. If we were to keep them null, we would hew closer to the actual contents of the input data, where these fields are totally missing. However, that would have the downside of requiring users to handle nulls whenever using these columns. I don't have a good enough grasp of the context to be able to make a decision, but I wanted to raise it as a quirk of the data that I noticed while QCing.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree - this was bothering me because it is not technically correct to call those 0. I think my preference would be to not replace those NAs with 0, but happy to discuss more.

agency_num = str_pad(agency_num, 9, "left", "0"),
agency_name = str_trim(str_squish(agency_name)),
agg_ext_base_year = as.integer(agg_ext_base_year),
agg_ext_base_year = na_if(agg_ext_base_year, 0),
home_rule_ind = home_rule_ind %in% c("Y", "HR", "No PTELL"),
home_rule_ind = replace_na(home_rule_ind, FALSE),
cty_overlap_eav = ifelse(year < "2024",
rowSums(across(starts_with(c(
"cty_dupage_eav",
"cty_lake_eav",
"cty_will_eav",
"cty_kane_eav",
"cty_mchenry_eav",
"cty_dekalb_eav",
"cty_kankakee_eav",
"cty_grundy_eav",
"cty_lasalle_eav",
"cty_livingston_eav"
)))),
cty_overlap_eav
),
across(
c(
starts_with("lim_"), "agg_ext_base_year", "total_reduced_levy",
starts_with("lim_"),
starts_with("reduction_")
),
~ ifelse(home_rule_ind, NA, .x)
),
# One row is missing a Cook EAV value. Fill manually from prior year
cty_cook_eav = ifelse(
agency_num == "030580002" & year == "2006",
0,
cty_cook_eav
),
across(starts_with("cty_"), ~ replace_na(.x, 0)),
# Make all percentages decimals
across(
pct_burden,
Expand All @@ -286,11 +299,6 @@ agency <- map_dfr(file_names, function(file) {
across(
reduction_pct,
~ ifelse(!year %in% c(2017), .x / 100, .x)
),
reduction_type = ifelse(
!toupper(reduction_type) %in% c("NO REDUCTION", "NONE"),
toupper(reduction_type),
NA_character_
)
) %>%
arrange(year, agency_num) %>%
Expand All @@ -307,11 +315,13 @@ agency <- map_dfr(file_names, function(file) {
across(
c(
lim_rate, pct_burden, total_prelim_rate, total_final_rate,
reduction_pct, total_non_cap_ext, total_ext
reduction_pct, total_ext
),
~ as.double(.x)
)
)
) %>%
select(-total_reduced_levy) %>%
Comment thread
kyrasturgill marked this conversation as resolved.
relocate(total_ext, .after = everything())

# Tax year 2013 is missing the total levy columns from its overview sheet, but
# we can fill it in by joining the totals from each fund sheet
Expand All @@ -322,22 +332,23 @@ agency_fund_2013 <- agency_fund %>%
total_levy = sum(levy),
total_max_levy = sum(max_levy),
total_prelim_rate = ceiling(sum(prelim_rate) * 1000) / 1000,
total_reduced_levy = sum(ptell_reduced_levy),
total_final_levy = sum(final_levy),
total_final_rate = sum(final_rate)
)

agency_2013 <- agency %>%
filter(year == 2013) %>%
mutate(cty_total_eav = as.integer64(cty_overall_eav)) %>%
select(-c(
total_levy, total_max_levy, total_prelim_rate,
total_reduced_levy, total_final_levy, total_final_rate
total_final_levy, total_final_rate
)) %>%
left_join(agency_fund_2013, by = "agency_num")

# Swap the original 2013 rows for the versions rebuilt with the fund-level
# totals, then drop cty_overall_eav from the combined table and re-sort
agency <- bind_rows(
  filter(agency, year != 2013),
  agency_2013
) %>%
  select(-cty_overall_eav) %>%
  arrange(year, agency_num)
Comment on lines 339 to 352
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jeancochrane - I added this code to address cty_overall_eav. This field is no longer included in the 2024 reports, but it was previously used to define cty_total_eav. cty_total_eav is itself a field in the source data, but for some reason it was chosen to replace it with cty_overall_eav. I believe cty_total_eav from the source data is the correct field to use. The only issue is that the 2013 data does not have cty_total_eav, but it does have cty_overall_eav. Since we already have this code to account for the oddities of 2013, I tweaked it to also carry over cty_overall_eav to replace cty_total_eav. As mentioned above, there are a few agencies where there's a slight discrepancy between the two values - so this isn't perfect, but I believe it's a suitable workaround.



Expand Down Expand Up @@ -549,6 +560,62 @@ agency_info <- agency_info %>%
)
)

# Load 2024 tax code agency rate file to import legacy-new agency_num crosswalk
# Result: one row per distinct combination of new agency number
# (agency_num_24), legacy agency number (agency_num), authority number, and
# 2024 authority name found in the file
agency_legacy_cw <-
  openxlsx::read.xlsx(
    "data-raw/tax_code/2024-tax-code-agency-rate-file.xlsx"
  ) %>%
  set_names(snakecase::to_snake_case(names(.))) %>%
  select(
    agency_num_24 = agency,
    agency_num = legacy_num,
    authority_num = authority,
    agency_name_24 = authority_name
  ) %>%
  distinct() %>%
  # Account for error in Clerk's report which lists Village of Skokie Library
  # Fund twice. `%in%` is used rather than `==` so a missing value in either
  # column evaluates to FALSE instead of NA; filter() drops rows whose
  # condition is NA, which would silently remove unrelated rows
  filter(
    !(agency_num %in% "031170001" & agency_num_24 %in% "031170000")
  ) %>%
  # Correct error in Clerk's report which lists incorrect agency number for
  # the TIF VIL OF OLYMPIA FIELDS-GOV HWY/VOLL. `%in%` keeps rows with a
  # missing agency_name_24 untouched; with `==` the if_else() condition would
  # be NA and both agency number columns would be overwritten with NA
  mutate(
    across(
      c(agency_num, agency_num_24),
      ~ if_else(
        agency_name_24 %in% "TIF VIL OF OLYMPIA FIELDS-GOV HWY/VOLL",
        "030930502",
        .x
      )
    )
  )

agency_info <- agency_info %>%
left_join(agency_legacy_cw, by = "agency_num") %>%
mutate(
agency_change_24 = coalesce(agency_num != agency_num_24, FALSE),
agency_num_24 =
ifelse(agency_change_24,
agency_num_24,
NA
),
agency_name_24 =
ifelse(agency_change_24,
agency_name_24,
NA
)
) %>%
select(
agency_num,
agency_name,
agency_name_short,
agency_name_original,
Comment on lines +610 to +611
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Question, non-blocking] Do you think it's a problem that we don't have short names for agencies that changed in 2024?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm I don't think I understand, do you mean that we're missing a field along the lines of agency_name_short_24?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's right, sorry for the lack of clarity! I don't really know what we use these short names for, so I'll leave it up to you to decide if it's a problem that they're not included in the crosswalk.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see... I am also not positive - I'm not totally confident as to how this crosswalk is going to work out so I imagine we'll likely need to revisit it. Hopefully we will have better clarity after testing it as to whether we need the other name iterations in the crosswalk.

major_type,
minor_type,
agency_num_24,
agency_name_24,
agency_change_24
Comment thread
kyrasturgill marked this conversation as resolved.
)

# Write both data sets to S3
arrow::write_parquet(
x = agency %>% select(-agency_name),
Expand Down
4 changes: 2 additions & 2 deletions data-raw/agency/tif_agency_names.csv
Git LFS file not shown
3 changes: 3 additions & 0 deletions data-raw/tax_code/2024-tax-code-agency-rate-file.xlsx
Git LFS file not shown
30 changes: 17 additions & 13 deletions data-raw/tax_code/tax_code.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,19 @@ tax_code <- map_dfr(file_names, function(file) {
df %>%
set_names(snakecase::to_snake_case(names(.))) %>%
select(-contains("year")) %>%
rename_with(
~ str_replace(.x, "taxcode", "tax_code"),
starts_with("taxcode")
) %>%
rename_with(
~ str_replace(.x, "ag_rate", "agency_rate"),
starts_with("ag_rate")
) %>%
rename_with(
~ str_replace(.x, "code_rate", "tax_code_rate"),
starts_with("code_rate")
) %>%
rename_with(~ str_remove(.x, "_24"), ends_with("_24")) %>%
rename_with(~ rep("tax_code", length(.x)), any_of(c(
"taxcode", "code"
))) %>%
rename_with(~ rep("agency_name", length(.x)), any_of(c(
"authority_name"
))) %>%
rename_with(~ rep("agency_rate", length(.x)), any_of(c(
"ag_rate", "auth_rate"
))) %>%
rename_with(~ rep("tax_code_rate", length(.x)), any_of(c(
"code_rate", "taxcode_rate"
))) %>%
mutate(
year = as.character(year_ext),
agency_rate = as.numeric(agency_rate),
Expand All @@ -57,12 +58,15 @@ tax_code <- map_dfr(file_names, function(file) {

# Clean up resulting combined data frame
tax_code <- tax_code %>%
filter(!grepl("TIF", agency_name)) %>%
select(
year,
agency_num = agency, agency_rate,
tax_code_num = tax_code, tax_code_rate
) %>%
arrange(year, agency_num, tax_code_num)
arrange(year, agency_num, tax_code_num) %>%
distinct()


arrow::write_dataset(
dataset = tax_code,
Expand Down
Git LFS file not shown
3 changes: 3 additions & 0 deletions data-raw/tif/main/2024-cook-county-tif-summary.xlsx
Git LFS file not shown
3 changes: 3 additions & 0 deletions data-raw/tif/pin_distribution/2024-tif-pin-list.xlsx
Git LFS file not shown
Loading