ccao-data · kyrasturgill · Mar 4, 2026 · Dec 16, 2025 · Dec 16, 2025 · Jan 13, 2026
@@ -76,21 +76,26 @@ agency_fund <- map_dfr(file_names, function(file) {
       "ptell_levy", "fund_ptell_levy", "ptell_red_levy",
       "fund_ptell_reduced_levy"
     ))) %>%
-    rename_with(~"ptell_reduced_ind", any_of(c(
-      "ptell_ind", "reduction_ind", "rate_reduction_indicator",
-      "reduction_indicator"
-    ))) %>%
     rename_with(~"final_levy", any_of(c(
       "final_levy", "fund_final_levy"
     ))) %>%
     rename_with(~"final_rate", any_of(c(
       "fund_rate", "final_rate", "fund_final_rate", "final_fund_rate"
     ))) %>%
+    mutate(
+      fund_num =
+        ifelse(
+          str_length(fund) == 3,
+          paste0(fund, "000"),
+          fund
+        ),
+      fund_type_num = substr(fund, 1, 3)
+    ) %>%
     select(
       year,
-      agency_num = agency, fund_num = fund, fund_name, levy, loss_pct,
+      agency_num = agency, fund_type_num, fund_num, fund_name, levy, loss_pct,
       levy_plus_loss, rate_ceiling, max_levy, prelim_rate, ptell_reduced_levy,
-      ptell_reduced_ind, final_levy, final_rate
+      final_levy, final_rate
     ) %>%
     mutate(across(year, as.character))
 }) %>%
@@ -120,8 +125,6 @@ agency_fund <- map_dfr(file_names, function(file) {
     rate_ceiling = replace_na(rate_ceiling, 0),
     rate_ceiling = ifelse(final_rate == 0 & final_levy == 0, 0, rate_ceiling),
     ptell_reduced_levy = na_if(ptell_reduced_levy, 0),
-    ptell_reduced_ind = ptell_reduced_ind == "*",
-    ptell_reduced_ind = replace_na(ptell_reduced_ind, FALSE),
     final_rate = ifelse(
       agency_num == "050200000" & fund_num == "202" & year == 2006,
       0,
@@ -139,20 +142,25 @@ agency_fund <- map_dfr(file_names, function(file) {
       as.double
     )
   ) %>%
-  arrange(year, agency_num, fund_num)
+  arrange(year, agency_num, fund_num) %>%
+  # Remove duplicate rows included in Clerk report in 2024 for certain
+  # bond funds where levy data is empty
+  filter(
+    !is.na(levy_plus_loss)
+  )
 
 
 # agency_fund_info -------------------------------------------------------------
 
 # Breakout the fund names into their own table
 agency_fund_info <- agency_fund %>%
-  group_by(fund_num) %>%
+  group_by(fund_type_num, fund_num) %>%
   summarise(fund_name = calc_mode(fund_name)) %>%
   ungroup() %>%
   arrange(fund_num) %>%
   mutate(
     fund_name = str_trim(str_squish(fund_name)),
-    capped_ind = !fund_num %in% c(
+    capped_ind = !fund_type_num %in% c(
       "003", "027", "054", "182", "202", "259", "261", "284", "286", "287",
       "293", "294", "315", "320", "321", "322", "351", "400", "401", "402",
       "404", "405", "406", "407"
@@ -161,7 +169,7 @@ agency_fund_info <- agency_fund %>%
 
 # Drop names from the fund table since they're now stored separately
 agency_fund <- agency_fund %>%
-  select(-fund_name)
+  select(-fund_name, -fund_type_num)
 
 # Write the resulting datasets to S3
 arrow::write_parquet(
@@ -191,8 +199,7 @@ agency <- map_dfr(file_names, function(file) {
       ),
       across(
         c(
-          contains("year"), contains("agency"),
-          contains("reduction_type"), contains("agg_ext_base")
+          contains("year"), contains("agency")
         ),
         as.character
       )
@@ -202,10 +209,6 @@ agency <- map_dfr(file_names, function(file) {
     rename_with(~ str_remove(.x, "_18"), ends_with("_18")) %>%
     rename_with(~ str_remove(.x, "_num"), starts_with("agency")) %>%
     rename_with(~ str_replace(.x, "county", "cook"), any_of("county_eav")) %>%
-    rename_with(~"agg_ext_base_year", any_of(c(
-      "agg_ext_base_year", "agg_ext_base_yr", "agg_ext_base",
-      "prior_year", "agg_yr"
-    ))) %>%
     rename_with(~"lim_numerator", any_of(c(
       "lim_numerator", "prior_agg_ext"
     ))) %>%
@@ -218,66 +221,76 @@ agency <- map_dfr(file_names, function(file) {
     rename_with(~"curr_new_prop", any_of(c(
       "current_new_prop", "new_prop", "curr_new_prop", "current_new_property"
     ))) %>%
-    rename_with(~"lasalle_eav", any_of(c("lasalle_eav", "la_salle_eav"))) %>%
-    rename_with(~"mchenry_eav", any_of(c("mc_henry_eav", "mchency_eav"))) %>%
-    rename_with(
-      ~"reduction_type",
-      any_of(c("reduction_type", "reduction"))
-    ) %>%
     rename_with(~"reduction_pct", any_of(c(
       "reduction_percent", "reduction_factor", "clerk_reduction_factor"
     ))) %>%
-    rename_with(~"total_non_cap_ext", any_of(c(
-      "total_non_cap_ext", "final_non_cap_ext", "total_non_cap_extension"
-    ))) %>%
     rename_with(~"total_ext", any_of(c(
       "total_ext", "final_ext",
       "grand_total_ext"
     ))) %>%
+    rename_with(~ rep("lasalle_eav", length(.x)), any_of(c(
+      "lasalle_eav",
+      "la_salle_eav"
+    ))) %>%
+    rename_with(~ rep("mchenry_eav", length(.x)), any_of(c(
+      "mc_henry_eav",
+      "mchency_eav"
+    ))) %>%
     # Select, order, and rename columns
     select(
       year,
-      agency_num = agency, agency_name, home_rule_ind, agg_ext_base_year,
+      authority_num = any_of("authority"),
+      agency_num = agency, agency_name, home_rule_ind,
       lim_numerator, lim_denominator, lim_rate, prior_eav, curr_new_prop,
       ends_with("_eav"), percent_burden,
+      reduction_pct,
       starts_with("grand_total_"),
-      reduction_type, reduction_pct, total_non_cap_ext,
       any_of("total_ext")
     ) %>%
     rename_with(~ paste0("cty_", .x), ends_with("_eav")) %>%
-    select(-any_of("cty_total_eav")) %>%
     rename(
       prior_eav = cty_prior_eav,
-      cty_total_eav = cty_overall_eav,
       pct_burden = percent_burden
     ) %>%
     rename_with(
       ~ gsub("grand_total_", "total_", .x),
       starts_with("grand_total_")
-    ) %>%
-    relocate(total_ext, .after = everything())
+    )
 }) %>%
   mutate(
+    # One row is missing a Cook EAV value. Fill manually from prior year
+    cty_cook_eav = ifelse(
+      agency_num == "030580002" & year == "2006",
+      0,
+      cty_cook_eav
+    ),
+    across(starts_with("cty_"), ~ replace_na(.x, 0)),
     agency_num = str_pad(agency_num, 9, "left", "0"),
     agency_name = str_trim(str_squish(agency_name)),
-    agg_ext_base_year = as.integer(agg_ext_base_year),
-    agg_ext_base_year = na_if(agg_ext_base_year, 0),
     home_rule_ind = home_rule_ind %in% c("Y", "HR", "No PTELL"),
     home_rule_ind = replace_na(home_rule_ind, FALSE),
+    cty_overlap_eav = ifelse(year < "2024",
+      rowSums(across(starts_with(c(
+        "cty_dupage_eav",
+        "cty_lake_eav",
+        "cty_will_eav",
+        "cty_kane_eav",
+        "cty_mchenry_eav",
+        "cty_dekalb_eav",
+        "cty_kankakee_eav",
+        "cty_grundy_eav",
+        "cty_lasalle_eav",
+        "cty_livingston_eav"
+      )))),
+      cty_overlap_eav
+    ),
     across(
       c(
-        starts_with("lim_"), "agg_ext_base_year", "total_reduced_levy",
+        starts_with("lim_"),
         starts_with("reduction_")
       ),
       ~ ifelse(home_rule_ind, NA, .x)
     ),
-    # One row is missing a Cook EAV value. Fill manually from prior year
-    cty_cook_eav = ifelse(
-      agency_num == "030580002" & year == "2006",
-      0,
-      cty_cook_eav
-    ),
-    across(starts_with("cty_"), ~ replace_na(.x, 0)),
     # Make all percentages decimals
     across(
       pct_burden,
@@ -286,11 +299,6 @@ agency <- map_dfr(file_names, function(file) {
     across(
       reduction_pct,
       ~ ifelse(!year %in% c(2017), .x / 100, .x)
-    ),
-    reduction_type = ifelse(
-      !toupper(reduction_type) %in% c("NO REDUCTION", "NONE"),
-      toupper(reduction_type),
-      NA_character_
     )
   ) %>%
   arrange(year, agency_num) %>%
@@ -307,11 +315,13 @@ agency <- map_dfr(file_names, function(file) {
     across(
       c(
         lim_rate, pct_burden, total_prelim_rate, total_final_rate,
-        reduction_pct, total_non_cap_ext, total_ext
+        reduction_pct, total_ext
       ),
       ~ as.double(.x)
     )
-  )
+  ) %>%
+  select(-total_reduced_levy) %>%
+  relocate(total_ext, .after = everything())
 
 # Tax year 2013 is missing the total levy columns from its overview sheet, but
 # we can fill it in by joining the totals from each fund sheet
@@ -322,22 +332,23 @@ agency_fund_2013 <- agency_fund %>%
     total_levy = sum(levy),
     total_max_levy = sum(max_levy),
     total_prelim_rate = ceiling(sum(prelim_rate) * 1000) / 1000,
-    total_reduced_levy = sum(ptell_reduced_levy),
     total_final_levy = sum(final_levy),
     total_final_rate = sum(final_rate)
   )
 
 agency_2013 <- agency %>%
   filter(year == 2013) %>%
+  mutate(cty_total_eav = as.integer64(cty_overall_eav)) %>%
   select(-c(
     total_levy, total_max_levy, total_prelim_rate,
-    total_reduced_levy, total_final_levy, total_final_rate
+    total_final_levy, total_final_rate
   )) %>%
   left_join(agency_fund_2013, by = "agency_num")
 
 agency <- agency %>%
   filter(year != 2013) %>%
   bind_rows(agency_2013) %>%
+  select(-cty_overall_eav) %>%
   arrange(year, agency_num)
 
 
@@ -549,6 +560,62 @@ agency_info <- agency_info %>%
     )
   )
 
+# Load 2024 tax code agency rate file to import legacy-new agency_num crosswalk
+agency_legacy_cw <-
+  openxlsx::read.xlsx(
+    "data-raw/tax_code/2024-tax-code-agency-rate-file.xlsx"
+  ) %>%
+  set_names(snakecase::to_snake_case(names(.))) %>%
+  select(
+    agency_num_24 = agency,
+    agency_num = legacy_num,
+    authority_num = authority,
+    agency_name_24 = authority_name
+  ) %>%
+  unique() %>%
+  # Account for error in Clerk's report which lists Village of Skokie Library
+  # Fund twice
+  filter(!(agency_num == "031170001" & agency_num_24 == "031170000")) %>%
+  # Correct error in Clerk's report which lists incorrect agency number for
+  # the TIF VIL OF OLYMPIA FIELDS-GOV HWY/VOLL
+  mutate(
+    across(
+      c(agency_num, agency_num_24),
+      ~ if_else(
+        agency_name_24 == "TIF VIL OF OLYMPIA FIELDS-GOV HWY/VOLL",
+        "030930502",
+        .x
+      )
+    )
+  )
+
+agency_info <- agency_info %>%
+  left_join(agency_legacy_cw, by = "agency_num") %>%
+  mutate(
+    agency_change_24 = coalesce(agency_num != agency_num_24, FALSE),
+    agency_num_24 =
+      ifelse(agency_change_24,
+        agency_num_24,
+        NA
+      ),
+    agency_name_24 =
+      ifelse(agency_change_24,
+        agency_name_24,
+        NA
+      )
+  ) %>%
+  select(
+    agency_num,
+    agency_name,
+    agency_name_short,
+    agency_name_original,
+    major_type,
+    minor_type,
+    agency_num_24,
+    agency_name_24,
+    agency_change_24
+  )
+
 # Write both data sets to S3
 arrow::write_parquet(
   x = agency %>% select(-agency_name),

@@ -36,18 +36,19 @@ tax_code <- map_dfr(file_names, function(file) {
   df %>%
     set_names(snakecase::to_snake_case(names(.))) %>%
     select(-contains("year")) %>%
-    rename_with(
-      ~ str_replace(.x, "taxcode", "tax_code"),
-      starts_with("taxcode")
-    ) %>%
-    rename_with(
-      ~ str_replace(.x, "ag_rate", "agency_rate"),
-      starts_with("ag_rate")
-    ) %>%
-    rename_with(
-      ~ str_replace(.x, "code_rate", "tax_code_rate"),
-      starts_with("code_rate")
-    ) %>%
+    rename_with(~ str_remove(.x, "_24"), ends_with("_24")) %>%
+    rename_with(~ rep("tax_code", length(.x)), any_of(c(
+      "taxcode", "code"
+    ))) %>%
+    rename_with(~ rep("agency_name", length(.x)), any_of(c(
+      "authority_name"
+    ))) %>%
+    rename_with(~ rep("agency_rate", length(.x)), any_of(c(
+      "ag_rate", "auth_rate"
+    ))) %>%
+    rename_with(~ rep("tax_code_rate", length(.x)), any_of(c(
+      "code_rate", "taxcode_rate"
+    ))) %>%
     mutate(
       year = as.character(year_ext),
       agency_rate = as.numeric(agency_rate),
@@ -57,12 +58,15 @@ tax_code <- map_dfr(file_names, function(file) {
 
 # Clean up resulting combined data frame
 tax_code <- tax_code %>%
+  filter(!grepl("TIF", agency_name)) %>%
   select(
     year,
     agency_num = agency, agency_rate,
     tax_code_num = tax_code, tax_code_rate
   ) %>%
-  arrange(year, agency_num, tax_code_num)
+  arrange(year, agency_num, tax_code_num) %>%
+  distinct()
+
 
 arrow::write_dataset(
   dataset = tax_code,