-
Notifications
You must be signed in to change notification settings - Fork 4
Add 2024 agency, tax code and TIF reports #65
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
aed6791
40383a6
e0f6f03
c13d197
90c0843
5df954e
a2ee000
738eb12
76154b3
42a6abd
888e638
827ce0e
a1df266
0e9e3d4
11fffce
3b25d7c
d263e35
5c805e1
b500b4e
19d305b
1fd2d97
4de82f4
e08a921
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -76,21 +76,26 @@ agency_fund <- map_dfr(file_names, function(file) { | |
| "ptell_levy", "fund_ptell_levy", "ptell_red_levy", | ||
| "fund_ptell_reduced_levy" | ||
| ))) %>% | ||
| rename_with(~"ptell_reduced_ind", any_of(c( | ||
| "ptell_ind", "reduction_ind", "rate_reduction_indicator", | ||
| "reduction_indicator" | ||
| ))) %>% | ||
| rename_with(~"final_levy", any_of(c( | ||
| "final_levy", "fund_final_levy" | ||
| ))) %>% | ||
| rename_with(~"final_rate", any_of(c( | ||
| "fund_rate", "final_rate", "fund_final_rate", "final_fund_rate" | ||
| ))) %>% | ||
| mutate( | ||
| fund_num = | ||
| ifelse( | ||
| str_length(fund) == 3, | ||
| paste0(fund, "000"), | ||
| fund | ||
| ), | ||
| fund_type_num = substr(fund, 1, 3) | ||
| ) %>% | ||
| select( | ||
| year, | ||
| agency_num = agency, fund_num = fund, fund_name, levy, loss_pct, | ||
| agency_num = agency, fund_type_num, fund_num, fund_name, levy, loss_pct, | ||
| levy_plus_loss, rate_ceiling, max_levy, prelim_rate, ptell_reduced_levy, | ||
| ptell_reduced_ind, final_levy, final_rate | ||
| final_levy, final_rate | ||
| ) %>% | ||
| mutate(across(year, as.character)) | ||
| }) %>% | ||
|
|
@@ -120,8 +125,6 @@ agency_fund <- map_dfr(file_names, function(file) { | |
| rate_ceiling = replace_na(rate_ceiling, 0), | ||
| rate_ceiling = ifelse(final_rate == 0 & final_levy == 0, 0, rate_ceiling), | ||
| ptell_reduced_levy = na_if(ptell_reduced_levy, 0), | ||
| ptell_reduced_ind = ptell_reduced_ind == "*", | ||
| ptell_reduced_ind = replace_na(ptell_reduced_ind, FALSE), | ||
| final_rate = ifelse( | ||
| agency_num == "050200000" & fund_num == "202" & year == 2006, | ||
| 0, | ||
|
|
@@ -139,20 +142,25 @@ agency_fund <- map_dfr(file_names, function(file) { | |
| as.double | ||
| ) | ||
| ) %>% | ||
| arrange(year, agency_num, fund_num) | ||
| arrange(year, agency_num, fund_num) %>% | ||
| # Remove duplicate rows included in Clerk report in 2024 for certain | ||
| # bond funds where levy data is empty | ||
| filter( | ||
| !is.na(levy_plus_loss) | ||
| ) | ||
|
|
||
|
|
||
| # agency_fund_info ------------------------------------------------------------- | ||
|
|
||
| # Breakout the fund names into their own table | ||
| agency_fund_info <- agency_fund %>% | ||
| group_by(fund_num) %>% | ||
| group_by(fund_type_num, fund_num) %>% | ||
| summarise(fund_name = calc_mode(fund_name)) %>% | ||
| ungroup() %>% | ||
| arrange(fund_num) %>% | ||
| mutate( | ||
| fund_name = str_trim(str_squish(fund_name)), | ||
| capped_ind = !fund_num %in% c( | ||
| capped_ind = !fund_type_num %in% c( | ||
| "003", "027", "054", "182", "202", "259", "261", "284", "286", "287", | ||
| "293", "294", "315", "320", "321", "322", "351", "400", "401", "402", | ||
| "404", "405", "406", "407" | ||
|
|
@@ -161,7 +169,7 @@ agency_fund_info <- agency_fund %>% | |
|
|
||
| # Drop names from the fund table since they're now stored separately | ||
| agency_fund <- agency_fund %>% | ||
| select(-fund_name) | ||
| select(-fund_name, -fund_type_num) | ||
|
|
||
| # Write the resulting datasets to S3 | ||
| arrow::write_parquet( | ||
|
|
@@ -191,8 +199,7 @@ agency <- map_dfr(file_names, function(file) { | |
| ), | ||
| across( | ||
| c( | ||
| contains("year"), contains("agency"), | ||
| contains("reduction_type"), contains("agg_ext_base") | ||
| contains("year"), contains("agency") | ||
| ), | ||
| as.character | ||
| ) | ||
|
|
@@ -202,10 +209,6 @@ agency <- map_dfr(file_names, function(file) { | |
| rename_with(~ str_remove(.x, "_18"), ends_with("_18")) %>% | ||
| rename_with(~ str_remove(.x, "_num"), starts_with("agency")) %>% | ||
| rename_with(~ str_replace(.x, "county", "cook"), any_of("county_eav")) %>% | ||
| rename_with(~"agg_ext_base_year", any_of(c( | ||
| "agg_ext_base_year", "agg_ext_base_yr", "agg_ext_base", | ||
| "prior_year", "agg_yr" | ||
| ))) %>% | ||
| rename_with(~"lim_numerator", any_of(c( | ||
| "lim_numerator", "prior_agg_ext" | ||
| ))) %>% | ||
|
|
@@ -218,66 +221,76 @@ agency <- map_dfr(file_names, function(file) { | |
| rename_with(~"curr_new_prop", any_of(c( | ||
| "current_new_prop", "new_prop", "curr_new_prop", "current_new_property" | ||
| ))) %>% | ||
| rename_with(~"lasalle_eav", any_of(c("lasalle_eav", "la_salle_eav"))) %>% | ||
| rename_with(~"mchenry_eav", any_of(c("mc_henry_eav", "mchency_eav"))) %>% | ||
| rename_with( | ||
| ~"reduction_type", | ||
| any_of(c("reduction_type", "reduction")) | ||
| ) %>% | ||
| rename_with(~"reduction_pct", any_of(c( | ||
| "reduction_percent", "reduction_factor", "clerk_reduction_factor" | ||
| ))) %>% | ||
| rename_with(~"total_non_cap_ext", any_of(c( | ||
| "total_non_cap_ext", "final_non_cap_ext", "total_non_cap_extension" | ||
| ))) %>% | ||
| rename_with(~"total_ext", any_of(c( | ||
| "total_ext", "final_ext", | ||
| "grand_total_ext" | ||
| ))) %>% | ||
| rename_with(~ rep("lasalle_eav", length(.x)), any_of(c( | ||
| "lasalle_eav", | ||
| "la_salle_eav" | ||
| ))) %>% | ||
| rename_with(~ rep("mchenry_eav", length(.x)), any_of(c( | ||
| "mc_henry_eav", | ||
| "mchency_eav" | ||
| ))) %>% | ||
| # Select, order, and rename columns | ||
| select( | ||
| year, | ||
| agency_num = agency, agency_name, home_rule_ind, agg_ext_base_year, | ||
| authority_num = any_of("authority"), | ||
| agency_num = agency, agency_name, home_rule_ind, | ||
| lim_numerator, lim_denominator, lim_rate, prior_eav, curr_new_prop, | ||
| ends_with("_eav"), percent_burden, | ||
| reduction_pct, | ||
| starts_with("grand_total_"), | ||
| reduction_type, reduction_pct, total_non_cap_ext, | ||
| any_of("total_ext") | ||
| ) %>% | ||
| rename_with(~ paste0("cty_", .x), ends_with("_eav")) %>% | ||
| select(-any_of("cty_total_eav")) %>% | ||
| rename( | ||
| prior_eav = cty_prior_eav, | ||
| cty_total_eav = cty_overall_eav, | ||
| pct_burden = percent_burden | ||
| ) %>% | ||
| rename_with( | ||
| ~ gsub("grand_total_", "total_", .x), | ||
| starts_with("grand_total_") | ||
| ) %>% | ||
| relocate(total_ext, .after = everything()) | ||
| ) | ||
| }) %>% | ||
| mutate( | ||
| # One row is missing a Cook EAV value. Fill manually from prior year | ||
| cty_cook_eav = ifelse( | ||
| agency_num == "030580002" & year == "2006", | ||
| 0, | ||
| cty_cook_eav | ||
| ), | ||
| across(starts_with("cty_"), ~ replace_na(.x, 0)), | ||
| agency_num = str_pad(agency_num, 9, "left", "0"), | ||
| agency_name = str_trim(str_squish(agency_name)), | ||
| agg_ext_base_year = as.integer(agg_ext_base_year), | ||
| agg_ext_base_year = na_if(agg_ext_base_year, 0), | ||
| home_rule_ind = home_rule_ind %in% c("Y", "HR", "No PTELL"), | ||
| home_rule_ind = replace_na(home_rule_ind, FALSE), | ||
| cty_overlap_eav = ifelse(year < "2024", | ||
| rowSums(across(starts_with(c( | ||
| "cty_dupage_eav", | ||
| "cty_lake_eav", | ||
| "cty_will_eav", | ||
| "cty_kane_eav", | ||
| "cty_mchenry_eav", | ||
| "cty_dekalb_eav", | ||
| "cty_kankakee_eav", | ||
| "cty_grundy_eav", | ||
| "cty_lasalle_eav", | ||
| "cty_livingston_eav" | ||
| )))), | ||
| cty_overlap_eav | ||
| ), | ||
| across( | ||
| c( | ||
| starts_with("lim_"), "agg_ext_base_year", "total_reduced_levy", | ||
| starts_with("lim_"), | ||
| starts_with("reduction_") | ||
| ), | ||
| ~ ifelse(home_rule_ind, NA, .x) | ||
| ), | ||
| # One row is missing a Cook EAV value. Fill manually from prior year | ||
| cty_cook_eav = ifelse( | ||
| agency_num == "030580002" & year == "2006", | ||
| 0, | ||
| cty_cook_eav | ||
| ), | ||
| across(starts_with("cty_"), ~ replace_na(.x, 0)), | ||
| # Make all percentages decimals | ||
| across( | ||
| pct_burden, | ||
|
|
@@ -286,11 +299,6 @@ agency <- map_dfr(file_names, function(file) { | |
| across( | ||
| reduction_pct, | ||
| ~ ifelse(!year %in% c(2017), .x / 100, .x) | ||
| ), | ||
| reduction_type = ifelse( | ||
| !toupper(reduction_type) %in% c("NO REDUCTION", "NONE"), | ||
| toupper(reduction_type), | ||
| NA_character_ | ||
| ) | ||
| ) %>% | ||
| arrange(year, agency_num) %>% | ||
|
|
@@ -307,11 +315,13 @@ agency <- map_dfr(file_names, function(file) { | |
| across( | ||
| c( | ||
| lim_rate, pct_burden, total_prelim_rate, total_final_rate, | ||
| reduction_pct, total_non_cap_ext, total_ext | ||
| reduction_pct, total_ext | ||
| ), | ||
| ~ as.double(.x) | ||
| ) | ||
| ) | ||
| ) %>% | ||
| select(-total_reduced_levy) %>% | ||
|
kyrasturgill marked this conversation as resolved.
|
||
| relocate(total_ext, .after = everything()) | ||
|
|
||
| # Tax year 2013 is missing the total levy columns from its overview sheet, but | ||
| # we can fill it in by joining the totals from each fund sheet | ||
|
|
@@ -322,22 +332,23 @@ agency_fund_2013 <- agency_fund %>% | |
| total_levy = sum(levy), | ||
| total_max_levy = sum(max_levy), | ||
| total_prelim_rate = ceiling(sum(prelim_rate) * 1000) / 1000, | ||
| total_reduced_levy = sum(ptell_reduced_levy), | ||
| total_final_levy = sum(final_levy), | ||
| total_final_rate = sum(final_rate) | ||
| ) | ||
|
|
||
| agency_2013 <- agency %>% | ||
| filter(year == 2013) %>% | ||
| mutate(cty_total_eav = as.integer64(cty_overall_eav)) %>% | ||
| select(-c( | ||
| total_levy, total_max_levy, total_prelim_rate, | ||
| total_reduced_levy, total_final_levy, total_final_rate | ||
| total_final_levy, total_final_rate | ||
| )) %>% | ||
| left_join(agency_fund_2013, by = "agency_num") | ||
|
|
||
| agency <- agency %>% | ||
| filter(year != 2013) %>% | ||
| bind_rows(agency_2013) %>% | ||
| select(-cty_overall_eav) %>% | ||
| arrange(year, agency_num) | ||
|
Comment on lines
339
to
352
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jeancochrane - I added this code to address |
||
|
|
||
|
|
||
|
|
@@ -549,6 +560,62 @@ agency_info <- agency_info %>% | |
| ) | ||
| ) | ||
|
|
||
| # Load 2024 tax code agency rate file to import legacy-new agency_num crosswalk | ||
| agency_legacy_cw <- | ||
| openxlsx::read.xlsx( | ||
| "data-raw/tax_code/2024-tax-code-agency-rate-file.xlsx" | ||
| ) %>% | ||
| set_names(snakecase::to_snake_case(names(.))) %>% | ||
| select( | ||
| agency_num_24 = agency, | ||
| agency_num = legacy_num, | ||
| authority_num = authority, | ||
| agency_name_24 = authority_name | ||
| ) %>% | ||
| unique() %>% | ||
| # Account for error in Clerk's report which lists Village of Skokie Library | ||
| # Fund twice | ||
| filter(!(agency_num == "031170001" & agency_num_24 == "031170000")) %>% | ||
| # Correct error in Clerk's report which lists incorrect agency number for | ||
| # the TIF VIL OF OLYMPIA FIELDS-GOV HWY/VOLL | ||
| mutate( | ||
| across( | ||
| c(agency_num, agency_num_24), | ||
| ~ if_else( | ||
| agency_name_24 == "TIF VIL OF OLYMPIA FIELDS-GOV HWY/VOLL", | ||
| "030930502", | ||
| .x | ||
| ) | ||
| ) | ||
| ) | ||
|
|
||
| agency_info <- agency_info %>% | ||
| left_join(agency_legacy_cw, by = "agency_num") %>% | ||
| mutate( | ||
| agency_change_24 = coalesce(agency_num != agency_num_24, FALSE), | ||
| agency_num_24 = | ||
| ifelse(agency_change_24, | ||
| agency_num_24, | ||
| NA | ||
| ), | ||
| agency_name_24 = | ||
| ifelse(agency_change_24, | ||
| agency_name_24, | ||
| NA | ||
| ) | ||
| ) %>% | ||
| select( | ||
| agency_num, | ||
| agency_name, | ||
| agency_name_short, | ||
| agency_name_original, | ||
|
Comment on lines
+610
to
+611
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [Question, non-blocking] Do you think it's a problem that we don't have short names for agencies that changed in 2024?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmmm I don't think I understand, do you mean that we're missing field along lines of
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's right, sorry for the lack of clarity! I don't really know what we use these short names for, so I'll leave it up to you to decide if it's a problem that they're not included in the crosswalk.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see... I am also not positive - I'm not totally confident as to how this crosswalk is going to work out so I imagine we'll likely need to revisit it. Hopefully we will have better clarity after testing it as to whether we need the other name iterations in the crosswalk. |
||
| major_type, | ||
| minor_type, | ||
| agency_num_24, | ||
| agency_name_24, | ||
| agency_change_24 | ||
|
kyrasturgill marked this conversation as resolved.
|
||
| ) | ||
|
|
||
| # Write both data sets to S3 | ||
| arrow::write_parquet( | ||
| x = agency %>% select(-agency_name), | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[Question, non-blocking] No action necessary right now, but I'm wondering about this choice to coerce these nulls to 0 in the context of 2024 data, where these County fields are always null. If we were to keep them null, we would hew closer to the actual contents of the input data, where these fields are totally missing. However, that would have the downside of requiring users to handle nulls whenever using these columns. I don't have a good enough grasp of the context to be able to make a decision, but I wanted to raise it as a quirk of the data that I noticed while QCing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree - this was bothering me because it is not technically correct to call those 0. I think my preference would be to not replace those NAs with 0, but happy to discuss more.