From e454936a25a5badeb563a54a88383617fb2d28bc Mon Sep 17 00:00:00 2001 From: njtierney Date: Mon, 24 Oct 2022 18:35:32 +0800 Subject: [PATCH 1/2] initial start on #106 --- R/age_ranges.R | 15 +++++++ R/aggregate_predicted_contacts.R | 4 +- R/make_bam_predictions.R | 39 +++++++++++++++++ R/predict_contacts.R | 9 ++-- R/predict_contacts_1y.R | 75 ++++++++++++-------------------- R/predict_contacts_flexible.R | 16 +++++++ R/predict_setting_contacts.R | 6 +-- 7 files changed, 107 insertions(+), 57 deletions(-) create mode 100644 R/age_ranges.R create mode 100644 R/make_bam_predictions.R create mode 100644 R/predict_contacts_flexible.R diff --git a/R/age_ranges.R b/R/age_ranges.R new file mode 100644 index 00000000..7d47064f --- /dev/null +++ b/R/age_ranges.R @@ -0,0 +1,15 @@ +#' @title +#' @param ages vector of ages +#' @noRd +#' @return vector length 1 of min and max ages used in calculating age +#' @author njtierney +age_ranges <- function(ages) { + + min_age <- min(ages) + bin_widths <- diff(ages) + final_bin_width <- bin_widths[length(bin_widths)] + age_max_integration <- max(ages) + final_bin_width + + c(min_age, age_max_integration) + +} \ No newline at end of file diff --git a/R/aggregate_predicted_contacts.R b/R/aggregate_predicted_contacts.R index 532b3ce2..4a59fcba 100644 --- a/R/aggregate_predicted_contacts.R +++ b/R/aggregate_predicted_contacts.R @@ -71,8 +71,8 @@ aggregate_predicted_contacts <- function(predicted_contacts_1y, .groups = "drop" ) %>% # *average* the total contacts within the 'from' contacts, weighted by the - # population distribution (to get contacts for the population-average ember of - # that age group) + # population distribution (to get contacts for the population-average + # ember of that age group) dplyr::mutate( pop_age_from = age_population_function(age_from), age_group_from = cut( diff --git a/R/make_bam_predictions.R b/R/make_bam_predictions.R new file mode 100644 index 00000000..2248e42a --- /dev/null +++ b/R/make_bam_predictions.R @@ -0,0 +1,39 @@ +#' .. content for \description{} (no empty lines) .. +#' +#' .. content for \details{} .. +#' +#' @title + +#' @return +#' @author njtierney +#' @export +make_bam_predictions <- function(x) { + + x %>% + # add on prediction features, setting the population to predict to + add_modelling_features( + population = population + ) %>% + dplyr::mutate( + # prediction + contacts = mgcv::predict.bam( + model, + newdata = ., + type = "response" + ), + # uncertainty + se_contacts = mgcv::predict.bam( + model, + newdata = ., + type = "response", + se.fit = TRUE + )$se.fit + ) %>% + dplyr::select( + age_from, + age_to, + contacts, + se_contacts + ) + +} diff --git a/R/predict_contacts.R b/R/predict_contacts.R index 6898b716..0a4f538f 100644 --- a/R/predict_contacts.R +++ b/R/predict_contacts.R @@ -70,11 +70,10 @@ predict_contacts <- function(model, population <- population %>% dplyr::arrange(lower.age.limit) - # this could be changed to a function for lower age limit - age_min_integration <- min(population$lower.age.limit) - bin_widths <- diff(population$lower.age.limit) - final_bin_width <- bin_widths[length(bin_widths)] - age_max_integration <- max(population$lower.age.limit) + final_bin_width + # get the age ranges plus the final age bin (which is the bin width) + the_age_ranges <- age_ranges(population$lower.age.limit) + age_min_integration <- min(the_age_ranges) + age_max_integration <- max(the_age_ranges) # need to check we are not predicting to 0 populations (interpolator can # predict 0 values, then the aggregated ages get screwed up) diff --git a/R/predict_contacts_1y.R b/R/predict_contacts_1y.R index e48c0d79..2692d18f 100644 --- a/R/predict_contacts_1y.R +++ b/R/predict_contacts_1y.R @@ -2,39 +2,39 @@ #' #' @description Provides a predicted rate of contacts for contact ages. #' Take an already fitted model of contact rate and predict the -#' estimated contact rate, and standard error, for all combinations of the -#' provided ages in 1 year increments. So if the minimum age is 5, and the -#' maximum age is 10, it will provide the estimated contact rate for all age -#' combinations: 5 and 5, 5 and 6 ... 5 and 10, and so on. This function is -#' used internally within [predict_contacts()], and thus +#' estimated contact rate, and standard error, for all combinations of the +#' provided ages in 1 year increments. So if the minimum age is 5, and the +#' maximum age is 10, it will provide the estimated contact rate for all age +#' combinations: 5 and 5, 5 and 6 ... 5 and 10, and so on. This function is +#' used internally within [predict_contacts()], and thus #' [predict_setting_contacts()] as well, although it can be used by itself. #' See examples for more details, and details for more information. -#' -#' @details Prediction features are added using [add_modelling_features()]. -#' These features include the population distribution of contact ages, -#' fraction of population in each age group that attend school/work as well +#' +#' @details Prediction features are added using [add_modelling_features()]. +#' These features include the population distribution of contact ages, +#' fraction of population in each age group that attend school/work as well #' as the offset according to the settings on all combinations of the #' participant & contact ages. -#' +#' #' @param model A single fitted model of contact rate (e.g., -#' [fit_single_contact_model()]) -#' @param population a dataframe of age population information, with columns -#' indicating some lower age limit, and population, (e.g., +#' [fit_single_contact_model()]) +#' @param population a dataframe of age population information, with columns +#' indicating some lower age limit, and population, (e.g., #' [get_polymod_population()]) #' @param age_min Age range minimum value. Default: 0 #' @param age_max Age range maximum value, Default: 100 #' @return Data frame with four columns: `age_from`, `age_to`, `contacts`, and -#' `se_contacts`. This contains the participant & contact ages from the -#' minimum and maximum ages provided along with the predicted rate of +#' `se_contacts`. This contains the participant & contact ages from the +#' minimum and maximum ages provided along with the predicted rate of #' contacts and standard error around the prediction. #' @examples -#' +#' #' fairfield_abs_data <- abs_age_lga("Fairfield (C)") -#' +#' #' fairfield_abs_data -#' +#' #' # predict the contact rates in 1 year blocks to Fairfield data -#' +#' #' fairfield_contacts_1 <- predict_contacts_1y( #' model = polymod_setting_models$home, #' population = fairfield_abs_data, @@ -42,38 +42,19 @@ #' age_max = 2 #' ) #' @export -predict_contacts_1y <- function(model, population, age_min = 0, age_max = 100) { - +predict_contacts_1y <- function( + model, + population, + age_min = 0, + age_max = 100 +) { all_ages <- age_min:age_max - # predict contacts to all integer years, adjusting for the population in a given place + # predict contacts to all integer years, adjusting for the population in a + # given place tidyr::expand_grid( age_from = all_ages, age_to = all_ages, ) %>% - # add on prediction features, setting the population to predict to - add_modelling_features( - population = population - ) %>% - dplyr::mutate( - # prediction - contacts = mgcv::predict.bam( - model, - newdata = ., - type = "response" - ), - # uncertainty - se_contacts = mgcv::predict.bam( - model, - newdata = ., - type = "response", - se.fit = TRUE - )$se.fit - ) %>% - dplyr::select( - age_from, - age_to, - contacts, - se_contacts - ) + make_bam_predictions() } diff --git a/R/predict_contacts_flexible.R b/R/predict_contacts_flexible.R new file mode 100644 index 00000000..cbb116c7 --- /dev/null +++ b/R/predict_contacts_flexible.R @@ -0,0 +1,16 @@ +# taken from predict_contacts_1y +#' @export +predict_contacts_flexible <- function( + model, + population, + age_vector +) { + + # predict contacts to all given years + tidyr::expand_grid( + age_from = age_vector, + age_to = age_vector, + ) %>% + make_bam_predictions() + +} diff --git a/R/predict_setting_contacts.R b/R/predict_setting_contacts.R index dab58337..a45a9d99 100644 --- a/R/predict_setting_contacts.R +++ b/R/predict_setting_contacts.R @@ -87,12 +87,12 @@ predict_setting_contacts <- function(population, model_per_capita_household_size = get_polymod_per_capita_household_size()) { - setting_predictions <- furrr::future_map( + setting_predictions <- purrr::map( .x = contact_model, .f = predict_contacts, population = population, - age_breaks = age_breaks, - .options = furrr::furrr_options(seed = TRUE) + age_breaks = age_breaks + # .options = furrr::furrr_options(seed = TRUE) ) setting_matrices <- furrr::future_map( From 50ef110a774dbfea6c8583c921afed5656546dca Mon Sep 17 00:00:00 2001 From: njtierney Date: Wed, 30 Nov 2022 13:40:25 +0800 Subject: [PATCH 2/2] minor style changes --- R/add_population_age_to.R | 4 +++- R/aggregate_predicted_contacts.R | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/add_population_age_to.R b/R/add_population_age_to.R index 3a52b6ca..dd062b3f 100644 --- a/R/add_population_age_to.R +++ b/R/add_population_age_to.R @@ -29,7 +29,9 @@ add_population_age_to <- function(contact_data, population = get_polymod_population()) { # get function to interpolate population age distributions to 1y bins - age_population_function <- get_age_population_function(population) + age_population_function <- get_age_population_function( + population + ) # add the population in each 'to' age for the survey context contact_data %>% diff --git a/R/aggregate_predicted_contacts.R b/R/aggregate_predicted_contacts.R index 4a59fcba..1551686b 100644 --- a/R/aggregate_predicted_contacts.R +++ b/R/aggregate_predicted_contacts.R @@ -72,7 +72,7 @@ aggregate_predicted_contacts <- function(predicted_contacts_1y, ) %>% # *average* the total contacts within the 'from' contacts, weighted by the # population distribution (to get contacts for the population-average - # ember of that age group) + # member of that age group) dplyr::mutate( pop_age_from = age_population_function(age_from), age_group_from = cut(