diff --git a/NEWS.md b/NEWS.md index 404022e..2394a7d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -40,6 +40,8 @@ be directly added to this file to describe the related changes. - Allowed user-supplied regularization functions +- Allowed driver-specific initial values and parameters + - Errors that occur while running simulations are now caught so they do not prevent an optimization from finishing @@ -51,6 +53,9 @@ be directly added to this file to describe the related changes. parameter lists are alphabetized, equals signs are aligned, and module names are preserved +- Fixed a bug where calling `objective_function` with + `dependent_arg_function = NULL` and `verbose_mode = TRUE` caused an error + # Changes in BioCroValidation Version 0.2.0 (2025-05-23) - Added 2002 and 2005 SoyFACE biomass and standard deviation data. diff --git a/R/combine_lists.R b/R/combine_lists.R new file mode 100644 index 0000000..ff8f5bd --- /dev/null +++ b/R/combine_lists.R @@ -0,0 +1,9 @@ +# Helping function for overwriting elements of a base list with elements of a +# second list; here we assume that both lists have names, and that the names of +# new_list is a subset of the names of base_list +combine_lists <- function(base_list, new_list) { + for (element_name in names(new_list)) { + base_list[[element_name]] <- new_list[[element_name]] + } + base_list +} diff --git a/R/objective_function.R b/R/objective_function.R index c03ee8a..7a93a0a 100644 --- a/R/objective_function.R +++ b/R/objective_function.R @@ -19,7 +19,11 @@ objective_function <- function( ) { # Check the data-driver pairs - check_data_driver_pairs(base_model_definition, data_driver_pairs) + check_data_driver_pairs( + base_model_definition, + data_driver_pairs, + verbose_startup + ) # Check the arguments to be varied check_args_to_vary( diff --git a/R/objective_function_helpers.R b/R/objective_function_helpers.R index b376388..12d7312 100644 --- a/R/objective_function_helpers.R +++ b/R/objective_function_helpers.R @@ -46,8 +46,14 @@ get_model_runner <- function( # Build the runner tryCatch({ partial_func <- BioCro::partial_run_biocro( - base_model_definition[['initial_values']], - base_model_definition[['parameters']], + combine_lists( + base_model_definition[['initial_values']], + ddp[['initial_values']] + ), + combine_lists( + base_model_definition[['parameters']], + ddp[['parameters']] + ), ddp[['drivers']], base_model_definition[['direct_modules']], base_model_definition[['differential_modules']], diff --git a/R/objective_function_input_checks.R b/R/objective_function_input_checks.R index f8d0afb..1736c27 100644 --- a/R/objective_function_input_checks.R +++ b/R/objective_function_input_checks.R @@ -4,7 +4,7 @@ # Helping function for checking the data-driver pairs; will throw an error if # a problem is detected, and will otherwise be silent with no return value. -check_data_driver_pairs <- function(base_model_definition, data_driver_pairs) { +check_data_driver_pairs <- function(base_model_definition, data_driver_pairs, verbose) { # There must be at least one data-driver pair if (length(data_driver_pairs) < 1) { stop('`data_driver_pairs` must have at least one element') @@ -37,7 +37,7 @@ check_data_driver_pairs <- function(base_model_definition, data_driver_pairs) { } # Only required or optional elements should be provided - optional_elements <- 'data_stdev' + optional_elements <- c('data_stdev', 'initial_values', 'parameters') acceptable_elements <- c(required_elements, optional_elements) @@ -117,12 +117,132 @@ check_data_driver_pairs <- function(base_model_definition, data_driver_pairs) { stop(msg) } + # When provided, the driver-specific initial values and parameters must be + # lists + iv_is_list <- sapply(data_driver_pairs, function(x) { + iv <- x[['initial_values']] + + if (!is.null(iv)) { + is.list(iv) && !is.null(names(iv)) + } else { + TRUE + } + }) + + if (any(!iv_is_list)) { + stop( + 'When provided, the driver-specific initial values must be a list ', + 'of named elements' + ) + } + + param_is_list <- sapply(data_driver_pairs, function(x) { + param <- x[['parameters']] + + if (!is.null(param)) { + is.list(param) && !is.null(names(param)) + } else { + TRUE + } + }) + + if (any(!param_is_list)) { + stop( + 'When provided, the driver-specific parameters must be a list ', + 'of named elements' + ) + } + + # Names of driver-specific initial values and parameter names must be the + # same for all data-driver pairs + iv_names <- lapply(data_driver_pairs, function(x) { + sort(names(x[['initial_values']])) + }) + + if (length(unique(iv_names)) > 1) { + msg <- paste0( + 'The following driver-specific initial value names were provided ', + 'in the data-driver pairs:\n', + paste( + names(iv_names), ':', + sapply(iv_names, function(x) {paste(x, collapse = ', ')}), + collapse = '\n' + ), + '\nWhen provided, these names must be the same for each set of ', + 'drivers' + ) + + stop(msg) + } + + param_names <- lapply(data_driver_pairs, function(x) { + sort(names(x[['parameters']])) + }) + + if (length(unique(param_names)) > 1) { + msg <- paste0( + 'The following driver-specific parameter names were provided ', + 'in the data-driver pairs:\n', + paste( + names(param_names), ':', + sapply(param_names, function(x) {paste(x, collapse = ', ')}), + collapse = '\n' + ), + '\nWhen provided, these names must be the same for each set of ', + 'drivers' + ) + + stop(msg) + } + + # Each driver-specific initial value and parameter must be included in the + # base model definition + iv_names_to_check <- unique(unlist(iv_names)) + + iv_names_okay <- + iv_names_to_check %in% names(base_model_definition[['initial_values']]) + + if (!all(iv_names_okay)) { + bad_names <- iv_names_to_check[!iv_names_okay] + + msg <- paste( + 'The following driver-specific initial values are not included in', + 'the base model definition:', + paste0('"', bad_names, '"', collapse = ', ') + ) + + stop(msg) + } + + param_names_to_check <- unique(unlist(param_names)) + + param_names_okay <- + param_names_to_check %in% names(base_model_definition[['parameters']]) + + if (!all(param_names_okay)) { + bad_names <- param_names_to_check[!param_names_okay] + + msg <- paste( + 'The following driver-specific parameters are not included in', + 'the base model definition:', + paste0('"', bad_names, '"', collapse = ', ') + ) + + stop(msg) + } + # Each set of drivers must form a valid dynamical system along with the # base model definition valid_definitions <- sapply(data_driver_pairs, function(ddp) { BioCro::validate_dynamical_system_inputs( - base_model_definition[['initial_values']], - base_model_definition[['parameters']], + combine_lists( + base_model_definition[['initial_values']], + ddp[['initial_values']] + ), + combine_lists( + base_model_definition[['parameters']], + ddp[['parameters']] + ), ddp[['drivers']], base_model_definition[['direct_modules']], base_model_definition[['differential_modules']], @@ -141,6 +261,27 @@ check_data_driver_pairs <- function(base_model_definition, data_driver_pairs) { stop(msg) } + # Print driver-specific initial values and parameters, if necessary + if (verbose) { + cat('\nDriver-specific initial values:\n\n') + + if (all(sapply(iv_names, is.null))) { + cat(' None\n') + } else { + iv <- lapply(data_driver_pairs, function(x) {x[['initial_values']]}) + utils::str(iv) + } + + cat('\nDriver-specific parameters:\n\n') + + if (all(sapply(param_names, is.null))) { + cat(' None\n') + } else { + param <- lapply(data_driver_pairs, function(x) {x[['parameters']]}) + utils::str(param) + } + } + return(invisible(NULL)) } @@ -175,7 +316,12 @@ check_args_to_vary <- function( utils::str(independent_args) cat('\nThe dependent arguments and their initial values:\n\n') - utils::str(dependent_arg_function(independent_args)) + + if (is.null(dependent_arg_function)) { + cat(' None\n') + } else { + utils::str(dependent_arg_function(independent_args)) + } } # Make sure no drivers were specified diff --git a/man/objective_function.Rd b/man/objective_function.Rd index 21de429..b97461a 100644 --- a/man/objective_function.Rd +++ b/man/objective_function.Rd @@ -55,8 +55,8 @@ \item{data_driver_pairs}{ A list of named elements, where each element is a "data-driver pair." A data-driver pair is a list with three required elements: \code{data}, - \code{drivers}, and \code{weight}. Optionally, it may also have a - \code{data_stdev} element. + \code{drivers}, and \code{weight}. Optionally, it may also have elements + named \code{data_stdev}, \code{initial_values}, and \code{parameters}. The \code{data} element must be a data frame with one column named \code{time}, whose values follow BioCro's definition of @@ -76,6 +76,13 @@ should represent the standard deviation associated with each entry in \code{data}. If \code{data_stdev} is not supplied, all standard deviations will be set to 1. + + The optional \code{initial_values} and \code{parameters} elements must be + named lists of driver-specific initial values and parameters, respectively, + that will overwrite the default values specified in the + \code{base_model_definition}. When driver-specific \code{initial_values} are + provided, they must be provided for each of the data-driver pairs; the same + rule applies for driver-specific \code{parameters}. } \item{independent_args}{ @@ -185,7 +192,10 @@ simulation must be compared to associated sets of observed data. Here, this is handled through the \code{data_driver_pairs}, which allows the user to specify which drivers and data sets should be compared to each - other. + other. Optionally, it is also possible to specify different initial + values or parameters for each set of drivers; for example, the + atmospheric CO2 concentration may need to change for different years, + or soil properties may need to change for different locations. \item \strong{Complicated normalization:} Care must be taken to ensure that certain years or output variables are not over-valued in the error @@ -622,18 +632,21 @@ if (require(BioCro)) { # The data-driver pairs can now be created by associating each data set with # its corresponding weather data. Here we will weight the 2005 data twice as - # heavily as the 2002 data. + # heavily as the 2002 data. Note that we also specify different atmospheric + # CO2 concentrations for each year. data_driver_pairs <- list( ambient_2002 = list( data = process_table(soyface_biomass[['ambient_2002']]), data_stdev = process_table(soyface_biomass[['ambient_2002_std']]), drivers = BioCro::soybean_weather[['2002']], + parameters = list(Catm = with(BioCro::catm_data, {Catm[year == '2002']})), weight = 1 ), ambient_2005 = list( data = process_table(soyface_biomass[['ambient_2005']]), data_stdev = process_table(soyface_biomass[['ambient_2005_std']]), drivers = BioCro::soybean_weather[['2005']], + parameters = list(Catm = with(BioCro::catm_data, {Catm[year == '2005']})), weight = 2 ) ) diff --git a/tests/testthat/test-objective_function.R b/tests/testthat/test-objective_function.R index 19f4969..cf896ed 100644 --- a/tests/testthat/test-objective_function.R +++ b/tests/testthat/test-objective_function.R @@ -91,6 +91,24 @@ test_that('Objective functions can be created and behave as expected', { ) ) + # Make sure there are no errors for one data-driver pair, no dependent + # arguments in verbose mode + sink(tempfile()) + + expect_no_error( + objective_function( + model, + ddps[1], + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = TRUE + ) + ) + + sink() + # Two data-driver pairs, with dependent arguments and L2 regularization obj_fun <- expect_silent( objective_function( @@ -285,6 +303,159 @@ test_that('Data-driver pairs must have correct elements', { ) }) +test_that('Driver-specific initial values are checked and used', { + expect_error( + objective_function( + model, + within(ddps, {ambient_2002$initial_values = 5}), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'When provided, the driver-specific initial values must be a list of named elements' + ) + + expect_error( + objective_function( + model, + within(ddps, {ambient_2002$initial_values = list(5)}), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'When provided, the driver-specific initial values must be a list of named elements' + ) + + expect_error( + objective_function( + model, + within(ddps, {ambient_2002$initial_values = list(Leaf = 5)}), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'The following driver-specific initial value names were provided in the data-driver pairs:\nambient_2002 : Leaf\nambient_2005 : \nWhen provided, these names must be the same for each set of drivers', + fixed = TRUE + ) + + expect_error( + objective_function( + model, + within(ddps, { + ambient_2002$initial_values = list(extra_iv = 5) + ambient_2005$initial_values = list(extra_iv = 7) + }), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'The following driver-specific initial values are not included in the base model definition: "extra_iv"' + ) + + # Here we specify a "bad" initial value of Leaf for 2005. This will trigger + # an error, but the particular error may depend on the particular version of + # BioCro being used, so we don't check the error message content. + expect_error( + objective_function( + model, + within(ddps, { + ambient_2002$initial_values = list(Leaf = 0.1) # This shouldn't cause a problem + ambient_2005$initial_values = list(Leaf = -1) # This should trigger an error + }), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ) + ) +}) + +test_that('Driver-specific parameters are checked and used', { + expect_error( + objective_function( + model, + within(ddps, {ambient_2002$parameters = 5}), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'When provided, the driver-specific parameters must be a list of named elements' + ) + + expect_error( + objective_function( + model, + within(ddps, {ambient_2002$parameters = list(5)}), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'When provided, the driver-specific parameters must be a list of named elements' + ) + + expect_error( + objective_function( + model, + within(ddps, {ambient_2002$parameters = list(Catm = 5)}), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'The following driver-specific parameter names were provided in the data-driver pairs:\nambient_2002 : Catm\nambient_2005 : \nWhen provided, these names must be the same for each set of drivers', + fixed = TRUE + ) + + expect_error( + objective_function( + model, + within(ddps, { + ambient_2002$parameters = list(extra_param = 5) + ambient_2005$parameters = list(extra_param = 7) + }), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'The following driver-specific parameters are not included in the base model definition: "extra_param"' + ) + + # Here we specify a "bad" value of the Ball-Berry intercept for 2005. This + # will trigger an error, but the particular error may depend on the + # particular version of BioCro being used, so we don't check the error + # message content. + expect_error( + objective_function( + model, + within(ddps, { + ambient_2002$parameters = list(b0 = 0.1) # This shouldn't cause a problem + ambient_2005$parameters = list(b0 = -1) # This should trigger an error + }), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ) + ) +}) + test_that('Data must have a `time` column', { expect_error( objective_function( diff --git a/vignettes/parameterizing_soybean_biocro.Rmd b/vignettes/parameterizing_soybean_biocro.Rmd index 81ede07..3a3612e 100644 --- a/vignettes/parameterizing_soybean_biocro.Rmd +++ b/vignettes/parameterizing_soybean_biocro.Rmd @@ -163,7 +163,8 @@ process_table <- function(data_table, type) { The `BioCro` R package includes weather data for the years in the `soyface_biomass` data set. So now we are ready to define the data-driver pairs, which includes the weather, the observed biomass, the standard deviation of the -observed biomass, and the weight to assign to each year: +observed biomass, the atmospheric CO2 concentration to use for each year, and +the weight to assign to each year: ```{r data_driver_pairs} # Define the data-driver pairs @@ -172,12 +173,14 @@ data_driver_pairs <- list( data = process_table(soyface_biomass[['ambient_2002']], 'biomass'), data_stdev = process_table(soyface_biomass[['ambient_2002_std']], 'stdev'), drivers = BioCro::soybean_weather[['2002']], + parameters = list(Catm = with(BioCro::catm_data, {Catm[year == '2002']})), weight = 1 ), ambient_2005 = list( data = process_table(soyface_biomass[['ambient_2005']], 'biomass'), data_stdev = process_table(soyface_biomass[['ambient_2005_std']], 'stdev'), drivers = BioCro::soybean_weather[['2005']], + parameters = list(Catm = with(BioCro::catm_data, {Catm[year == '2005']})), weight = 1 ) ) @@ -630,9 +633,11 @@ guess. In this example, `r sum(ind_arg_table[['improved']])` out of (`r round(100 * sum(ind_arg_table[['improved']]) / nrow(ind_arg_table))` percent). We can also compare the error metric to its original value. As shown above, it -is now `r optim_res[['value']]`, which is only -`r round(100 * (optim_res[['value']] - default_error) / optim_res[['value']], 1)` -percent larger than with the default parameter values. +is now `r optim_res[['value']]`, which is +`r round(100 * abs(optim_res[['value']] - default_error) / optim_res[['value']], 1)` +percent +`r if (optim_res[['value']] > default_error) {'larger'} else {'smaller'}` +than with the default parameter values. The optimized parameter values could likely be improved by using a more stringent tolerance for the optimizer, but this would require more time to run.