diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index acfefe6..69208fb 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -25,7 +25,7 @@ jobs: - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} - {os: ubuntu-latest, r: 'oldrel-1'} - - {os: ubuntu-latest, r: '3.6.0'} + - {os: ubuntu-latest, r: '4.1.0'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} diff --git a/DESCRIPTION b/DESCRIPTION index 0e388e9..7eee250 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: BioCroValidation -Version: 0.2.0 +Version: 0.3.0 Title: Tools for Validating BioCro Models Description: A collection of tools for validating BioCro crop growth models. Authors@R: c( @@ -9,7 +9,7 @@ Authors@R: c( person("BioCroValidation authors", role = "cph") ) Depends: - R (>= 3.6.0) + R (>= 4.1.0) Imports: BioCro (>= 3.2.0) Suggests: @@ -18,10 +18,10 @@ Suggests: lattice, knitr, rmarkdown, - testthat (>= 3.0.0) + testthat (>= 3.2.0) VignetteBuilder: knitr License: MIT + file LICENSE Encoding: UTF-8 LazyData: true -URL: https://github.com/BioCro/BioCroValidation, https://biocro.github.io/BioCroValidation/ +URL: https://github.com/BioCro/BioCroValidation, https://biocro.org/BioCroValidation/ Config/testthat/edition: 3 diff --git a/NEWS.md b/NEWS.md index 45cd934..f10903e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -33,6 +33,29 @@ In the case of a hotfix, a short section headed by the new release number should be directly added to this file to describe the related changes. --> +# Changes in BioCroValidation Version 0.3.0 (2026-03-11) + +- Fixed typos in the help page for `objective_function`, and in the `add_norm` + function (defined in `R/objective_function_helpers.R`) + +- Allowed user-supplied regularization functions + +- Allowed driver-specific initial values and parameters + +- Errors that occur while running simulations are now caught so they do not + prevent an optimization from finishing + +- More options for `debug_mode` are now available; the default setting + (`minimal`) only prints info to the terminal when an issue with a simulation + occurs + +- Improved formatting of output from `write_module`, so that initial value and + parameter lists are alphabetized, equals signs are aligned, and module names + are preserved + +- Fixed a bug where calling `objective_function` with + `dependent_arg_function = NULL` and `verbose_mode = TRUE` caused an error + # Changes in BioCroValidation Version 0.2.0 (2025-05-23) - Added 2002 and 2005 SoyFACE biomass and standard deviation data. diff --git a/R/objective_function.R b/R/objective_function.R index 6c21e00..7a93a0a 100644 --- a/R/objective_function.R +++ b/R/objective_function.R @@ -19,7 +19,11 @@ objective_function <- function( ) { # Check the data-driver pairs - check_data_driver_pairs(base_model_definition, data_driver_pairs) + check_data_driver_pairs( + base_model_definition, + data_driver_pairs, + verbose_startup + ) # Check the arguments to be varied check_args_to_vary( @@ -76,14 +80,16 @@ objective_function <- function( long_form_data, normalization_method, normalization_param, - length(data_driver_pairs) + length(data_driver_pairs), + verbose_startup ) # Add variance-based weights long_form_data <- add_w_var( long_form_data, stdev_weight_method, - stdev_weight_param + stdev_weight_param, + verbose_startup ) # Print the long form data, if desired. Do this before checking the data, @@ -109,11 +115,15 @@ objective_function <- function( # Get the data-driver pair weights ddp_weights <- get_ddp_weights(data_driver_pairs) - # Print the data-driver pair weights, if desired - if (verbose_startup) { - cat('\nThe user-supplied data-driver pair weights:\n\n') - utils::str(ddp_weights) - } + # Print additional startup information, if desired + print_misc_verbose_startup( + ddp_weights, + regularization_method, + dependent_arg_function, + post_process_function, + extra_penalty_function, + verbose_startup + ) # Create the objective function obj_fun <- get_obj_fun( diff --git a/R/objective_function_helpers.R b/R/objective_function_helpers.R index ae5ef09..45d6bfc 100644 --- a/R/objective_function_helpers.R +++ b/R/objective_function_helpers.R @@ -4,6 +4,13 @@ # Value to return when a simulation fails to run FAILURE_VALUE <- 1e10 +# Define the allowed debug modes +DEBUG_MODES <- c( + 'none', + 'minimal', + 'everything' +) + # Helping function for getting a full list of argument names get_full_arg_names <- function(independent_args, dependent_arg_function) { # Get the independent argument names @@ -39,8 +46,14 @@ get_model_runner <- function( # Build the runner tryCatch({ partial_func <- BioCro::partial_run_biocro( - base_model_definition[['initial_values']], - base_model_definition[['parameters']], + utils::modifyList( + base_model_definition[['initial_values']], + c(list(), ddp[['initial_values']]) + ), + utils::modifyList( + base_model_definition[['parameters']], + c(list(), ddp[['parameters']]) + ), ddp[['drivers']], base_model_definition[['direct_modules']], base_model_definition[['differential_modules']], @@ -188,9 +201,32 @@ add_norm <- function( long_form_data, normalization_method, normalization_param, - n_ddp + n_ddp, + verbose_startup ) { + eps_max <- get_param_default(normalization_param, 1e-1) + eps_obs <- get_param_default(normalization_param, 1e-1) + + method <- toupper(normalization_method) + + if (verbose_startup) { + method_info <- if (method %in% c('MAX', 'MEAN_MAX')) { + paste(method, 'with eps =', eps_max) + } else if (method %in% c('OBS', 'MEAN_OBS')) { + paste(method, 'with eps =', eps_obs) + } else { + method + } + + cat(paste( + '\nNormalization method:', + method_info, + '\n', + collapse = '' + )) + } + for (i in seq_along(long_form_data)) { data_table <- long_form_data[[i]] @@ -200,23 +236,22 @@ add_norm <- function( qname_subset <- data_table[data_table[['quantity_name']] == qname, ] - if (tolower(normalization_method) == 'equal') { + npts <- nrow(qname_subset) + qmax <- max(abs(qname_subset[['quantity_value']])) + qobs <- data_table[j, 'quantity_value'] + + if (method == 'EQUAL') { 1.0 - } else if (tolower(normalization_method) == 'mean') { - nrow(qname_subset) * n_ddp - } else if (tolower(normalization_method) == 'max') { - max(qname_subset[['quantity_value']])^2 - } else if (tolower(normalization_method) == 'obs') { - eps <- get_param_default(normalization_param, 1e-1) - data_table[i, 'quantity_value']^2 + eps - } else if (tolower(normalization_method) == 'mean_max') { - npts <- nrow(qname_subset) - qmax <- max(qname_subset[['quantity_value']]) - npts * n_ddp * qmax^2 - } else if (tolower(normalization_method) == 'mean_obs') { - npts <- nrow(qname_subset) - eps <- get_param_default(normalization_param, 1e-1) - npts * n_ddp * (data_table[i, 'quantity_value']^2 + eps) + } else if (method == 'MEAN') { + npts * n_ddp + } else if (method == 'MAX') { + qmax^2 + eps_max + } else if (method == 'OBS') { + qobs^2 + eps_obs + } else if (method == 'MEAN_MAX') { + npts * n_ddp * (qmax^2 + eps_max) + } else if (method == 'MEAN_OBS') { + npts * n_ddp * (qobs^2 + eps_obs) } else { stop('Unsupported normalization_method: ', normalization_method) } @@ -229,20 +264,46 @@ add_norm <- function( } # Helping function for getting variance-based weights -add_w_var <- function(long_form_data, stdev_weight_method, stdev_weight_param) { +add_w_var <- function( + long_form_data, + stdev_weight_method, + stdev_weight_param, + verbose_startup +) +{ + eps_log <- get_param_default(stdev_weight_param, 1e-5) + eps_inv <- get_param_default(stdev_weight_param, 1e-1) + + method <- toupper(stdev_weight_method) + + if (verbose_startup) { + method_info <- if (method == 'LOGARITHM') { + paste(method, 'with eps =', eps_log) + } else if (method == 'INVERSE') { + paste(method, 'with eps =', eps_inv) + } else { + method + } + + cat(paste( + '\nStandard-deviation-based weight method:', + method_info, + '\n', + collapse = '' + )) + } + for (i in seq_along(long_form_data)) { data_table <- long_form_data[[i]] data_stdev <- data_table[['quantity_stdev']] data_table[['w_var']] <- - if (tolower(stdev_weight_method) == 'equal') { + if (method == 'EQUAL') { 1.0 - } else if (tolower(stdev_weight_method) == 'logarithm') { - eps <- get_param_default(stdev_weight_param, 1e-5) - log(1 / (data_stdev + eps)) - } else if (tolower(stdev_weight_method) == 'inverse') { - eps <- get_param_default(stdev_weight_param, 1e-1) - 1 / (data_stdev^2 + eps) + } else if (method == 'LOGARITHM') { + log(1.0 / (data_stdev + eps_log)) + } else if (method == 'INVERSE') { + 1.0 / (data_stdev^2 + eps_inv) } else { stop('Unsupported stdev_weight_method: ', stdev_weight_method) } @@ -316,6 +377,24 @@ one_error <- function( } } +# Helping function for debug printing. Here, `obj` should either be a list (in +# which case its "structure" will be printed via `str`) or a numeric vector (in +# which case its values will be printed with up to 32 decimal places). +# `obj_name` should be a string describing what is being printed. The printout +# will include newlines at the start and end, along with a timestamp. +debug_print <- function(obj, obj_name) { + cat(paste0('\nTime: ', Sys.time()), ' ', obj_name, ': ') + + if (is.list(obj)) { + cat('\n\n') + utils::str(obj) + } else { + cat(paste(sprintf('%.32f', obj), collapse = ', ')) + } + + cat('\n') +} + # Helping function for returning a failure value failure_value <- function(error_sum, return_terms) { if (return_terms) { @@ -336,13 +415,40 @@ error_from_res <- function( ddp_weight, normalization_method, extra_penalty_function, - return_terms + return_terms, + x, + debug_mode ) { + # If there was an error, return a very high value + if (is.null(simulation_result)) { + return( + failure_value(NA, return_terms) + ) + } + # If the simulation did not finish, return a very high value expected_npts <- long_form_data_table[1, 'expected_npts'] if (nrow(simulation_result) < expected_npts) { + if (debug_mode %in% c(DEBUG_MODES[2], DEBUG_MODES[3])) { + # debug_mode is `minimal` or `everything`, so indicate that the + # simulation did not finish; if debug_mode is `minimal`, the inputs + # have not yet been printed, so we need to print them here + if (debug_mode == DEBUG_MODES[2]) { + debug_print(x, 'Independent argument values') + } + + msg <- paste0( + '\nSimulation did not finish; produced ', + nrow(simulation_result), + ' rows instead of the expected ', + expected_npts, '\n' + ) + + cat(msg) + } + return( failure_value(NA, return_terms) ) @@ -376,6 +482,24 @@ error_from_res <- function( # If the error sum is not finite, return a very high value if (!is.finite(error_sum)) { + if (debug_mode %in% c(DEBUG_MODES[2], DEBUG_MODES[3])) { + # debug_mode is `minimal` or `everything`, so indicate that the sum + # of squared errors is not finite; if debug_mode is `minimal`, the + # inputs have not yet been printed, so we need to print them here + if (debug_mode == DEBUG_MODES[2]) { + debug_print(x, 'Independent argument values') + } + + msg <- paste0( + '\nSum of squared errors is not finite: ', + error_sum, + '\n' + ) + + cat(msg) + } + + return( failure_value(error_sum, return_terms) ) @@ -405,18 +529,33 @@ regularization_penalty <- function( regularization_lambda ) { - if (toupper(regularization_method) == 'NONE') { - 0.0 - } else if (toupper(regularization_method) == 'LASSO' || toupper(regularization_method) == 'L1') { - regularization_lambda * sum(abs(ind_arg_vals)) - } else if (toupper(regularization_method) == 'RIDGE' || toupper(regularization_method) == 'L2') { - regularization_lambda * sum(ind_arg_vals^2) + if (is.function(regularization_method)) { + regularization_method(ind_arg_vals, regularization_lambda) } else { - stop('Unsupported regularization method: ', regularization_method) + method <- toupper(regularization_method) + + if (method == 'NONE') { + 0.0 + } else if (method %in% c('LASSO', 'L1')) { + regularization_lambda * sum(abs(ind_arg_vals)) + } else if (method %in% c('RIDGE', 'L2')) { + regularization_lambda * sum(ind_arg_vals^2) + } else { + stop('Unsupported regularization method: ', regularization_method) + } } } -# Helping function that forms the overall objective function +# Helping function that forms the overall objective function. Here, `debug_mode` +# should be a numeric value, and the debug outputs work as follows: +# +# - debug_mode 1: Minimal debug printing; inputs and error messages will be +# printed only if a simulation fails to complete +# +# - debug mode 2: Maximal debug printing; all inputs and outputs will be printed +# +# - any other value: No debug printing +# get_obj_fun <- function( model_runners, long_form_data, @@ -427,21 +566,32 @@ get_obj_fun <- function( regularization_method ) { - function(x, lambda = 0, return_terms = FALSE, debug_mode = FALSE) { - if (debug_mode) { - msg <- paste0( - '\nTime: ', - Sys.time(), - ' Independent argument values: ', - paste(x, collapse = ', '), - '\n' - ) - cat(msg) + function(x, lambda = 0, return_terms = FALSE, debug_mode = 'minimal') { + if (tolower(debug_mode) == DEBUG_MODES[3]) { + # debug_mode is `everything`, so print the inputs + debug_print(x, 'Independent argument values') } errors <- lapply(seq_along(model_runners), function(i) { runner <- model_runners[[i]] - res <- runner(x) + + res <- tryCatch( + runner(x), + error = function(e) { + if (debug_mode %in% c(DEBUG_MODES[2], DEBUG_MODES[3])) { + # debug_mode is `minimal` or `everything`, so print the + # error message; if debug_mode is `minimal`, the inputs + # have not yet been printed, so we need to print them + # here + if (debug_mode == DEBUG_MODES[2]) { + debug_print(x, 'Independent argument values') + } + cat(paste0('\n', conditionMessage(e), '\n')) + } + # Return NULL to indicate that an error occurred + NULL + } + ) error_from_res( res, @@ -450,7 +600,9 @@ get_obj_fun <- function( ddp_weights[[i]], normalization_method, extra_penalty_function, - return_terms + return_terms, + x, + debug_mode ) }) @@ -465,28 +617,66 @@ get_obj_fun <- function( regularization_penalty = reg_penalty ) - if (debug_mode) { - cat(paste0('Time: ', Sys.time()), ' Error metric terms: ') - utils::str(error_metric_terms) - cat('\n') + if (tolower(debug_mode) == DEBUG_MODES[3]) { + # debug_mode is `everything`, so print the error metric terms + debug_print(error_metric_terms, 'Error metric terms') } error_metric_terms } else { error_metric <- sum(as.numeric(errors)) + reg_penalty - if (debug_mode) { - msg <- paste0( - 'Time: ', - Sys.time(), - ' Error metric: ', - error_metric, - '\n' - ) - cat(msg) + if (tolower(debug_mode) == DEBUG_MODES[3]) { + # debug_mode is `everything`, so print the error metric + debug_print(error_metric, 'Error metric') } error_metric } } } + +# Print the data-driver pair weights, information about the regularization +# method, and information about optional functions, if desired +print_misc_verbose_startup <- function( + ddp_weights, + regularization_method, + dependent_arg_function, + post_process_function, + extra_penalty_function, + verbose_startup +) +{ + if (verbose_startup){ + user_func_msg <- 'user-supplied function:\n\n' + + cat('\nThe user-supplied data-driver pair weights:\n\n') + utils::str(ddp_weights) + + cat('\nRegularization method: ') + + if (is.function(regularization_method)) { + cat(user_func_msg) + print(regularization_method) + } else { + cat(paste0(toupper(regularization_method), '\n')) + } + + func_to_print <- list( + list(info = 'Dependent argument', func = dependent_arg_function), + list(info = 'Post-processing', func = post_process_function), + list(info = 'Extra penalty', func = extra_penalty_function) + ) + + for (x in func_to_print) { + cat(paste0('\n', x$info, ' function: ')) + + if (is.null(x$func)) { + cat('none\n') + } else { + cat(user_func_msg) + print(x$func) + } + } + } +} diff --git a/R/objective_function_input_checks.R b/R/objective_function_input_checks.R index 7a7a5a0..4d9de7b 100644 --- a/R/objective_function_input_checks.R +++ b/R/objective_function_input_checks.R @@ -4,7 +4,7 @@ # Helping function for checking the data-driver pairs; will throw an error if # a problem is detected, and will otherwise be silent with no return value. -check_data_driver_pairs <- function(base_model_definition, data_driver_pairs) { +check_data_driver_pairs <- function(base_model_definition, data_driver_pairs, verbose) { # There must be at least one data-driver pair if (length(data_driver_pairs) < 1) { stop('`data_driver_pairs` must have at least one element') @@ -37,7 +37,7 @@ check_data_driver_pairs <- function(base_model_definition, data_driver_pairs) { } # Only required or optional elements should be provided - optional_elements <- 'data_stdev' + optional_elements <- c('data_stdev', 'initial_values', 'parameters') acceptable_elements <- c(required_elements, optional_elements) @@ -117,12 +117,132 @@ check_data_driver_pairs <- function(base_model_definition, data_driver_pairs) { stop(msg) } + # When provided, the driver-specific initial values and parameters must be + # lists + iv_is_list <- sapply(data_driver_pairs, function(x) { + iv <- x[['initial_values']] + + if (!is.null(iv)) { + is.list(iv) && !is.null(names(iv)) + } else { + TRUE + } + }) + + if (any(!iv_is_list)) { + stop( + 'When provided, the driver-specific initial values must be a list ', + 'of named elements' + ) + } + + param_is_list <- sapply(data_driver_pairs, function(x) { + param <- x[['parameters']] + + if (!is.null(param)) { + is.list(param) && !is.null(names(param)) + } else { + TRUE + } + }) + + if (any(!param_is_list)) { + stop( + 'When provided, the driver-specific parameters must be a list ', + 'of named elements' + ) + } + + # Names of driver-specific initial values and parameter names must be the + # same for all data-driver pairs + iv_names <- lapply(data_driver_pairs, function(x) { + sort(names(x[['initial_values']])) + }) + + if (length(unique(iv_names)) > 1) { + msg <- paste0( + 'The following driver-specific initial value names were provided ', + 'in the data-driver pairs:\n', + paste( + names(iv_names), ':', + sapply(iv_names, function(x) {paste(x, collapse = ', ')}), + collapse = '\n' + ), + '\nWhen provided, these names must be the same for each set of ', + 'drivers' + ) + + stop(msg) + } + + param_names <- lapply(data_driver_pairs, function(x) { + sort(names(x[['parameters']])) + }) + + if (length(unique(param_names)) > 1) { + msg <- paste0( + 'The following driver-specific parameter names were provided ', + 'in the data-driver pairs:\n', + paste( + names(param_names), ':', + sapply(param_names, function(x) {paste(x, collapse = ', ')}), + collapse = '\n' + ), + '\nWhen provided, these names must be the same for each set of ', + 'drivers' + ) + + stop(msg) + } + + # Each driver-specific initial value and parameter must be included in the + # base model definition + iv_names_to_check <- unique(unlist(iv_names)) + + iv_names_okay <- + iv_names_to_check %in% names(base_model_definition[['initial_values']]) + + if (!all(iv_names_okay)) { + bad_names <- iv_names_to_check[!iv_names_okay] + + msg <- paste( + 'The following driver-specific initial values are not included in', + 'the base model definition:', + paste0('"', bad_names, '"', collapse = ', ') + ) + + stop(msg) + } + + param_names_to_check <- unique(unlist(param_names)) + + param_names_okay <- + param_names_to_check %in% names(base_model_definition[['parameters']]) + + if (!all(param_names_okay)) { + bad_names <- param_names_to_check[!param_names_okay] + + msg <- paste( + 'The following driver-specific parameters are not included in', + 'the base model definition:', + paste0('"', bad_names, '"', collapse = ', ') + ) + + stop(msg) + } + # Each set of drivers must form a valid dynamical system along with the # base model definition valid_definitions <- sapply(data_driver_pairs, function(ddp) { BioCro::validate_dynamical_system_inputs( - base_model_definition[['initial_values']], - base_model_definition[['parameters']], + utils::modifyList( + base_model_definition[['initial_values']], + c(list(), ddp[['initial_values']]) + ), + utils::modifyList( + base_model_definition[['parameters']], + c(list(), ddp[['parameters']]) + ), ddp[['drivers']], base_model_definition[['direct_modules']], base_model_definition[['differential_modules']], @@ -141,6 +261,27 @@ check_data_driver_pairs <- function(base_model_definition, data_driver_pairs) { stop(msg) } + # Print driver-specific initial values and parameters, if necessary + if (verbose) { + cat('\nDriver-specific initial values:\n\n') + + if (all(sapply(iv_names, is.null))) { + cat(' None\n') + } else { + iv <- lapply(data_driver_pairs, function(x) {x[['initial_values']]}) + utils::str(iv) + } + + cat('\nDriver-specific parameters:\n\n') + + if (all(sapply(param_names, is.null))) { + cat(' None\n') + } else { + param <- lapply(data_driver_pairs, function(x) {x[['parameters']]}) + utils::str(param) + } + } + return(invisible(NULL)) } @@ -175,7 +316,12 @@ check_args_to_vary <- function( utils::str(independent_args) cat('\nThe dependent arguments and their initial values:\n\n') - utils::str(dependent_arg_function(independent_args)) + + if (is.null(dependent_arg_function)) { + cat(' None\n') + } else { + utils::str(dependent_arg_function(independent_args)) + } } # Make sure no drivers were specified @@ -420,8 +566,11 @@ check_long_form_data <- function(long_form_data) { # Helping function for checking the objective function; will throw an error if a # problem is detected, and will otherwise be silent with no return value. check_obj_fun <- function(obj_fun, initial_ind_arg_values, verbose) { - initial_error_terms <- - obj_fun(as.numeric(initial_ind_arg_values), return_terms = TRUE) + initial_error_terms <- obj_fun( + as.numeric(initial_ind_arg_values), + return_terms = TRUE, + debug_mode = 'none' + ) initial_error <- sum(unlist(initial_error_terms)) diff --git a/R/write_model.R b/R/write_model.R index 2bb652f..be6ea32 100644 --- a/R/write_model.R +++ b/R/write_model.R @@ -7,19 +7,39 @@ write_model <- function( ode_solver ) { + # Get the longest name among the initial values and parameters + all_names <- c( + names(initial_values), + names(parameters) + ) + + name_width <- max(sapply(all_names, nchar)) + + # Alphabetize the initial values and parameters + initial_values <- initial_values[order(tolower(names(initial_values)))] + parameters <- parameters[order(tolower(names(parameters)))] + + # Prepare the module list strings + dir_module_string <- paste(paste0(' ', names(direct_modules), ' = "', direct_modules, '"'), collapse = ',\n') + diff_module_string <- paste(paste0(' ', names(differential_modules), ' = "', differential_modules, '"'), collapse = ',\n') + + # Remove any lines without names + dir_module_string <- gsub(' = ', ' ', dir_module_string) + diff_module_string <- gsub(' = ', ' ', diff_module_string) + # Fill in the module definition template (defined below) model_text <- sprintf( model_definition_template, name, - paste(paste0(' "', direct_modules, '"'), collapse = ',\n'), - paste(paste0(' "', differential_modules, '"'), collapse = ',\n'), + dir_module_string, + diff_module_string, ode_solver[['type']], ode_solver[['output_step_size']], ode_solver[['adaptive_rel_error_tol']], ode_solver[['adaptive_abs_error_tol']], ode_solver[['adaptive_max_steps']], - paste(paste0(' "', names(initial_values), '" = ', initial_values), collapse = ',\n'), - paste(paste0(' "', names(parameters), '" = ', parameters), collapse = ',\n') + paste(paste0(' ', format(names(initial_values), width = name_width), ' = ', initial_values), collapse = ',\n'), + paste(paste0(' ', format(names(parameters), width = name_width), ' = ', parameters), collapse = ',\n') ) } @@ -31,11 +51,11 @@ model_definition_template <- '%s <- list( %s ), ode_solver = list( - type = "%s", - output_step_size = %f, + type = "%s", + output_step_size = %f, adaptive_rel_error_tol = %e, adaptive_abs_error_tol = %e, - adaptive_max_steps = %i + adaptive_max_steps = %i ), initial_values = list( %s diff --git a/README.md b/README.md index f6eeee0..42a223e 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ from within R by typing `install.packages('remotes')`. ### Usage The best way to learn about using `BioCroValidation` is to visit the -[BioCroValidation website](https://biocro.github.io/BioCroValidation/index.html) +[BioCroValidation website](https://biocro.org/BioCroValidation/index.html) and click the "Get Started" link in the top menu bar. The website includes documentation for all the functions and data sets included in the package, as well as articles that describe its general features and several important use diff --git a/man/objective_function.Rd b/man/objective_function.Rd index 2cb1e5a..b97461a 100644 --- a/man/objective_function.Rd +++ b/man/objective_function.Rd @@ -24,7 +24,7 @@ parameterization procedure. For a detailed example of using \code{objective_function}, see the - "Parameterizing Soybean-BioCro" vignette/article. + "Parameterizing Soybean-BioCro" vignette. } \usage{ @@ -55,8 +55,8 @@ \item{data_driver_pairs}{ A list of named elements, where each element is a "data-driver pair." A data-driver pair is a list with three required elements: \code{data}, - \code{drivers}, and \code{weight}. Optionally, it may also have a - \code{data_stdev} element. + \code{drivers}, and \code{weight}. Optionally, it may also have elements + named \code{data_stdev}, \code{initial_values}, and \code{parameters}. The \code{data} element must be a data frame with one column named \code{time}, whose values follow BioCro's definition of @@ -76,13 +76,20 @@ should represent the standard deviation associated with each entry in \code{data}. If \code{data_stdev} is not supplied, all standard deviations will be set to 1. + + The optional \code{initial_values} and \code{parameters} elements must be + named lists of driver-specific initial values and parameters, respectively, + that will overwrite the default values specified in the + \code{base_model_definition}. When driver-specific \code{initial_values} are + provided, they must be provided for each of the data-driver pairs; the same + rule applies for driver-specific \code{parameters}. } \item{independent_args}{ A list of named numeric values. The names will determine the independent arguments to be varied during their optimization, and the values specify - "initial" or "test" values of each argument that will be used internally to - check that the objective function is properly defined and can be evaluated. + "test" values of each argument that will be used internally to check that + the objective function is properly defined and can be evaluated. } \item{quantity_weights}{ @@ -128,7 +135,8 @@ \item{regularization_method}{ A string indicating the regularization method to be used when calculating - the regularization penalty term; see below for more details. + the regularization penalty term, or a function that calculates the penalty; + see below for more details. } \item{dependent_arg_function}{ @@ -165,16 +173,17 @@ \strong{Overview} When parameterizing a BioCro model, the general idea is to vary a subset of - the model's parameters to achieve the best agreement with a set of observed - data. The degree of agreement is expressed as an "error metric," which - includes terms derived from the agreement between the modeled and observed - values, as well as (optional) penalty terms. A function that calculates an - error metric when given a set of parameter values is called an "objective - function." Defining an objective function suitable for BioCro parameterization - can be complicated, but the \code{objective_function} function helps to - simplify the process of creating such a function. It is designed to - accommodate the following scenarios, which often occur in the context of - BioCro model parameterization: + the model's \code{parameters} and/or \code{initial_values} to achieve the + best agreement with a set of observed data. The degree of agreement is + expressed as an "error metric," which includes terms derived from the + agreement between the modeled and observed values, as well as (optional) + penalty terms. A function that calculates an error metric when given a set of + argument values (\code{parameters} and/or \code{initial_values}) is called an + "objective function." Defining an objective function suitable for BioCro + parameterization can be complicated, but the \code{objective_function} + function helps to simplify the process of creating such a function. It is + designed to accommodate the following scenarios, which often occur in the + context of BioCro model parameterization: \itemize{ \item \strong{Multi-year or multi-location data:} Often the model needs to @@ -183,7 +192,10 @@ simulation must be compared to associated sets of observed data. Here, this is handled through the \code{data_driver_pairs}, which allows the user to specify which drivers and data sets should be compared to each - other. + other. Optionally, it is also possible to specify different initial + values or parameters for each set of drivers; for example, the + atmospheric CO2 concentration may need to change for different years, + or soil properties may need to change for different locations. \item \strong{Complicated normalization:} Care must be taken to ensure that certain years or output variables are not over-valued in the error @@ -267,7 +279,7 @@ by retrieving the value of the \eqn{Y_i} variable at the closest time to \eqn{t_i} that is included in the model output. It is assumed that the model always outputs the same sequence of time values each time it - is run with a particular set of drivers, regardless of the input + is run with a particular set of drivers, regardless of the provided argument values. The quantity-based weight factors \eqn{w_i^{quantity}} are directly @@ -314,7 +326,7 @@ The function \eqn{f_{user}} must accept a single data frame as an input and return a single numeric value as its output, but has no other requirements. It is specified via the - \code{extra_penalty_function} argument. When + \code{extra_penalty_function}. When \code{extra_penalty_function} is \code{NULL}, \eqn{P_{user}} is zero. \item \strong{Regularization penalty term:} The regularization penalty term @@ -332,9 +344,10 @@ \strong{Standard-deviation-based weight methods} - The following methods are available for determining weight factors from values - of the standard deviation (\eqn{\sigma}), which can be (optionally) supplied - via the \code{data_stdev} elements of the \code{data_driver_pairs}: + The following pre-set methods are available for determining weight factors + from values of the standard deviation (\eqn{\sigma}), which can be + (optionally) supplied via the \code{data_stdev} elements of the + \code{data_driver_pairs}: \itemize{ \item \code{'equal'}: For this method, \eqn{w_i^{stdev}} is always set to @@ -350,14 +363,14 @@ ln \left( \frac{1}{\sigma_i + \epsilon} \right),} where \eqn{ln} denotes a logarithm with base \eqn{e} and \eqn{\epsilon} is a small number included to prevent numerical errors - that would otherwise occur when \eqn{\sigma = 0}. This method was + that would otherwise occur when \eqn{\sigma_i = 0}. This method was used in the original Soybean-BioCro paper. - The value of \eqn{\epsilon} is specified by the - \code{stdev_weight_param} input argument, which defaults to - \code{1e-5} when \code{stdev_weight_param} is \code{NULL} when using - this method. With the default value of \eqn{\epsilon}, - \eqn{w_i^{stdev} \approx 11.512} when \eqn{\sigma = 0}. + The value of \eqn{\epsilon} is specified by \code{stdev_weight_param}, + which defaults to \code{1e-5} if \code{stdev_weight_param} is + \code{NULL} when using this method. With the default value of + \eqn{\epsilon}, \eqn{w_i^{stdev} \approx 11.512} when + \eqn{\sigma = 0}. Note: this method should be used with caution, because \eqn{w_i^{stdev}} is zero for \eqn{\sigma_i = 1 - \epsilon}, and it @@ -369,11 +382,10 @@ where \eqn{\epsilon} is a small number included to prevent numerical errors that would otherwise occur when \eqn{\sigma_i = 0}. - The value of \eqn{\epsilon} is specified by the - \code{stdev_weight_param} input argument, which defaults to - \code{1e-1} when \code{stdev_weight_param} is \code{NULL} when using - this method. With the default value of \eqn{\epsilon}, - \eqn{w_i^{stdev} = 10} when \eqn{\sigma_i = 0}. + The value of \eqn{\epsilon} is specified by \code{stdev_weight_param}, + which defaults to \code{1e-1} if \code{stdev_weight_param} is + \code{NULL} when using this method. With the default value of + \eqn{\epsilon}, \eqn{w_i^{stdev} = 10} when \eqn{\sigma_i = 0}. } If any values of \eqn{w_i^{stdev}} are undefined, negative, or infinite, an @@ -381,7 +393,7 @@ \strong{Normalization methods} - The following normalization methods are available: + The following pre-set normalization methods are available: \itemize{ \item \code{'equal'}: For this method, \eqn{N_i} is always set to 1. In @@ -404,13 +416,21 @@ \item \code{'max'}: For this method, when \eqn{Y_i} is named \code{vtype} and the observation is from a set called \code{vdata}, then - \deqn{N_i = \left( max_{vtype}^{vdata} \right)^2,} - where \eqn{max_{vtype}^{vdata}} is the maximum observed value of - \code{vtype} across \code{vdata}. In this case, the observed and + \deqn{N_i = \left( max_{vtype}^{vdata} \right)^2 + \epsilon,} + where \eqn{max_{vtype}^{vdata}} is the maximum observed absolute value + of \code{vtype} across \code{vdata} and \eqn{\epsilon} is a small + number included to prevent numerical errors that would otherwise occur + when \eqn{max_{vtype}^{vdata} = 0}. In this case, the observed and modeled values that appear in the equation for \eqn{E_{agreement}} are - essentially normalized by their maximum value, hence the name for this - method. This approach avoids over-representing variable types with - larger magnitude when determining \eqn{E_{agreement}}. + essentially normalized by their maximum magnitude, hence the name for + this method. This approach avoids over-representing variable types + with larger magnitude when determining \eqn{E_{agreement}}. + + The value of \eqn{\epsilon} is specified by + \code{normalization_param}, which defaults to \code{1e-1} if + \code{normalization_param} is \code{NULL} when using this method. + With the default value of \eqn{\epsilon}, \eqn{N_i = 10} when + \eqn{max_{vtype}^{vdata} = 0}. \item \code{'obs'}: For this method, @@ -423,35 +443,34 @@ avoids over-representing time points where a particular quantity takes its largest values when determining \eqn{E_{agreement}}. - The value of \eqn{\epsilon} is specified by the - \code{normalization_param} input argument, which defaults to - \code{1e-1} when \code{normalization_param} is \code{NULL} when using - this method. With the default value of \eqn{\epsilon}, \eqn{N_i = 10} - when \eqn{Y_{obs}^i = 0}. + The value of \eqn{\epsilon} is specified by + \code{normalization_param}, which defaults to \code{1e-1} if + \code{normalization_param} is \code{NULL} when using this method. + With the default value of \eqn{\epsilon}, \eqn{N_i = 10} when + \eqn{Y_{obs}^i = 0}. \item \code{'mean_max'}: For this method, the "mean" and "max" methods are combined so that - \deqn{N_i = n_{vtype}^{vdrivers}% - \cdot n_{data} \cdot \left( max_{vtype}^{vdata} \right)^2.} + \deqn{N_i = n_{vtype}^{vdrivers} \cdot n_{data}% + \cdot \left[ \left( max_{vtype}^{vdata} \right)^2 + \epsilon \right].} This approach avoids over-representing drivers with larger numbers of associated observations, and variable types with larger magnitudes. This method is used for parameterizing Soybean-BioCro. + The value \eqn{\epsilon} is specified by \code{normalization_param}, + using the same default value as in the \code{'max'} method. + \item \code{'mean_obs'}: For this method, the "mean" and "obs" methods are combined so that \deqn{N_i = n_{vtype}^{vdrivers} \cdot n_{data}% - \cdot \left( \left( Y_{obs}^i \right)^2 + \epsilon \right)^2.} + \cdot \left[ \left( Y_{obs}^i \right)^2 + \epsilon \right].} This approach avoids over-representing drivers with larger numbers of associated observations, and time points with larger observed values. - The value of \eqn{\epsilon} is specified by the - \code{normalization_param} input argument, which defaults to - \code{1e-1} when \code{normalization_param} is \code{NULL} when using - this method. With the default value of \eqn{\epsilon}, - \eqn{N_i = 10 \cdot n_{vtype}^{vdrivers} \cdot n_{data}} when - \eqn{Y_{obs}^i = 0}. + The value \eqn{\epsilon} is specified by \code{normalization_param}, + using the same default value as in the \code{'obs'} method. } In most situations, it is recommended to use either \code{'mean_max'} or @@ -459,7 +478,7 @@ \strong{Regularization methods} - The following regularization methods are available: + The following pre-set regularization methods are available: \itemize{ \item \code{'none'}: For this method, \eqn{P_{regularization}} is always set @@ -486,6 +505,10 @@ section below for details of how to specify \eqn{\lambda}. } + It is also possible to supply a function that accepts two input arguments + (\code{x} and \code{lambda}, as described in the "Value" section below) and + returns a numeric penalty value. + \strong{Input checks} Several checks are made to ensure that the objective function is properly @@ -505,14 +528,14 @@ output, when accounting for the \code{data_definitions} and \code{post_process_function}. - \item Ensuring that each independent and dependent argument name is either - a parameter or initial value of the model. Internally, argument names - are passed to \code{\link[BioCro]{partial_run_biocro}} via its - \code{arg_names} input. Note that argument names passed to - \code{partial_run_biocro} can technically include drivers, but it is - unlikely that the value of a driver would be varied during an - optimization, so the argument names are not allowed to include - columns in the drivers. + \item Ensuring that each independent and dependent argument name is a member + of the model's \code{parameters} or \code{initial_values}. Internally, + argument names are passed to \code{\link[BioCro]{partial_run_biocro}} + via its \code{arg_names} input. Note that argument names passed to + \code{partial_run_biocro} can technically include members of the + \code{drivers}, but it is unlikely that the value of a driver would be + varied during an optimization, so the argument names are not allowed + to include columns in the \code{drivers}. \item Ensuring that the optional \code{dependent_arg_function}, \code{post_process_function}, and \code{extra_penalty_function} @@ -538,31 +561,38 @@ \value{ A function \code{obj_fun} with signature - \code{obj_fun(x, lambda = 0, return_terms = FALSE, debug_mode = FALSE)}. + \code{obj_fun(x, lambda = 0, return_terms = FALSE, debug_mode = 'minimal')}. Here, \code{x} is a numeric vector of values of the independent arguments (in the same order as in \code{independent_arg_names}), and \code{lambda} is the value of the regularization parameter. - The \code{return_terms} argument determines the return value of - \code{obj_fun}. When \code{return_terms} is \code{FALSE}, \code{obj_fun} - returns values of the error metric \eqn{E}. When \code{return_terms} is - \code{TRUE}, \code{obj_fun} returns a list including each individual term of - the total error metric. + When \code{return_terms} is \code{FALSE}, \code{obj_fun} returns values of the + error metric \eqn{E}. When \code{return_terms} is \code{TRUE}, \code{obj_fun} + returns a list including each individual term of the total error metric. During optimization, \code{return_terms} should always be \code{FALSE}. Setting it to \code{TRUE} can be useful for troubleshooting, or for diagnostics such as checking the quality of fit for each of the data-driver pairs. - The \code{debug_mode} argument determines whether \code{obj_fun} is running in - debug mode. In debug mode, each time \code{obj_fun} is called, it will print - the values of \code{x} and the error metric to the R terminal. This can be - useful when troubleshooting a problem with an optimization, since it provides - a record of any problematic parameter combinations. When setting - \code{debug_mode} to \code{TRUE}, also consider using \code{\link[base]{sink}} - to write the outputs to a file instead of the R terminal. In that case, there - will still be a record even if R crashes. + When \code{debug_mode} is \code{'minimal'} or \code{'everything'}, + \code{obj_fun} operates in "debug mode." In the "minimal" version, problematic + values of \code{x} will be printed to the terminal, along with an explanation + of the problem that was caused. In the "everything" version, the values of + \code{x} and the error metric will be printed to the terminal every time + \code{obj_fun} is called. When \code{debug_mode} is \code{'none'} (or any + other value not mentioned above), no debug printing will occur. + + Debug mode can be useful when troubleshooting a problem with an optimization, + since it provides a record of any problematic parameter combinations. Once a + problematic set of argument values is identified, it can be investigated + further by calling \code{obj_fun} again with \code{x} set to the problematic + values and \code{return_terms} set to \code{TRUE}. + + When setting \code{debug_mode} to \code{TRUE}, also consider using + \code{\link[base]{sink}} to write the outputs to a file instead of the R + terminal. In that case, there will still be a record even if R crashes. } \examples{ @@ -602,18 +632,21 @@ if (require(BioCro)) { # The data-driver pairs can now be created by associating each data set with # its corresponding weather data. Here we will weight the 2005 data twice as - # heavily as the 2002 data. + # heavily as the 2002 data. Note that we also specify different atmospheric + # CO2 concentrations for each year. data_driver_pairs <- list( ambient_2002 = list( data = process_table(soyface_biomass[['ambient_2002']]), data_stdev = process_table(soyface_biomass[['ambient_2002_std']]), drivers = BioCro::soybean_weather[['2002']], + parameters = list(Catm = with(BioCro::catm_data, {Catm[year == '2002']})), weight = 1 ), ambient_2005 = list( data = process_table(soyface_biomass[['ambient_2005']]), data_stdev = process_table(soyface_biomass[['ambient_2005_std']]), drivers = BioCro::soybean_weather[['2005']], + parameters = list(Catm = with(BioCro::catm_data, {Catm[year == '2005']})), weight = 2 ) ) @@ -644,6 +677,8 @@ if (require(BioCro)) { # original Soybean-BioCro model. independent_args <- BioCro::soybean[['parameters']][c('alphaLeaf', 'betaLeaf')] + initial_guess <- as.numeric(independent_args) + dependent_arg_function <- function(ind_args) { list(alphaStem = ind_args[['alphaLeaf']]) } @@ -670,6 +705,12 @@ if (require(BioCro)) { } } + # We want to use the regularization term to penalize deviations away from the + # initial guess, so we will define a custom L2 regularization function + regularization_function <- function(x, lambda) { + lambda * sum((x - initial_guess)^2) + } + # Now we can finally create the objective function obj_fun <- objective_function( base_model_definition, @@ -678,6 +719,7 @@ if (require(BioCro)) { quantity_weights, data_definitions = data_definitions, stdev_weight_method = 'logarithm', + regularization_method = regularization_function, dependent_arg_function = dependent_arg_function, post_process_function = post_process_function, extra_penalty_function = extra_penalty_function @@ -686,17 +728,18 @@ if (require(BioCro)) { # This function could now be passed to an optimizer; here we will simply # evaluate it for two sets of parameter values. - # Try doubling each parameter value; in this case, the value of the - # objective function increases, indicating a lower degree of agreement between - # the model and the observed data. Here we will call `obj_fun` in debug mode, - # which will automatically print the value of the error metric. - cat('\nError metric calculated by doubling the initial argument values:\n') - error_metric <- obj_fun(2 * as.numeric(independent_args), debug_mode = TRUE) + # Try doubling each parameter value and setting lambda to a nonzero value; in + # this case, the value of the objective function increases, indicating a lower + # degree of agreement between the model and the observed data. Here we will + # call `obj_fun` in debug mode, which will automatically print the value of + # the error metric. + cat('\nError metric calculated by doubling the original argument values:\n') + error_metric <- obj_fun(2 * initial_guess, 0.001, debug_mode = 'everything') # We can also see the values of each term that makes up the error metric; # again, we will call `obj_fun` in debug mode for automatic printing. - cat('\nError metric terms calculated by doubling the initial argument values:\n') + cat('\nError metric terms calculated by doubling the original argument values:\n') error_terms <- - obj_fun(2 * as.numeric(independent_args), return_terms = TRUE, debug_mode = TRUE) + obj_fun(2 * initial_guess, 0.001, return_terms = TRUE, debug_mode = 'everything') } } diff --git a/man/write_model.Rd b/man/write_model.Rd index 86d98a4..306856d 100644 --- a/man/write_model.Rd +++ b/man/write_model.Rd @@ -62,6 +62,11 @@ output. See examples below. Note that it is customary to name the R script file as \code{name.R}, where \code{name} is the value provided to the function itself. + + The \code{initial_values} and \code{parameters} will be alphabetized + (case-insensitive). The \code{ode_solver} elements will be printed in the + customary BioCro order. The \code{direct_modules} and + \code{differential_modules} will not be reordered. } \value{ @@ -85,7 +90,7 @@ if (require(BioCro)) { if (interactive()) { # Use writeLines to save as a `.R` file - writeLines(out, "./test_soybean_model.h") + writeLines(out, "./test_soybean_model.R") } } } diff --git a/tests/test_data/test_model.R b/tests/test_data/test_model.R index 42ba546..076b505 100644 --- a/tests/test_data/test_model.R +++ b/tests/test_data/test_model.R @@ -6,19 +6,19 @@ test_model <- list( "BioCro:harmonic_oscillator" ), ode_solver = list( - type = "boost_rkck54", - output_step_size = 1.000000, + type = "boost_rkck54", + output_step_size = 1.000000, adaptive_rel_error_tol = 1.000000e-04, adaptive_abs_error_tol = 1.000000e-04, - adaptive_max_steps = 200 + adaptive_max_steps = 200 ), initial_values = list( - "position" = 1, - "velocity" = 0 + position = 1, + velocity = 0 ), parameters = list( - "timestep" = 1, - "mass" = 1, - "spring_constant" = 0.5 + mass = 1, + spring_constant = 0.5, + timestep = 1 ) ) diff --git a/tests/testthat/test-objective_function.R b/tests/testthat/test-objective_function.R index dcfccf1..cf896ed 100644 --- a/tests/testthat/test-objective_function.R +++ b/tests/testthat/test-objective_function.R @@ -27,7 +27,7 @@ ddps <- list( ) independent_args <- with(BioCro::soybean[['parameters']], { - list(alphaLeaf = alphaLeaf, betaLeaf = betaLeaf) + list(alphaLeaf = alphaLeaf, betaLeaf = betaLeaf, Catm = Catm) }) data_definitions <- list( @@ -71,6 +71,13 @@ test_that('Objective functions can be created and behave as expected', { obj_fun(as.numeric(independent_args)) ) + # Here we intentionally pass a bad value of Catm that will trigger an error + error_val <- expect_silent( + obj_fun(as.numeric(within(independent_args, {Catm = -1})), debug_mode = 'none') + ) + + expect_equal(error_val, 2 * BioCroValidation:::FAILURE_VALUE) + # One data-driver pair, no dependent arguments obj_fun <- expect_silent( objective_function( @@ -84,6 +91,24 @@ test_that('Objective functions can be created and behave as expected', { ) ) + # Make sure there are no errors for one data-driver pair, no dependent + # arguments in verbose mode + sink(tempfile()) + + expect_no_error( + objective_function( + model, + ddps[1], + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = TRUE + ) + ) + + sink() + # Two data-driver pairs, with dependent arguments and L2 regularization obj_fun <- expect_silent( objective_function( @@ -103,6 +128,25 @@ test_that('Objective functions can be created and behave as expected', { obj_fun(as.numeric(independent_args), lambda = 0.5) ) + # Two data-driver pairs, with dependent arguments and L4 regularization + obj_fun <- expect_silent( + objective_function( + model, + ddps, + independent_args, + quantity_weights, + data_definitions = data_definitions, + dependent_arg_function = dependent_arg_function, + post_process_function = post_process_function, + regularization_method = function(x, lambda) {lambda * sum(x^4)}, + verbose_startup = verbose_startup + ) + ) + + expect_silent( + obj_fun(as.numeric(independent_args), lambda = 0.5) + ) + expect_true( is.list( obj_fun(as.numeric(independent_args), lambda = 0.5, return_terms = TRUE) @@ -186,7 +230,7 @@ ambient_2005: Error: `bad_arg_name` from `arg_names` is not in the `initial_valu ) }) -test_that('Model failures are detected', { +test_that('Model failures at startup are detected', { expect_error( objective_function( within(model, {parameters$lnfun = 1}), @@ -259,6 +303,159 @@ test_that('Data-driver pairs must have correct elements', { ) }) +test_that('Driver-specific initial values are checked and used', { + expect_error( + objective_function( + model, + within(ddps, {ambient_2002$initial_values = 5}), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'When provided, the driver-specific initial values must be a list of named elements' + ) + + expect_error( + objective_function( + model, + within(ddps, {ambient_2002$initial_values = list(5)}), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'When provided, the driver-specific initial values must be a list of named elements' + ) + + expect_error( + objective_function( + model, + within(ddps, {ambient_2002$initial_values = list(Leaf = 5)}), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'The following driver-specific initial value names were provided in the data-driver pairs:\nambient_2002 : Leaf\nambient_2005 : \nWhen provided, these names must be the same for each set of drivers', + fixed = TRUE + ) + + expect_error( + objective_function( + model, + within(ddps, { + ambient_2002$initial_values = list(extra_iv = 5) + ambient_2005$initial_values = list(extra_iv = 7) + }), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'The following driver-specific initial values are not included in the base model definition: "extra_iv"' + ) + + # Here we specify a "bad" initial value of Leaf for 2005. This will trigger + # an error, but the particular error may depend on the particular version of + # BioCro being used, so we don't check the error message content. + expect_error( + objective_function( + model, + within(ddps, { + ambient_2002$initial_values = list(Leaf = 0.1) # This shouldn't cause a problem + ambient_2005$initial_values = list(Leaf = -1) # This should trigger an error + }), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ) + ) +}) + +test_that('Driver-specific parameters are checked and used', { + expect_error( + objective_function( + model, + within(ddps, {ambient_2002$parameters = 5}), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'When provided, the driver-specific parameters must be a list of named elements' + ) + + expect_error( + objective_function( + model, + within(ddps, {ambient_2002$parameters = list(5)}), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'When provided, the driver-specific parameters must be a list of named elements' + ) + + expect_error( + objective_function( + model, + within(ddps, {ambient_2002$parameters = list(Catm = 5)}), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'The following driver-specific parameter names were provided in the data-driver pairs:\nambient_2002 : Catm\nambient_2005 : \nWhen provided, these names must be the same for each set of drivers', + fixed = TRUE + ) + + expect_error( + objective_function( + model, + within(ddps, { + ambient_2002$parameters = list(extra_param = 5) + ambient_2005$parameters = list(extra_param = 7) + }), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'The following driver-specific parameters are not included in the base model definition: "extra_param"' + ) + + # Here we specify a "bad" value of the Ball-Berry intercept for 2005. This + # will trigger an error, but the particular error may depend on the + # particular version of BioCro being used, so we don't check the error + # message content. + expect_error( + objective_function( + model, + within(ddps, { + ambient_2002$parameters = list(b0 = 0.1) # This shouldn't cause a problem + ambient_2005$parameters = list(b0 = -1) # This should trigger an error + }), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ) + ) +}) + test_that('Data must have a `time` column', { expect_error( objective_function( diff --git a/vignettes/parameterizing_soybean_biocro.Rmd b/vignettes/parameterizing_soybean_biocro.Rmd index 651894a..3a3612e 100644 --- a/vignettes/parameterizing_soybean_biocro.Rmd +++ b/vignettes/parameterizing_soybean_biocro.Rmd @@ -163,7 +163,8 @@ process_table <- function(data_table, type) { The `BioCro` R package includes weather data for the years in the `soyface_biomass` data set. So now we are ready to define the data-driver pairs, which includes the weather, the observed biomass, the standard deviation of the -observed biomass, and the weight to assign to each year: +observed biomass, the atmospheric CO2 concentration to use for each year, and +the weight to assign to each year: ```{r data_driver_pairs} # Define the data-driver pairs @@ -172,12 +173,14 @@ data_driver_pairs <- list( data = process_table(soyface_biomass[['ambient_2002']], 'biomass'), data_stdev = process_table(soyface_biomass[['ambient_2002_std']], 'stdev'), drivers = BioCro::soybean_weather[['2002']], + parameters = list(Catm = with(BioCro::catm_data, {Catm[year == '2002']})), weight = 1 ), ambient_2005 = list( data = process_table(soyface_biomass[['ambient_2005']], 'biomass'), data_stdev = process_table(soyface_biomass[['ambient_2005_std']], 'stdev'), drivers = BioCro::soybean_weather[['2005']], + parameters = list(Catm = with(BioCro::catm_data, {Catm[year == '2005']})), weight = 1 ) ) @@ -564,7 +567,7 @@ optim_res <- nmkb( tol = 1e-2, restarts.max = 10 ), - debug_mode = FALSE # Passed to obj_fun; set to TRUE to enable debug mode + debug_mode = 'minimal' # Passed to obj_fun ) ``` @@ -630,9 +633,11 @@ guess. In this example, `r sum(ind_arg_table[['improved']])` out of (`r round(100 * sum(ind_arg_table[['improved']]) / nrow(ind_arg_table))` percent). We can also compare the error metric to its original value. As shown above, it -is now `r optim_res[['value']]`, which is only -`r round(100 * (optim_res[['value']] - default_error) / optim_res[['value']], 1)` -percent larger than with the default parameter values. +is now `r optim_res[['value']]`, which is +`r round(100 * abs(optim_res[['value']] - default_error) / optim_res[['value']], 1)` +percent +`r if (optim_res[['value']] > default_error) {'larger'} else {'smaller'}` +than with the default parameter values. The optimized parameter values could likely be improved by using a more stringent tolerance for the optimizer, but this would require more time to run.