diff --git a/DESCRIPTION b/DESCRIPTION index 32421d9..6ea6709 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: BioCroValidation -Version: 0.2.0-0 +Version: 0.2.0-1 Title: Tools for Validating BioCro Models Description: A collection of tools for validating BioCro crop growth models. Authors@R: c( diff --git a/NEWS.md b/NEWS.md index b0ba5f4..e7637bb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -40,6 +40,13 @@ be directly added to this file to describe the related changes. - Allowed user-supplied regularization functions +- Errors that occur while running simulations are now caught so they do not + prevent an optimization from finishing + +- More options for `debug_mode` are now available; the default setting + (`minimal`) only prints info to the terminal when an issue with a simulation + occurs + # Changes in BioCroValidation Version 0.2.0 (2025-05-23) - Added 2002 and 2005 SoyFACE biomass and standard deviation data. diff --git a/R/objective_function_helpers.R b/R/objective_function_helpers.R index 0b6c4f6..b376388 100644 --- a/R/objective_function_helpers.R +++ b/R/objective_function_helpers.R @@ -4,6 +4,13 @@ # Value to return when a simulation fails to run FAILURE_VALUE <- 1e10 +# Define the allowed debug modes +DEBUG_MODES <- c( + 'none', + 'minimal', + 'everything' +) + # Helping function for getting a full list of argument names get_full_arg_names <- function(independent_args, dependent_arg_function) { # Get the independent argument names @@ -364,6 +371,24 @@ one_error <- function( } } +# Helping function for debug printing. Here, `obj` should either be a list (in +# which case its "structure" will be printed via `str`) or a numeric vector (in +# which case its values will be printed with up to 32 decimal places). +# `obj_name` should be a string describing what is being printed. The printout +# will include newlines at the start and end, along with a timestamp. 
+debug_print <- function(obj, obj_name) { + cat(paste0('\nTime: ', Sys.time()), ' ', obj_name, ': ') + + if (is.list(obj)) { + cat('\n\n') + utils::str(obj) + } else { + cat(paste(sprintf('%.32f', obj), collapse = ', ')) + } + + cat('\n') +} + # Helping function for returning a failure value failure_value <- function(error_sum, return_terms) { if (return_terms) { @@ -384,13 +409,40 @@ error_from_res <- function( ddp_weight, normalization_method, extra_penalty_function, - return_terms + return_terms, + x, + debug_mode ) { + # If there was an error, return a very high value + if (is.null(simulation_result)) { + return( + failure_value(NA, return_terms) + ) + } + # If the simulation did not finish, return a very high value expected_npts <- long_form_data_table[1, 'expected_npts'] if (nrow(simulation_result) < expected_npts) { + if (debug_mode %in% c(DEBUG_MODES[2], DEBUG_MODES[3])) { + # debug_mode is `minimal` or `everything`, so indicate that the + # simulation did not finish; if debug_mode is `minimal`, the inputs + # have not yet been printed, so we need to print them here + if (debug_mode == DEBUG_MODES[2]) { + debug_print(x, 'Independent argument values') + } + + msg <- paste0( + '\nSimulation did not finish; produced ', + nrow(simulation_result), + ' rows instead of the expected ', + expected_npts, '\n' + ) + + cat(msg) + } + return( failure_value(NA, return_terms) ) @@ -424,6 +476,24 @@ error_from_res <- function( # If the error sum is not finite, return a very high value if (!is.finite(error_sum)) { + if (debug_mode %in% c(DEBUG_MODES[2], DEBUG_MODES[3])) { + # debug_mode is `minimal` or `everything`, so indicate that the sum + # of squared errors is not finite; if debug_mode is `minimal`, the + # inputs have not yet been printed, so we need to print them here + if (debug_mode == DEBUG_MODES[2]) { + debug_print(x, 'Independent argument values') + } + + msg <- paste0( + '\nSum of squared errors is not finite: ', + error_sum, + '\n' + ) + + cat(msg) + } + + 
return( failure_value(error_sum, return_terms) ) @@ -470,7 +540,16 @@ regularization_penalty <- function( } } -# Helping function that forms the overall objective function +# Helping function that forms the overall objective function. Here, `debug_mode` +# should be a numeric value, and the debug outputs work as follows: +# +# - debug_mode 1: Minimal debug printing; inputs and error messages will be +# printed only if a simulation fails to complete +# +# - debug mode 2: Maximal debug printing; all inputs and outputs will be printed +# +# - any other value: No debug printing +# get_obj_fun <- function( model_runners, long_form_data, @@ -481,21 +560,32 @@ get_obj_fun <- function( regularization_method ) { - function(x, lambda = 0, return_terms = FALSE, debug_mode = FALSE) { - if (debug_mode) { - msg <- paste0( - '\nTime: ', - Sys.time(), - ' Independent argument values: ', - paste(x, collapse = ', '), - '\n' - ) - cat(msg) + function(x, lambda = 0, return_terms = FALSE, debug_mode = 'minimal') { + if (tolower(debug_mode) == DEBUG_MODES[3]) { + # debug_mode is `everything`, so print the inputs + debug_print(x, 'Independent argument values') } errors <- lapply(seq_along(model_runners), function(i) { runner <- model_runners[[i]] - res <- runner(x) + + res <- tryCatch( + runner(x), + error = function(e) { + if (debug_mode %in% c(DEBUG_MODES[2], DEBUG_MODES[3])) { + # debug_mode is `minimal` or `everything`, so print the + # error message; if debug_mode is `minimal`, the inputs + # have not yet been printed, so we need to print them + # here + if (debug_mode == DEBUG_MODES[2]) { + debug_print(x, 'Independent argument values') + } + cat(paste0('\n', conditionMessage(e), '\n')) + } + # Return NULL to indicate that an error occurred + NULL + } + ) error_from_res( res, @@ -504,7 +594,9 @@ get_obj_fun <- function( ddp_weights[[i]], normalization_method, extra_penalty_function, - return_terms + return_terms, + x, + debug_mode ) }) @@ -519,25 +611,18 @@ get_obj_fun <- function( 
regularization_penalty = reg_penalty ) - if (debug_mode) { - cat(paste0('Time: ', Sys.time()), ' Error metric terms: ') - utils::str(error_metric_terms) - cat('\n') + if (tolower(debug_mode) == DEBUG_MODES[3]) { + # debug_mode is `everything`, so print the error metric terms + debug_print(error_metric_terms, 'Error metric terms') } error_metric_terms } else { error_metric <- sum(as.numeric(errors)) + reg_penalty - if (debug_mode) { - msg <- paste0( - 'Time: ', - Sys.time(), - ' Error metric: ', - error_metric, - '\n' - ) - cat(msg) + if (tolower(debug_mode) == DEBUG_MODES[3]) { + # debug_mode is `everything`, so print the error metric + debug_print(error_metric, 'Error metric') } error_metric diff --git a/R/objective_function_input_checks.R b/R/objective_function_input_checks.R index 7a7a5a0..f8d0afb 100644 --- a/R/objective_function_input_checks.R +++ b/R/objective_function_input_checks.R @@ -420,8 +420,11 @@ check_long_form_data <- function(long_form_data) { # Helping function for checking the objective function; will throw an error if a # problem is detected, and will otherwise be silent with no return value. check_obj_fun <- function(obj_fun, initial_ind_arg_values, verbose) { - initial_error_terms <- - obj_fun(as.numeric(initial_ind_arg_values), return_terms = TRUE) + initial_error_terms <- obj_fun( + as.numeric(initial_ind_arg_values), + return_terms = TRUE, + debug_mode = 'none' + ) initial_error <- sum(unlist(initial_error_terms)) diff --git a/man/objective_function.Rd b/man/objective_function.Rd index cd11df9..21de429 100644 --- a/man/objective_function.Rd +++ b/man/objective_function.Rd @@ -551,7 +551,7 @@ \value{ A function \code{obj_fun} with signature - \code{obj_fun(x, lambda = 0, return_terms = FALSE, debug_mode = FALSE)}. + \code{obj_fun(x, lambda = 0, return_terms = FALSE, debug_mode = 'minimal')}. 
  Here, \code{x} is a numeric vector of values of the independent arguments (in
  the same order as in \code{independent_arg_names}), and \code{lambda} is the
@@ -566,14 +575,23 @@
   diagnostics such as checking the quality of fit for each of the data-driver
   pairs.
 
-  When \code{debug_mode} is \code{TRUE}, \code{obj_fun} operates in "debug
-  mode." In debug mode, each time \code{obj_fun} is called, it will print the
-  values of \code{x} and the error metric to the R terminal. This can be useful
-  when troubleshooting a problem with an optimization, since it provides a
-  record of any problematic parameter combinations. When setting
-  \code{debug_mode} to \code{TRUE}, also consider using \code{\link[base]{sink}}
-  to write the outputs to a file instead of the R terminal. In that case, there
-  will still be a record even if R crashes.
+  When \code{debug_mode} is \code{'minimal'} or \code{'everything'},
+  \code{obj_fun} operates in "debug mode." In the "minimal" version, problematic
+  values of \code{x} will be printed to the terminal, along with an explanation
+  of the problem that was caused. In the "everything" version, the values of
+  \code{x} and the error metric will be printed to the terminal every time
+  \code{obj_fun} is called. When \code{debug_mode} is \code{'none'} (or any
+  other value not mentioned above), no debug printing will occur.
+
+  Debug mode can be useful when troubleshooting a problem with an optimization,
+  since it provides a record of any problematic parameter combinations. Once a
+  problematic set of argument values is identified, it can be investigated
+  further by calling \code{obj_fun} again with \code{x} set to the problematic
+  values and \code{return_terms} set to \code{TRUE}.
+
+  When using debug mode, also consider using \code{\link[base]{sink}} to write
+  the outputs to a file instead of the R terminal. In that case, there will
+  still be a record even if R crashes.
} \examples{ @@ -712,12 +721,12 @@ if (require(BioCro)) { # call `obj_fun` in debug mode, which will automatically print the value of # the error metric. cat('\nError metric calculated by doubling the original argument values:\n') - error_metric <- obj_fun(2 * initial_guess, 0.001, debug_mode = TRUE) + error_metric <- obj_fun(2 * initial_guess, 0.001, debug_mode = 'everything') # We can also see the values of each term that makes up the error metric; # again, we will call `obj_fun` in debug mode for automatic printing. cat('\nError metric terms calculated by doubling the original argument values:\n') error_terms <- - obj_fun(2 * initial_guess, 0.001, return_terms = TRUE, debug_mode = TRUE) + obj_fun(2 * initial_guess, 0.001, return_terms = TRUE, debug_mode = 'everything') } } diff --git a/tests/testthat/test-objective_function.R b/tests/testthat/test-objective_function.R index 02df6d1..19f4969 100644 --- a/tests/testthat/test-objective_function.R +++ b/tests/testthat/test-objective_function.R @@ -27,7 +27,7 @@ ddps <- list( ) independent_args <- with(BioCro::soybean[['parameters']], { - list(alphaLeaf = alphaLeaf, betaLeaf = betaLeaf) + list(alphaLeaf = alphaLeaf, betaLeaf = betaLeaf, Catm = Catm) }) data_definitions <- list( @@ -71,6 +71,13 @@ test_that('Objective functions can be created and behave as expected', { obj_fun(as.numeric(independent_args)) ) + # Here we intentionally pass a bad value of Catm that will trigger an error + error_val <- expect_silent( + obj_fun(as.numeric(within(independent_args, {Catm = -1})), debug_mode = 'none') + ) + + expect_equal(error_val, 2 * BioCroValidation:::FAILURE_VALUE) + # One data-driver pair, no dependent arguments obj_fun <- expect_silent( objective_function( @@ -205,7 +212,7 @@ ambient_2005: Error: `bad_arg_name` from `arg_names` is not in the `initial_valu ) }) -test_that('Model failures are detected', { +test_that('Model failures at startup are detected', { expect_error( objective_function( within(model, 
{parameters$lnfun = 1}), diff --git a/vignettes/parameterizing_soybean_biocro.Rmd b/vignettes/parameterizing_soybean_biocro.Rmd index 651894a..81ede07 100644 --- a/vignettes/parameterizing_soybean_biocro.Rmd +++ b/vignettes/parameterizing_soybean_biocro.Rmd @@ -564,7 +564,7 @@ optim_res <- nmkb( tol = 1e-2, restarts.max = 10 ), - debug_mode = FALSE # Passed to obj_fun; set to TRUE to enable debug mode + debug_mode = 'minimal' # Passed to obj_fun ) ```