From 4f0a50bbba338e1619fed5a20070626f0c786d96 Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Fri, 23 May 2025 16:18:02 -0500 Subject: [PATCH 01/24] Prepare new development version --- DESCRIPTION | 2 +- NEWS.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0e388e9..32421d9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: BioCroValidation -Version: 0.2.0 +Version: 0.2.0-0 Title: Tools for Validating BioCro Models Description: A collection of tools for validating BioCro crop growth models. Authors@R: c( diff --git a/NEWS.md b/NEWS.md index 45cd934..1ae1ad6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -33,6 +33,8 @@ In the case of a hotfix, a short section headed by the new release number should be directly added to this file to describe the related changes. --> +# UNRELEASED + # Changes in BioCroValidation Version 0.2.0 (2025-05-23) - Added 2002 and 2005 SoyFACE biomass and standard deviation data. From 9e99946e893c70725562fd36384e989f66b0358a Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Fri, 30 May 2025 15:24:47 -0500 Subject: [PATCH 02/24] Fix some typos --- NEWS.md | 3 + R/objective_function_helpers.R | 31 ++++---- man/objective_function.Rd | 137 +++++++++++++++++---------------- 3 files changed, 91 insertions(+), 80 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1ae1ad6..d90cf8f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -35,6 +35,9 @@ be directly added to this file to describe the related changes. # UNRELEASED +- Fixed typos in the help page for `objective_function`, and in the `add_norm` + function (defined in `R/objective_function_helpers.R`) + # Changes in BioCroValidation Version 0.2.0 (2025-05-23) - Added 2002 and 2005 SoyFACE biomass and standard deviation data. diff --git a/R/objective_function_helpers.R b/R/objective_function_helpers.R index ae5ef09..bfbae4d 100644 --- a/R/objective_function_helpers.R +++ b/R/objective_function_helpers.R @@ -200,23 +200,25 @@ add_norm <- function( qname_subset <- data_table[data_table[['quantity_name']] == qname, ] + npts <- nrow(qname_subset) + qmax <- max(abs(qname_subset[['quantity_value']])) + qobs <- data_table[j, 'quantity_value'] + + eps_max <- get_param_default(normalization_param, 1e-1) + eps_obs <- get_param_default(normalization_param, 1e-1) + if (tolower(normalization_method) == 'equal') { 1.0 } else if (tolower(normalization_method) == 'mean') { - nrow(qname_subset) * n_ddp + npts * n_ddp } else if (tolower(normalization_method) == 'max') { - max(qname_subset[['quantity_value']])^2 + qmax^2 + eps_max } else if (tolower(normalization_method) == 'obs') { - eps <- get_param_default(normalization_param, 1e-1) - data_table[i, 'quantity_value']^2 + eps + qobs^2 + eps_obs } else if (tolower(normalization_method) == 'mean_max') { - npts <- nrow(qname_subset) - qmax <- max(qname_subset[['quantity_value']]) - npts * n_ddp * qmax^2 + npts * n_ddp * (qmax^2 + eps_max) } else if (tolower(normalization_method) == 'mean_obs') { - npts <- nrow(qname_subset) - eps <- get_param_default(normalization_param, 1e-1) - npts * n_ddp * (data_table[i, 'quantity_value']^2 + eps) + npts * n_ddp * (qobs^2 + eps_obs) } else { stop('Unsupported normalization_method: ', normalization_method) } @@ -234,15 +236,16 @@ add_w_var <- function(long_form_data, stdev_weight_method, stdev_weight_param) { data_table <- long_form_data[[i]] data_stdev <- data_table[['quantity_stdev']] + eps_log <- get_param_default(stdev_weight_param, 1e-5) + eps_inv <- get_param_default(stdev_weight_param, 1e-1) + data_table[['w_var']] <- if (tolower(stdev_weight_method) == 'equal') { 1.0 } else if (tolower(stdev_weight_method) == 'logarithm') { - eps <- get_param_default(stdev_weight_param, 1e-5) - log(1 / (data_stdev + eps)) + log(1.0 / (data_stdev + eps_log)) } else if (tolower(stdev_weight_method) == 'inverse') { - eps <- get_param_default(stdev_weight_param, 1e-1) - 1 / (data_stdev^2 + eps) + 1.0 / (data_stdev^2 + eps_inv) } else { stop('Unsupported stdev_weight_method: ', stdev_weight_method) } diff --git a/man/objective_function.Rd b/man/objective_function.Rd index 2cb1e5a..269fbf9 100644 --- a/man/objective_function.Rd +++ b/man/objective_function.Rd @@ -24,7 +24,7 @@ parameterization procedure. For a detailed example of using \code{objective_function}, see the - "Parameterizing Soybean-BioCro" vignette/article. + "Parameterizing Soybean-BioCro" vignette. } \usage{ @@ -81,8 +81,8 @@ \item{independent_args}{ A list of named numeric values. The names will determine the independent arguments to be varied during their optimization, and the values specify - "initial" or "test" values of each argument that will be used internally to - check that the objective function is properly defined and can be evaluated. + "test" values of each argument that will be used internally to check that + the objective function is properly defined and can be evaluated. } \item{quantity_weights}{ @@ -165,16 +165,17 @@ \strong{Overview} When parameterizing a BioCro model, the general idea is to vary a subset of - the model's parameters to achieve the best agreement with a set of observed - data. The degree of agreement is expressed as an "error metric," which - includes terms derived from the agreement between the modeled and observed - values, as well as (optional) penalty terms. A function that calculates an - error metric when given a set of parameter values is called an "objective - function." Defining an objective function suitable for BioCro parameterization - can be complicated, but the \code{objective_function} function helps to - simplify the process of creating such a function. It is designed to - accommodate the following scenarios, which often occur in the context of - BioCro model parameterization: + the model's \code{parameters} and/or \code{initial_values} to achieve the + best agreement with a set of observed data. The degree of agreement is + expressed as an "error metric," which includes terms derived from the + agreement between the modeled and observed values, as well as (optional) + penalty terms. A function that calculates an error metric when given a set of + argument values (\code{parameters} and/or \code{initial_values}) is called an + "objective function." Defining an objective function suitable for BioCro + parameterization can be complicated, but the \code{objective_function} + function helps to simplify the process of creating such a function. It is + designed to accommodate the following scenarios, which often occur in the + context of BioCro model parameterization: \itemize{ \item \strong{Multi-year or multi-location data:} Often the model needs to @@ -267,7 +268,7 @@ by retrieving the value of the \eqn{Y_i} variable at the closest time to \eqn{t_i} that is included in the model output. It is assumed that the model always outputs the same sequence of time values each time it - is run with a particular set of drivers, regardless of the input + is run with a particular set of drivers, regardless of the provided argument values. The quantity-based weight factors \eqn{w_i^{quantity}} are directly @@ -314,7 +315,7 @@ The function \eqn{f_{user}} must accept a single data frame as an input and return a single numeric value as its output, but has no other requirements. It is specified via the - \code{extra_penalty_function} argument. When + \code{extra_penalty_function}. When \code{extra_penalty_function} is \code{NULL}, \eqn{P_{user}} is zero. \item \strong{Regularization penalty term:} The regularization penalty term @@ -350,14 +351,14 @@ ln \left( \frac{1}{\sigma_i + \epsilon} \right),} where \eqn{ln} denotes a logarithm with base \eqn{e} and \eqn{\epsilon} is a small number included to prevent numerical errors - that would otherwise occur when \eqn{\sigma = 0}. This method was + that would otherwise occur when \eqn{\sigma_i = 0}. This method was used in the original Soybean-BioCro paper. - The value of \eqn{\epsilon} is specified by the - \code{stdev_weight_param} input argument, which defaults to - \code{1e-5} when \code{stdev_weight_param} is \code{NULL} when using - this method. With the default value of \eqn{\epsilon}, - \eqn{w_i^{stdev} \approx 11.512} when \eqn{\sigma = 0}. + The value of \eqn{\epsilon} is specified by \code{stdev_weight_param}, + which defaults to \code{1e-5} if \code{stdev_weight_param} is + \code{NULL} when using this method. With the default value of + \eqn{\epsilon}, \eqn{w_i^{stdev} \approx 11.512} when + \eqn{\sigma = 0}. Note: this method should be used with caution, because \eqn{w_i^{stdev}} is zero for \eqn{\sigma_i = 1 - \epsilon}, and it @@ -369,11 +370,10 @@ where \eqn{\epsilon} is a small number included to prevent numerical errors that would otherwise occur when \eqn{\sigma_i = 0}. - The value of \eqn{\epsilon} is specified by the - \code{stdev_weight_param} input argument, which defaults to - \code{1e-1} when \code{stdev_weight_param} is \code{NULL} when using - this method. With the default value of \eqn{\epsilon}, - \eqn{w_i^{stdev} = 10} when \eqn{\sigma_i = 0}. + The value of \eqn{\epsilon} is specified by \code{stdev_weight_param}, + which defaults to \code{1e-1} if \code{stdev_weight_param} is + \code{NULL} when using this method. With the default value of + \eqn{\epsilon}, \eqn{w_i^{stdev} = 10} when \eqn{\sigma_i = 0}. } If any values of \eqn{w_i^{stdev}} are undefined, negative, or infinite, an @@ -404,13 +404,21 @@ \item \code{'max'}: For this method, when \eqn{Y_i} is named \code{vtype} and the observation is from a set called \code{vdata}, then - \deqn{N_i = \left( max_{vtype}^{vdata} \right)^2,} - where \eqn{max_{vtype}^{vdata}} is the maximum observed value of - \code{vtype} across \code{vdata}. In this case, the observed and + \deqn{N_i = \left( max_{vtype}^{vdata} \right)^2 + \epsilon,} + where \eqn{max_{vtype}^{vdata}} is the maximum observed absolute value + of \code{vtype} across \code{vdata} and \eqn{\epsilon} is a small + number included to prevent numerical errors that would otherwise occur + when \eqn{max_{vtype}^{vdata} = 0}. In this case, the observed and modeled values that appear in the equation for \eqn{E_{agreement}} are - essentially normalized by their maximum value, hence the name for this - method. This approach avoids over-representing variable types with - larger magnitude when determining \eqn{E_{agreement}}. + essentially normalized by their maximum magnitude, hence the name for + this method. This approach avoids over-representing variable types + with larger magnitude when determining \eqn{E_{agreement}}. + + The value of \eqn{\epsilon} is specified by + \code{normalization_param}, which defaults to \code{1e-1} if + \code{normalization_param} is \code{NULL} when using this method. + With the default value of \eqn{\epsilon}, \eqn{N_i = 10} when + \eqn{max_{vtype}^{vdata} = 0}. \item \code{'obs'}: For this method, @@ -423,35 +431,34 @@ avoids over-representing time points where a particular quantity takes its largest values when determining \eqn{E_{agreement}}. - The value of \eqn{\epsilon} is specified by the - \code{normalization_param} input argument, which defaults to - \code{1e-1} when \code{normalization_param} is \code{NULL} when using - this method. With the default value of \eqn{\epsilon}, \eqn{N_i = 10} - when \eqn{Y_{obs}^i = 0}. + The value of \eqn{\epsilon} is specified by + \code{normalization_param}, which defaults to \code{1e-1} if + \code{normalization_param} is \code{NULL} when using this method. + With the default value of \eqn{\epsilon}, \eqn{N_i = 10} when + \eqn{Y_{obs}^i = 0}. \item \code{'mean_max'}: For this method, the "mean" and "max" methods are combined so that - \deqn{N_i = n_{vtype}^{vdrivers}% - \cdot n_{data} \cdot \left( max_{vtype}^{vdata} \right)^2.} + \deqn{N_i = n_{vtype}^{vdrivers} \cdot n_{data}% + \cdot \left[ \left( max_{vtype}^{vdata} \right)^2 + \epsilon \right].} This approach avoids over-representing drivers with larger numbers of associated observations, and variable types with larger magnitudes. This method is used for parameterizing Soybean-BioCro. + The value \eqn{\epsilon} is specified by \code{normalization_param}, + using the same default value as in the \code{'max'} method. + \item \code{'mean_obs'}: For this method, the "mean" and "obs" methods are combined so that \deqn{N_i = n_{vtype}^{vdrivers} \cdot n_{data}% - \cdot \left( \left( Y_{obs}^i \right)^2 + \epsilon \right)^2.} + \cdot \left[ \left( Y_{obs}^i \right)^2 + \epsilon \right].} This approach avoids over-representing drivers with larger numbers of associated observations, and time points with larger observed values. - The value of \eqn{\epsilon} is specified by the - \code{normalization_param} input argument, which defaults to - \code{1e-1} when \code{normalization_param} is \code{NULL} when using - this method. With the default value of \eqn{\epsilon}, - \eqn{N_i = 10 \cdot n_{vtype}^{vdrivers} \cdot n_{data}} when - \eqn{Y_{obs}^i = 0}. + The value \eqn{\epsilon} is specified by \code{normalization_param}, + using the same default value as in the \code{'obs'} method. } In most situations, it is recommended to use either \code{'mean_max'} or @@ -505,14 +512,14 @@ output, when accounting for the \code{data_definitions} and \code{post_process_function}. - \item Ensuring that each independent and dependent argument name is either - a parameter or initial value of the model. Internally, argument names - are passed to \code{\link[BioCro]{partial_run_biocro}} via its - \code{arg_names} input. Note that argument names passed to - \code{partial_run_biocro} can technically include drivers, but it is - unlikely that the value of a driver would be varied during an - optimization, so the argument names are not allowed to include - columns in the drivers. + \item Ensuring that each independent and dependent argument name is a member + of the model's \code{parameters} or \code{initial_values}. Internally, + argument names are passed to \code{\link[BioCro]{partial_run_biocro}} + via its \code{arg_names} input. Note that argument names passed to + \code{partial_run_biocro} can technically include members of the + \code{drivers}, but it is unlikely that the value of a driver would be + varied during an optimization, so the argument names are not allowed + to include columns in the \code{drivers}. \item Ensuring that the optional \code{dependent_arg_function}, \code{post_process_function}, and \code{extra_penalty_function} @@ -544,22 +551,20 @@ the same order as in \code{independent_arg_names}), and \code{lambda} is the value of the regularization parameter. - The \code{return_terms} argument determines the return value of - \code{obj_fun}. When \code{return_terms} is \code{FALSE}, \code{obj_fun} - returns values of the error metric \eqn{E}. When \code{return_terms} is - \code{TRUE}, \code{obj_fun} returns a list including each individual term of - the total error metric. + When \code{return_terms} is \code{FALSE}, \code{obj_fun} returns values of the + error metric \eqn{E}. When \code{return_terms} is \code{TRUE}, \code{obj_fun} + returns a list including each individual term of the total error metric. During optimization, \code{return_terms} should always be \code{FALSE}. Setting it to \code{TRUE} can be useful for troubleshooting, or for diagnostics such as checking the quality of fit for each of the data-driver pairs. - The \code{debug_mode} argument determines whether \code{obj_fun} is running in - debug mode. In debug mode, each time \code{obj_fun} is called, it will print - the values of \code{x} and the error metric to the R terminal. This can be - useful when troubleshooting a problem with an optimization, since it provides - a record of any problematic parameter combinations. When setting + When \code{debug_mode} is \code{TRUE}, \code{obj_fun} operates in "debug + mode." In debug mode, each time \code{obj_fun} is called, it will print the + values of \code{x} and the error metric to the R terminal. This can be useful + when troubleshooting a problem with an optimization, since it provides a + record of any problematic parameter combinations. When setting \code{debug_mode} to \code{TRUE}, also consider using \code{\link[base]{sink}} to write the outputs to a file instead of the R terminal. In that case, there will still be a record even if R crashes. @@ -690,12 +695,12 @@ if (require(BioCro)) { # objective function increases, indicating a lower degree of agreement between # the model and the observed data. Here we will call `obj_fun` in debug mode, # which will automatically print the value of the error metric. - cat('\nError metric calculated by doubling the initial argument values:\n') + cat('\nError metric calculated by doubling the original argument values:\n') error_metric <- obj_fun(2 * as.numeric(independent_args), debug_mode = TRUE) # We can also see the values of each term that makes up the error metric; # again, we will call `obj_fun` in debug mode for automatic printing. - cat('\nError metric terms calculated by doubling the initial argument values:\n') + cat('\nError metric terms calculated by doubling the original argument values:\n') error_terms <- obj_fun(2 * as.numeric(independent_args), return_terms = TRUE, debug_mode = TRUE) } From e8c566fe685478fe1f29b690d965b036dcf9bfbe Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Sat, 31 May 2025 11:06:00 -0500 Subject: [PATCH 03/24] Standardize and simplify method selection --- R/objective_function_helpers.R | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/R/objective_function_helpers.R b/R/objective_function_helpers.R index bfbae4d..d891e15 100644 --- a/R/objective_function_helpers.R +++ b/R/objective_function_helpers.R @@ -207,17 +207,19 @@ add_norm <- function( eps_max <- get_param_default(normalization_param, 1e-1) eps_obs <- get_param_default(normalization_param, 1e-1) - if (tolower(normalization_method) == 'equal') { + method <- toupper(normalization_method) + + if (method == 'EQUAL') { 1.0 - } else if (tolower(normalization_method) == 'mean') { + } else if (method == 'MEAN') { npts * n_ddp - } else if (tolower(normalization_method) == 'max') { + } else if (method == 'MAX') { qmax^2 + eps_max - } else if (tolower(normalization_method) == 'obs') { + } else if (method == 'OBS') { qobs^2 + eps_obs - } else if (tolower(normalization_method) == 'mean_max') { + } else if (method == 'MEAN_MAX') { npts * n_ddp * (qmax^2 + eps_max) - } else if (tolower(normalization_method) == 'mean_obs') { + } else if (method == 'MEAN_OBS') { npts * n_ddp * (qobs^2 + eps_obs) } else { stop('Unsupported normalization_method: ', normalization_method) @@ -239,12 +241,14 @@ add_w_var <- function(long_form_data, stdev_weight_method, stdev_weight_param) { eps_log <- get_param_default(stdev_weight_param, 1e-5) eps_inv <- get_param_default(stdev_weight_param, 1e-1) + method <- toupper(stdev_weight_method) + data_table[['w_var']] <- - if (tolower(stdev_weight_method) == 'equal') { + if (method == 'EQUAL') { 1.0 - } else if (tolower(stdev_weight_method) == 'logarithm') { + } else if (method == 'LOGARITHM') { log(1.0 / (data_stdev + eps_log)) - } else if (tolower(stdev_weight_method) == 'inverse') { + } else if (method == 'INVERSE') { 1.0 / (data_stdev^2 + eps_inv) } else { stop('Unsupported stdev_weight_method: ', stdev_weight_method) @@ -408,11 +412,13 @@ regularization_penalty <- function( regularization_lambda ) { - if (toupper(regularization_method) == 'NONE') { + method <- toupper(regularization_method) + + if (method == 'NONE') { 0.0 - } else if (toupper(regularization_method) == 'LASSO' || toupper(regularization_method) == 'L1') { + } else if (method %in% c('LASSO', 'L1')) { regularization_lambda * sum(abs(ind_arg_vals)) - } else if (toupper(regularization_method) == 'RIDGE' || toupper(regularization_method) == 'L2') { + } else if (method %in% c('RIDGE', 'L2')) { regularization_lambda * sum(ind_arg_vals^2) } else { stop('Unsupported regularization method: ', regularization_method) From 665a963a09ea7c0661a5851efc9a7e9a7463c320 Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Sat, 31 May 2025 12:09:41 -0500 Subject: [PATCH 04/24] Allow custom regularization function --- R/objective_function_helpers.R | 22 +++++++------ man/objective_function.Rd | 40 +++++++++++++++++------- tests/testthat/test-objective_function.R | 19 +++++++++++ 3 files changed, 60 insertions(+), 21 deletions(-) diff --git a/R/objective_function_helpers.R b/R/objective_function_helpers.R index d891e15..fc65cee 100644 --- a/R/objective_function_helpers.R +++ b/R/objective_function_helpers.R @@ -412,16 +412,20 @@ regularization_penalty <- function( regularization_lambda ) { - method <- toupper(regularization_method) - - if (method == 'NONE') { - 0.0 - } else if (method %in% c('LASSO', 'L1')) { - regularization_lambda * sum(abs(ind_arg_vals)) - } else if (method %in% c('RIDGE', 'L2')) { - regularization_lambda * sum(ind_arg_vals^2) + if (is.function(regularization_method)) { + regularization_method(ind_arg_vals, regularization_lambda) } else { - stop('Unsupported regularization method: ', regularization_method) + method <- toupper(regularization_method) + + if (method == 'NONE') { + 0.0 + } else if (method %in% c('LASSO', 'L1')) { + regularization_lambda * sum(abs(ind_arg_vals)) + } else if (method %in% c('RIDGE', 'L2')) { + regularization_lambda * sum(ind_arg_vals^2) + } else { + stop('Unsupported regularization method: ', regularization_method) + } } } diff --git a/man/objective_function.Rd b/man/objective_function.Rd index 269fbf9..cd11df9 100644 --- a/man/objective_function.Rd +++ b/man/objective_function.Rd @@ -128,7 +128,8 @@ \item{regularization_method}{ A string indicating the regularization method to be used when calculating - the regularization penalty term; see below for more details. + the regularization penalty term, or a function that calculates the penalty; + see below for more details. } \item{dependent_arg_function}{ @@ -333,9 +334,10 @@ \strong{Standard-deviation-based weight methods} - The following methods are available for determining weight factors from values - of the standard deviation (\eqn{\sigma}), which can be (optionally) supplied - via the \code{data_stdev} elements of the \code{data_driver_pairs}: + The following pre-set methods are available for determining weight factors + from values of the standard deviation (\eqn{\sigma}), which can be + (optionally) supplied via the \code{data_stdev} elements of the + \code{data_driver_pairs}: \itemize{ \item \code{'equal'}: For this method, \eqn{w_i^{stdev}} is always set to @@ -381,7 +383,7 @@ \strong{Normalization methods} - The following normalization methods are available: + The following pre-set normalization methods are available: \itemize{ \item \code{'equal'}: For this method, \eqn{N_i} is always set to 1. In @@ -466,7 +468,7 @@ \strong{Regularization methods} - The following regularization methods are available: + The following pre-set regularization methods are available: \itemize{ \item \code{'none'}: For this method, \eqn{P_{regularization}} is always set @@ -493,6 +495,10 @@ section below for details of how to specify \eqn{\lambda}. } + It is also possible to supply a function that accepts two input arguments + (\code{x} and \code{lambda}, as described in the "Value" section below) and + returns a numeric penalty value. + \strong{Input checks} Several checks are made to ensure that the objective function is properly @@ -649,6 +655,8 @@ if (require(BioCro)) { # original Soybean-BioCro model. independent_args <- BioCro::soybean[['parameters']][c('alphaLeaf', 'betaLeaf')] + initial_guess <- as.numeric(independent_args) + dependent_arg_function <- function(ind_args) { list(alphaStem = ind_args[['alphaLeaf']]) } @@ -675,6 +683,12 @@ if (require(BioCro)) { } } + # We want to use the regularization term to penalize deviations away from the + # initial guess, so we will define a custom L2 regularization function + regularization_function <- function(x, lambda) { + lambda * sum((x - initial_guess)^2) + } + # Now we can finally create the objective function obj_fun <- objective_function( base_model_definition, @@ -683,6 +697,7 @@ if (require(BioCro)) { quantity_weights, data_definitions = data_definitions, stdev_weight_method = 'logarithm', + regularization_method = regularization_function, dependent_arg_function = dependent_arg_function, post_process_function = post_process_function, extra_penalty_function = extra_penalty_function @@ -691,17 +706,18 @@ if (require(BioCro)) { # This function could now be passed to an optimizer; here we will simply # evaluate it for two sets of parameter values. - # Try doubling each parameter value; in this case, the value of the - # objective function increases, indicating a lower degree of agreement between - # the model and the observed data. Here we will call `obj_fun` in debug mode, - # which will automatically print the value of the error metric. + # Try doubling each parameter value and setting lambda to a nonzero value; in + # this case, the value of the objective function increases, indicating a lower + # degree of agreement between the model and the observed data. Here we will + # call `obj_fun` in debug mode, which will automatically print the value of + # the error metric. cat('\nError metric calculated by doubling the original argument values:\n') - error_metric <- obj_fun(2 * as.numeric(independent_args), debug_mode = TRUE) + error_metric <- obj_fun(2 * initial_guess, 0.001, debug_mode = TRUE) # We can also see the values of each term that makes up the error metric; # again, we will call `obj_fun` in debug mode for automatic printing. cat('\nError metric terms calculated by doubling the original argument values:\n') error_terms <- - obj_fun(2 * as.numeric(independent_args), return_terms = TRUE, debug_mode = TRUE) + obj_fun(2 * initial_guess, 0.001, return_terms = TRUE, debug_mode = TRUE) } } diff --git a/tests/testthat/test-objective_function.R b/tests/testthat/test-objective_function.R index dcfccf1..02df6d1 100644 --- a/tests/testthat/test-objective_function.R +++ b/tests/testthat/test-objective_function.R @@ -103,6 +103,25 @@ test_that('Objective functions can be created and behave as expected', { obj_fun(as.numeric(independent_args), lambda = 0.5) ) + # Two data-driver pairs, with dependent arguments and L4 regularization + obj_fun <- expect_silent( + objective_function( + model, + ddps, + independent_args, + quantity_weights, + data_definitions = data_definitions, + dependent_arg_function = dependent_arg_function, + post_process_function = post_process_function, + regularization_method = function(x, lambda) {lambda * sum(x^4)}, + verbose_startup = verbose_startup + ) + ) + + expect_silent( + obj_fun(as.numeric(independent_args), lambda = 0.5) + ) + expect_true( is.list( obj_fun(as.numeric(independent_args), lambda = 0.5, return_terms = TRUE) From b8f71c3219d3ef01ccec494cc9db170e3a0f1649 Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Sat, 31 May 2025 13:44:40 -0500 Subject: [PATCH 05/24] Print more startup information --- R/objective_function.R | 20 +++--- R/objective_function_helpers.R | 110 +++++++++++++++++++++++++++++---- 2 files changed, 111 insertions(+), 19 deletions(-) diff --git a/R/objective_function.R b/R/objective_function.R index 6c21e00..c03ee8a 100644 --- a/R/objective_function.R +++ b/R/objective_function.R @@ -76,14 +76,16 @@ objective_function <- function( long_form_data, normalization_method, normalization_param, - length(data_driver_pairs) + length(data_driver_pairs), + verbose_startup ) # Add variance-based weights long_form_data <- add_w_var( long_form_data, stdev_weight_method, - stdev_weight_param + stdev_weight_param, + verbose_startup ) # Print the long form data, if desired. Do this before checking the data, @@ -109,11 +111,15 @@ objective_function <- function( # Get the data-driver pair weights ddp_weights <- get_ddp_weights(data_driver_pairs) - # Print the data-driver pair weights, if desired - if (verbose_startup) { - cat('\nThe user-supplied data-driver pair weights:\n\n') - utils::str(ddp_weights) - } + # Print additional startup information, if desired + print_misc_verbose_startup( + ddp_weights, + regularization_method, + dependent_arg_function, + post_process_function, + extra_penalty_function, + verbose_startup + ) # Create the objective function obj_fun <- get_obj_fun( diff --git a/R/objective_function_helpers.R b/R/objective_function_helpers.R index fc65cee..0b6c4f6 100644 --- a/R/objective_function_helpers.R +++ b/R/objective_function_helpers.R @@ -188,9 +188,32 @@ add_norm <- function( long_form_data, normalization_method, normalization_param, - n_ddp + n_ddp, + verbose_startup ) { + eps_max <- get_param_default(normalization_param, 1e-1) + eps_obs <- get_param_default(normalization_param, 1e-1) + + method <- toupper(normalization_method) + + if (verbose_startup) { + method_info <- if (method %in% c('MAX', 'MEAN_MAX')) { + paste(method, 'with eps =', eps_max) + } else if (method %in% c('OBS', 'MEAN_OBS')) { + paste(method, 'with eps =', eps_obs) + } else { + method + } + + cat(paste( + '\nNormalization method:', + method_info, + '\n', + collapse = '' + )) + } + for (i in seq_along(long_form_data)) { data_table <- long_form_data[[i]] @@ -204,11 +227,6 @@ add_norm <- function( qmax <- max(abs(qname_subset[['quantity_value']])) qobs <- data_table[j, 'quantity_value'] - eps_max <- get_param_default(normalization_param, 1e-1) - eps_obs <- get_param_default(normalization_param, 1e-1) - - method <- toupper(normalization_method) - if (method == 'EQUAL') { 1.0 } else if (method == 'MEAN') { @@ -233,16 +251,39 @@ add_norm <- function( } # Helping function for getting variance-based weights -add_w_var <- function(long_form_data, stdev_weight_method, stdev_weight_param) { +add_w_var <- function( + long_form_data, + stdev_weight_method, + stdev_weight_param, + verbose_startup +) +{ + eps_log <- get_param_default(stdev_weight_param, 1e-5) + eps_inv <- get_param_default(stdev_weight_param, 1e-1) + + method <- toupper(stdev_weight_method) + + if (verbose_startup) { + method_info <- if (method == 'LOGARITHM') { + paste(method, 'with eps =', eps_log) + } else if (method == 'INVERSE') { + paste(method, 'with eps =', eps_inv) + } else { + method + } + + cat(paste( + '\nStandard-deviation-based weight method:', + method_info, + '\n', + collapse = '' + )) + } + for (i in seq_along(long_form_data)) { data_table <- long_form_data[[i]] data_stdev <- data_table[['quantity_stdev']] - eps_log <- get_param_default(stdev_weight_param, 1e-5) - eps_inv <- get_param_default(stdev_weight_param, 1e-1) - - method <- toupper(stdev_weight_method) - data_table[['w_var']] <- if (method == 'EQUAL') { 1.0 @@ -503,3 +544,48 @@ get_obj_fun <- function( } } } + +# Print the data-driver pair weights, information about the regularization +# method, and information about optional functions, if desired +print_misc_verbose_startup <- function( + ddp_weights, + regularization_method, + dependent_arg_function, + post_process_function, + extra_penalty_function, + verbose_startup +) +{ + if (verbose_startup){ + user_func_msg <- 'user-supplied function:\n\n' + + cat('\nThe user-supplied data-driver pair weights:\n\n') + utils::str(ddp_weights) + + cat('\nRegularization method: ') + + if (is.function(regularization_method)) { + cat(user_func_msg) + print(regularization_method) + } else { + cat(paste0(toupper(regularization_method), '\n')) + } + + func_to_print <- list( + list(info = 'Dependent argument', func = dependent_arg_function), + list(info = 'Post-processing', func = post_process_function), + list(info = 'Extra penalty', func = extra_penalty_function) + ) + + for (x in func_to_print) { + cat(paste0('\n', x$info, ' function: ')) + + if (is.null(x$func)) { + cat('none\n') + } else { + cat(user_func_msg) + print(x$func) + } + } + } +} From f55b3067045d7d1acef52c52e44ff832cdf10754 Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Sat, 31 May 2025 13:46:26 -0500 Subject: [PATCH 06/24] Update NEWS.md --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index d90cf8f..b0ba5f4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -38,6 +38,8 @@ be directly added to this file to describe the related changes. - Fixed typos in the help page for `objective_function`, and in the `add_norm` function (defined in `R/objective_function_helpers.R`) +- Allowed user-supplied regularization functions + # Changes in BioCroValidation Version 0.2.0 (2025-05-23) - Added 2002 and 2005 SoyFACE biomass and standard deviation data. From 104547753fe4ec0f5f63f862fc37f48374e83349 Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Thu, 7 Aug 2025 15:09:20 -0500 Subject: [PATCH 07/24] Catch errors when running model --- R/objective_function_helpers.R | 8 +++++++- tests/testthat/test-objective_function.R | 11 +++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/R/objective_function_helpers.R b/R/objective_function_helpers.R index 0b6c4f6..c544f05 100644 --- a/R/objective_function_helpers.R +++ b/R/objective_function_helpers.R @@ -495,7 +495,13 @@ get_obj_fun <- function( errors <- lapply(seq_along(model_runners), function(i) { runner <- model_runners[[i]] - res <- runner(x) + + res <- tryCatch( + runner(x), + error = function(e) { + data.frame() + } + ) error_from_res( res, diff --git a/tests/testthat/test-objective_function.R b/tests/testthat/test-objective_function.R index 02df6d1..0161f81 100644 --- a/tests/testthat/test-objective_function.R +++ b/tests/testthat/test-objective_function.R @@ -27,7 +27,7 @@ ddps <- list( ) independent_args <- with(BioCro::soybean[['parameters']], { - list(alphaLeaf = alphaLeaf, betaLeaf = betaLeaf) + list(alphaLeaf = alphaLeaf, betaLeaf = betaLeaf, Catm = Catm) }) data_definitions <- list( @@ -71,6 +71,13 @@ test_that('Objective functions can be created and behave as expected', { obj_fun(as.numeric(independent_args)) ) + # Here we intentionally pass a bad value of Catm that will trigger an error + error_val <- expect_silent( + obj_fun(as.numeric(within(independent_args, {Catm = -1}))) + ) + + expect_equal(error_val, 2 * BioCroValidation:::FAILURE_VALUE) + # One data-driver pair, no dependent arguments obj_fun <- expect_silent( objective_function( @@ -205,7 +212,7 @@ ambient_2005: Error: `bad_arg_name` from `arg_names` is not in the `initial_valu ) }) -test_that('Model failures are detected', { +test_that('Model failures at startup are detected', { expect_error( objective_function( within(model, {parameters$lnfun = 1}), From 02f288f05c909583ffc5cc104ff85cd0b2c72d91 Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Thu, 7 Aug 2025 15:38:48 -0500 Subject: [PATCH 08/24] Make separate `debug_print` function --- R/objective_function_helpers.R | 40 ++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/R/objective_function_helpers.R b/R/objective_function_helpers.R index c544f05..3efc60f 100644 --- a/R/objective_function_helpers.R +++ b/R/objective_function_helpers.R @@ -470,6 +470,24 @@ regularization_penalty <- function( } } +# Helping function for debug printing. Here, `obj` should either be a list (in +# which case its "structure" will be printed via `str`) or a numeric vector (in +# which case its values will be printed with up to 32 decimal places). +# `obj_name` should be a string describing what is being printed. The printout +# will include newlines at the start and end, along with a timestamp. +debug_print <- function(obj, obj_name) { + cat(paste0('\nTime: ', Sys.time()), ' ', obj_name, ': ') + + if (is.list(obj)) { + cat('\n\n') + utils::str(obj) + } else { + cat(paste(sprintf('%.32f', obj), collapse = ', ')) + } + + cat('\n') +} + # Helping function that forms the overall objective function get_obj_fun <- function( model_runners, @@ -483,14 +501,7 @@ get_obj_fun <- function( { function(x, lambda = 0, return_terms = FALSE, debug_mode = FALSE) { if (debug_mode) { - msg <- paste0( - '\nTime: ', - Sys.time(), - ' Independent argument values: ', - paste(x, collapse = ', '), - '\n' - ) - cat(msg) + debug_print(x, 'Independent argument values') } errors <- lapply(seq_along(model_runners), function(i) { @@ -526,9 +537,7 @@ get_obj_fun <- function( ) if (debug_mode) { - cat(paste0('Time: ', Sys.time()), ' Error metric terms: ') - utils::str(error_metric_terms) - cat('\n') + debug_print(error_metric_terms, 'Error metric terms') } error_metric_terms @@ -536,14 +545,7 @@ get_obj_fun <- function( error_metric <- sum(as.numeric(errors)) + reg_penalty if (debug_mode) { - msg <- paste0( - 'Time: ', - Sys.time(), - ' Error metric: ', - error_metric, - '\n' - ) - cat(msg) + debug_print(error_metric, 'Error metric') } error_metric From 850691592116134ceaa24eb2594dab428756e216 Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Thu, 7 Aug 2025 16:46:28 -0500 Subject: [PATCH 09/24] Add more options for debug mode --- R/objective_function_helpers.R | 129 ++++++++++++++++++----- R/objective_function_input_checks.R | 7 +- man/objective_function.Rd | 31 ++++-- tests/testthat/test-objective_function.R | 2 +- 4 files changed, 129 insertions(+), 40 deletions(-) diff --git a/R/objective_function_helpers.R b/R/objective_function_helpers.R index 3efc60f..b376388 100644 --- a/R/objective_function_helpers.R +++ b/R/objective_function_helpers.R @@ -4,6 +4,13 @@ # Value to return when a simulation fails to run FAILURE_VALUE <- 1e10 +# Define the allowed debug modes +DEBUG_MODES <- c( + 'none', + 'minimal', + 'everything' +) + # Helping function for getting a full list of argument names get_full_arg_names <- function(independent_args, dependent_arg_function) { # Get the independent argument names @@ -364,6 +371,24 @@ one_error <- function( } } +# Helping function for debug printing. Here, `obj` should either be a list (in +# which case its "structure" will be printed via `str`) or a numeric vector (in +# which case its values will be printed with up to 32 decimal places). +# `obj_name` should be a string describing what is being printed. The printout +# will include newlines at the start and end, along with a timestamp. +debug_print <- function(obj, obj_name) { + cat(paste0('\nTime: ', Sys.time()), ' ', obj_name, ': ') + + if (is.list(obj)) { + cat('\n\n') + utils::str(obj) + } else { + cat(paste(sprintf('%.32f', obj), collapse = ', ')) + } + + cat('\n') +} + # Helping function for returning a failure value failure_value <- function(error_sum, return_terms) { if (return_terms) { @@ -384,13 +409,40 @@ error_from_res <- function( ddp_weight, normalization_method, extra_penalty_function, - return_terms + return_terms, + x, + debug_mode ) { + # If there was an error, return a very high value + if (is.null(simulation_result)) { + return( + failure_value(NA, return_terms) + ) + } + # If the simulation did not finish, return a very high value expected_npts <- long_form_data_table[1, 'expected_npts'] if (nrow(simulation_result) < expected_npts) { + if (debug_mode %in% c(DEBUG_MODES[2], DEBUG_MODES[3])) { + # debug_mode is `minimal` or `everything`, so indicate that the + # simulation did not finish; if debug_mode is `minimal`, the inputs + # have not yet been printed, so we need to print them here + if (debug_mode == DEBUG_MODES[2]) { + debug_print(x, 'Independent argument values') + } + + msg <- paste0( + '\nSimulation did not finish; produced ', + nrow(simulation_result), + ' rows instead of the expected ', + expected_npts, '\n' + ) + + cat(msg) + } + return( failure_value(NA, return_terms) ) @@ -424,6 +476,24 @@ error_from_res <- function( # If the error sum is not finite, return a very high value if (!is.finite(error_sum)) { + if (debug_mode %in% c(DEBUG_MODES[2], DEBUG_MODES[3])) { + # debug_mode is `minimal` or `everything`, so indicate that the sum + # of squared errors is not finite; if debug_mode is `minimal`, the + # inputs have not yet been printed, so we need to print them here + if (debug_mode == DEBUG_MODES[2]) { + debug_print(x, 'Independent argument values') + } + + msg <- paste0( + '\nSum of squared errors is not finite: ', + error_sum, + '\n' + ) + + cat(msg) + } + + return( failure_value(error_sum, return_terms) ) @@ -470,25 +540,16 @@ regularization_penalty <- function( } } -# Helping function for debug printing. Here, `obj` should either be a list (in -# which case its "structure" will be printed via `str`) or a numeric vector (in -# which case its values will be printed with up to 32 decimal places). -# `obj_name` should be a string describing what is being printed. The printout -# will include newlines at the start and end, along with a timestamp. -debug_print <- function(obj, obj_name) { - cat(paste0('\nTime: ', Sys.time()), ' ', obj_name, ': ') - - if (is.list(obj)) { - cat('\n\n') - utils::str(obj) - } else { - cat(paste(sprintf('%.32f', obj), collapse = ', ')) - } - - cat('\n') -} - -# Helping function that forms the overall objective function +# Helping function that forms the overall objective function. Here, `debug_mode` +# should be a numeric value, and the debug outputs work as follows: +# +# - debug_mode 1: Minimal debug printing; inputs and error messages will be +# printed only if a simulation fails to complete +# +# - debug mode 2: Maximal debug printing; all inputs and outputs will be printed +# +# - any other value: No debug printing +# get_obj_fun <- function( model_runners, long_form_data, @@ -499,8 +560,9 @@ get_obj_fun <- function( regularization_method ) { - function(x, lambda = 0, return_terms = FALSE, debug_mode = FALSE) { - if (debug_mode) { + function(x, lambda = 0, return_terms = FALSE, debug_mode = 'minimal') { + if (tolower(debug_mode) == DEBUG_MODES[3]) { + # debug_mode is `everything`, so print the inputs debug_print(x, 'Independent argument values') } @@ -510,7 +572,18 @@ get_obj_fun <- function( res <- tryCatch( runner(x), error = function(e) { - data.frame() + if (debug_mode %in% c(DEBUG_MODES[2], DEBUG_MODES[3])) { + # debug_mode is `minimal` or `everything`, so print the + # error message; if debug_mode is `minimal`, the inputs + # have not yet been printed, so we need to print them + # here + if (debug_mode == DEBUG_MODES[2]) { + debug_print(x, 'Independent argument values') + } + cat(paste0('\n', conditionMessage(e), '\n')) + } + # Return NULL to indicate that an error occurred + NULL } ) @@ -521,7 +594,9 @@ get_obj_fun <- function( ddp_weights[[i]], normalization_method, extra_penalty_function, - return_terms + return_terms, + x, + debug_mode ) }) @@ -536,7 +611,8 @@ get_obj_fun <- function( regularization_penalty = reg_penalty ) - if (debug_mode) { + if (tolower(debug_mode) == DEBUG_MODES[3]) { + # debug_mode is `everything`, so print the error metric terms debug_print(error_metric_terms, 'Error metric terms') } @@ -544,7 +620,8 @@ get_obj_fun <- function( } else { error_metric <- sum(as.numeric(errors)) + reg_penalty - if (debug_mode) { + if (tolower(debug_mode) == DEBUG_MODES[3]) { + # debug_mode is `everything`, so print the error metric debug_print(error_metric, 'Error metric') } diff --git a/R/objective_function_input_checks.R b/R/objective_function_input_checks.R index 7a7a5a0..f8d0afb 100644 --- a/R/objective_function_input_checks.R +++ b/R/objective_function_input_checks.R @@ -420,8 +420,11 @@ check_long_form_data <- function(long_form_data) { # Helping function for checking the objective function; will throw an error if a # problem is detected, and will otherwise be silent with no return value. check_obj_fun <- function(obj_fun, initial_ind_arg_values, verbose) { - initial_error_terms <- - obj_fun(as.numeric(initial_ind_arg_values), return_terms = TRUE) + initial_error_terms <- obj_fun( + as.numeric(initial_ind_arg_values), + return_terms = TRUE, + debug_mode = 'none' + ) initial_error <- sum(unlist(initial_error_terms)) diff --git a/man/objective_function.Rd b/man/objective_function.Rd index cd11df9..21de429 100644 --- a/man/objective_function.Rd +++ b/man/objective_function.Rd @@ -551,7 +551,7 @@ \value{ A function \code{obj_fun} with signature - \code{obj_fun(x, lambda = 0, return_terms = FALSE, debug_mode = FALSE)}. + \code{obj_fun(x, lambda = 0, return_terms = FALSE, debug_mode = 'minimal')}. Here, \code{x} is a numeric vector of values of the independent arguments (in the same order as in \code{independent_arg_names}), and \code{lambda} is the @@ -566,14 +566,23 @@ diagnostics such as checking the quality of fit for each of the data-driver pairs. - When \code{debug_mode} is \code{TRUE}, \code{obj_fun} operates in "debug - mode." In debug mode, each time \code{obj_fun} is called, it will print the - values of \code{x} and the error metric to the R terminal. This can be useful - when troubleshooting a problem with an optimization, since it provides a - record of any problematic parameter combinations. When setting - \code{debug_mode} to \code{TRUE}, also consider using \code{\link[base]{sink}} - to write the outputs to a file instead of the R terminal. In that case, there - will still be a record even if R crashes. + When \code{debug_mode} is \code{'minimal'} or \code{'everything'}, + \code{obj_fun} operates in "debug mode." In the "minimal" version, problematic + values of \code{x} will be printed to the terminal, along with an explanation + of the problem that was caused. In the "everything" version, the values of + \code{x} and the error metric will be printed to the terminal every time + \code{obj_fun} is called. When \code{debug_mode} is \code{'none'} (or any + other value not mentioned above), no debug printing will occur. + + Debug mode can be useful when troubleshooting a problem with an optimization, + since it provides a record of any problematic parameter combinations. Once a + problematic set of argument values is identified, it can be investigated + further by calling \code{obj_fun} again with \code{x} set to the problematic + values and \code{return_terms} set to \code{TRUE}. + + When setting \code{debug_mode} to \code{TRUE}, also consider using + \code{\link[base]{sink}} to write the outputs to a file instead of the R + terminal. In that case, there will still be a record even if R crashes. } \examples{ @@ -712,12 +721,12 @@ if (require(BioCro)) { # call `obj_fun` in debug mode, which will automatically print the value of # the error metric. cat('\nError metric calculated by doubling the original argument values:\n') - error_metric <- obj_fun(2 * initial_guess, 0.001, debug_mode = TRUE) + error_metric <- obj_fun(2 * initial_guess, 0.001, debug_mode = 'everything') # We can also see the values of each term that makes up the error metric; # again, we will call `obj_fun` in debug mode for automatic printing. cat('\nError metric terms calculated by doubling the original argument values:\n') error_terms <- - obj_fun(2 * initial_guess, 0.001, return_terms = TRUE, debug_mode = TRUE) + obj_fun(2 * initial_guess, 0.001, return_terms = TRUE, debug_mode = 'everything') } } diff --git a/tests/testthat/test-objective_function.R b/tests/testthat/test-objective_function.R index 0161f81..19f4969 100644 --- a/tests/testthat/test-objective_function.R +++ b/tests/testthat/test-objective_function.R @@ -73,7 +73,7 @@ test_that('Objective functions can be created and behave as expected', { # Here we intentionally pass a bad value of Catm that will trigger an error error_val <- expect_silent( - obj_fun(as.numeric(within(independent_args, {Catm = -1}))) + obj_fun(as.numeric(within(independent_args, {Catm = -1})), debug_mode = 'none') ) expect_equal(error_val, 2 * BioCroValidation:::FAILURE_VALUE) From 7ee5803dad651db47e873c9acbd2ab9403e70c23 Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Thu, 7 Aug 2025 16:46:39 -0500 Subject: [PATCH 10/24] Update NEWS and DESCRIPTION --- DESCRIPTION | 2 +- NEWS.md | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 32421d9..6ea6709 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: BioCroValidation -Version: 0.2.0-0 +Version: 0.2.0-1 Title: Tools for Validating BioCro Models Description: A collection of tools for validating BioCro crop growth models. Authors@R: c( diff --git a/NEWS.md b/NEWS.md index b0ba5f4..e7637bb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -40,6 +40,13 @@ be directly added to this file to describe the related changes. - Allowed user-supplied regularization functions +- Errors that occur while running simulations are now caught so they do not + prevent an optimization from finishing + +- More options for `debug_mode` are now available; the default setting + (`minimal`) only prints info to the terminal when an issue with a simulation + occurs + # Changes in BioCroValidation Version 0.2.0 (2025-05-23) - Added 2002 and 2005 SoyFACE biomass and standard deviation data. From 06561b0c3291d23e7d3480c4efd5d9320f67247a Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Thu, 7 Aug 2025 17:24:40 -0500 Subject: [PATCH 11/24] Update parameterizing_soybean_biocro.Rmd --- vignettes/parameterizing_soybean_biocro.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/parameterizing_soybean_biocro.Rmd b/vignettes/parameterizing_soybean_biocro.Rmd index 651894a..81ede07 100644 --- a/vignettes/parameterizing_soybean_biocro.Rmd +++ b/vignettes/parameterizing_soybean_biocro.Rmd @@ -564,7 +564,7 @@ optim_res <- nmkb( tol = 1e-2, restarts.max = 10 ), - debug_mode = FALSE # Passed to obj_fun; set to TRUE to enable debug mode + debug_mode = 'minimal' # Passed to obj_fun ) ``` From f59009d834e1e69fe607c4d4082df5db70983899 Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Fri, 15 Aug 2025 13:14:14 -0500 Subject: [PATCH 12/24] Improve alignment and ordering --- R/write_model.R | 22 +++++++++++++++++----- tests/test_data/test_model.R | 16 ++++++++-------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/R/write_model.R b/R/write_model.R index 2bb652f..a40fdfc 100644 --- a/R/write_model.R +++ b/R/write_model.R @@ -7,6 +7,18 @@ write_model <- function( ode_solver ) { + # Get the longest name among the initial values and parameters + all_names <- c( + names(initial_values), + names(parameters) + ) + + name_width <- max(sapply(all_names, nchar)) + + # Alphabetize the initial values and parameters + initial_values <- initial_values[order(tolower(names(initial_values)))] + parameters <- parameters[order(tolower(names(parameters)))] + # Fill in the module definition template (defined below) model_text <- sprintf( model_definition_template, @@ -18,8 +30,8 @@ write_model <- function( ode_solver[['adaptive_rel_error_tol']], ode_solver[['adaptive_abs_error_tol']], ode_solver[['adaptive_max_steps']], - paste(paste0(' "', names(initial_values), '" = ', initial_values), collapse = ',\n'), - paste(paste0(' "', names(parameters), '" = ', parameters), collapse = ',\n') + paste(paste0(' ', format(names(initial_values), width = name_width), ' = ', initial_values), collapse = ',\n'), + paste(paste0(' ', format(names(parameters), width = name_width), ' = ', parameters), collapse = ',\n') ) } @@ -31,11 +43,11 @@ model_definition_template <- '%s <- list( %s ), ode_solver = list( - type = "%s", - output_step_size = %f, + type = "%s", + output_step_size = %f, adaptive_rel_error_tol = %e, adaptive_abs_error_tol = %e, - adaptive_max_steps = %i + adaptive_max_steps = %i ), initial_values = list( %s diff --git a/tests/test_data/test_model.R b/tests/test_data/test_model.R index 42ba546..076b505 100644 --- a/tests/test_data/test_model.R +++ b/tests/test_data/test_model.R @@ -6,19 +6,19 @@ test_model <- list( "BioCro:harmonic_oscillator" ), ode_solver = list( - type = "boost_rkck54", - output_step_size = 1.000000, + type = "boost_rkck54", + output_step_size = 1.000000, adaptive_rel_error_tol = 1.000000e-04, adaptive_abs_error_tol = 1.000000e-04, - adaptive_max_steps = 200 + adaptive_max_steps = 200 ), initial_values = list( - "position" = 1, - "velocity" = 0 + position = 1, + velocity = 0 ), parameters = list( - "timestep" = 1, - "mass" = 1, - "spring_constant" = 0.5 + mass = 1, + spring_constant = 0.5, + timestep = 1 ) ) From 2ce4127c6d63ac50d6a216774aab73b09a57efd2 Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Fri, 15 Aug 2025 13:24:39 -0500 Subject: [PATCH 13/24] Make sure module names are preserved --- R/write_model.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/R/write_model.R b/R/write_model.R index a40fdfc..be6ea32 100644 --- a/R/write_model.R +++ b/R/write_model.R @@ -19,12 +19,20 @@ write_model <- function( initial_values <- initial_values[order(tolower(names(initial_values)))] parameters <- parameters[order(tolower(names(parameters)))] + # Prepare the module list strings + dir_module_string <- paste(paste0(' ', names(direct_modules), ' = "', direct_modules, '"'), collapse = ',\n') + diff_module_string <- paste(paste0(' ', names(differential_modules), ' = "', differential_modules, '"'), collapse = ',\n') + + # Remove any lines without names + dir_module_string <- gsub(' = ', ' ', dir_module_string) + diff_module_string <- gsub(' = ', ' ', diff_module_string) + # Fill in the module definition template (defined below) model_text <- sprintf( model_definition_template, name, - paste(paste0(' "', direct_modules, '"'), collapse = ',\n'), - paste(paste0(' "', differential_modules, '"'), collapse = ',\n'), + dir_module_string, + diff_module_string, ode_solver[['type']], ode_solver[['output_step_size']], ode_solver[['adaptive_rel_error_tol']], From 5d8830bffdfeb87c29ff2bf72d832fe1e2a4becb Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Fri, 15 Aug 2025 13:28:24 -0500 Subject: [PATCH 14/24] Update NEWS.md --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index e7637bb..404022e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -47,6 +47,10 @@ be directly added to this file to describe the related changes. (`minimal`) only prints info to the terminal when an issue with a simulation occurs +- Improved formatting of output from `write_module`, so that initial value and + parameter lists are alphabetized, equals signs are aligned, and module names + are preserved + # Changes in BioCroValidation Version 0.2.0 (2025-05-23) - Added 2002 and 2005 SoyFACE biomass and standard deviation data. From 69f5b57535a747e35388f134c1b431a16a94b20a Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Fri, 15 Aug 2025 13:34:21 -0500 Subject: [PATCH 15/24] Update write_model.Rd --- man/write_model.Rd | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/man/write_model.Rd b/man/write_model.Rd index 86d98a4..306856d 100644 --- a/man/write_model.Rd +++ b/man/write_model.Rd @@ -62,6 +62,11 @@ output. See examples below. Note that it is customary to name the R script file as \code{name.R}, where \code{name} is the value provided to the function itself. + + The \code{initial_values} and \code{parameters} will be alphabetized + (case-insensitive). The \code{ode_solver} elements will be printed in the + customary BioCro order. The \code{direct_modules} and + \code{differential_modules} will not be reordered. } \value{ @@ -85,7 +90,7 @@ if (require(BioCro)) { if (interactive()) { # Use writeLines to save as a `.R` file - writeLines(out, "./test_soybean_model.h") + writeLines(out, "./test_soybean_model.R") } } } From 10cc54700269dbd0b36280c8f4bda453442684e3 Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Thu, 21 Aug 2025 11:24:44 -0500 Subject: [PATCH 16/24] Enable driver-specific initial values and parameters --- NEWS.md | 2 + R/combine_lists.R | 9 ++ R/objective_function.R | 6 +- R/objective_function_helpers.R | 10 +- R/objective_function_input_checks.R | 149 ++++++++++++++++++- man/objective_function.Rd | 21 ++- tests/testthat/test-objective_function.R | 152 ++++++++++++++++++++ vignettes/parameterizing_soybean_biocro.Rmd | 13 +- 8 files changed, 347 insertions(+), 15 deletions(-) create mode 100644 R/combine_lists.R diff --git a/NEWS.md b/NEWS.md index 404022e..c3c5a63 100644 --- a/NEWS.md +++ b/NEWS.md @@ -40,6 +40,8 @@ be directly added to this file to describe the related changes. - Allowed user-supplied regularization functions +- Allowed driver-specific initial values and parameters + - Errors that occur while running simulations are now caught so they do not prevent an optimization from finishing diff --git a/R/combine_lists.R b/R/combine_lists.R new file mode 100644 index 0000000..ff8f5bd --- /dev/null +++ b/R/combine_lists.R @@ -0,0 +1,9 @@ +# Helping function for overwriting elements of a base list with elements of a +# second list; here we assume that both lists have names, and that the names of +# new_list is a subset of the names of base_list +combine_lists <- function(base_list, new_list) { + for (element_name in names(new_list)) { + base_list[[element_name]] <- new_list[[element_name]] + } + base_list +} diff --git a/R/objective_function.R b/R/objective_function.R index c03ee8a..7a93a0a 100644 --- a/R/objective_function.R +++ b/R/objective_function.R @@ -19,7 +19,11 @@ objective_function <- function( ) { # Check the data-driver pairs - check_data_driver_pairs(base_model_definition, data_driver_pairs) + check_data_driver_pairs( + base_model_definition, + data_driver_pairs, + verbose_startup + ) # Check the arguments to be varied check_args_to_vary( diff --git a/R/objective_function_helpers.R b/R/objective_function_helpers.R index b376388..12d7312 100644 --- a/R/objective_function_helpers.R +++ b/R/objective_function_helpers.R @@ -46,8 +46,14 @@ get_model_runner <- function( # Build the runner tryCatch({ partial_func <- BioCro::partial_run_biocro( - base_model_definition[['initial_values']], - base_model_definition[['parameters']], + combine_lists( + base_model_definition[['initial_values']], + ddp[['initial_values']] + ), + combine_lists( + base_model_definition[['parameters']], + ddp[['parameters']] + ), ddp[['drivers']], base_model_definition[['direct_modules']], base_model_definition[['differential_modules']], diff --git a/R/objective_function_input_checks.R b/R/objective_function_input_checks.R index f8d0afb..1120ab5 100644 --- a/R/objective_function_input_checks.R +++ b/R/objective_function_input_checks.R @@ -4,7 +4,7 @@ # Helping function for checking the data-driver pairs; will throw an error if # a problem is detected, and will otherwise be silent with no return value. -check_data_driver_pairs <- function(base_model_definition, data_driver_pairs) { +check_data_driver_pairs <- function(base_model_definition, data_driver_pairs, verbose) { # There must be at least one data-driver pair if (length(data_driver_pairs) < 1) { stop('`data_driver_pairs` must have at least one element') @@ -37,7 +37,7 @@ check_data_driver_pairs <- function(base_model_definition, data_driver_pairs) { } # Only required or optional elements should be provided - optional_elements <- 'data_stdev' + optional_elements <- c('data_stdev', 'initial_values', 'parameters') acceptable_elements <- c(required_elements, optional_elements) @@ -117,12 +117,132 @@ check_data_driver_pairs <- function(base_model_definition, data_driver_pairs) { stop(msg) } + # When provided, the driver-specific initial values and parameters must be + # lists + iv_is_list <- sapply(data_driver_pairs, function(x) { + iv <- x[['initial_values']] + + if (!is.null(iv)) { + is.list(iv) && !is.null(names(iv)) + } else { + TRUE + } + }) + + if (any(!iv_is_list)) { + stop( + 'When provided, the driver-specific initial values must be a list ', + 'of named elements' + ) + } + + param_is_list <- sapply(data_driver_pairs, function(x) { + param <- x[['parameters']] + + if (!is.null(param)) { + is.list(param) && !is.null(names(param)) + } else { + TRUE + } + }) + + if (any(!param_is_list)) { + stop( + 'When provided, the driver-specific parameters must be a list ', + 'of named elements' + ) + } + + # Names of driver-specific initial values and parameter names must be the + # same for all data-driver pairs + iv_names <- lapply(data_driver_pairs, function(x) { + sort(names(x[['initial_values']])) + }) + + if (length(unique(iv_names)) > 1) { + msg <- paste0( + 'The following driver-specific initial value names were provided ', + 'in the data-driver pairs:\n', + paste( + names(iv_names), ':', + sapply(iv_names, function(x) {paste(x, collapse = ', ')}), + collapse = '\n' + ), + '\nWhen provided, these names must be the same for each set of ', + 'drivers' + ) + + stop(msg) + } + + param_names <- lapply(data_driver_pairs, function(x) { + sort(names(x[['parameters']])) + }) + + if (length(unique(param_names)) > 1) { + msg <- paste0( + 'The following driver-specific parameter names were provided ', + 'in the data-driver pairs:\n', + paste( + names(param_names), ':', + sapply(param_names, function(x) {paste(x, collapse = ', ')}), + collapse = '\n' + ), + '\nWhen provided, these names must be the same for each set of ', + 'drivers' + ) + + stop(msg) + } + + # Each driver-specific initial value and parameter must be included in the + # base model definition + iv_names_to_check <- unique(unlist(iv_names)) + + iv_names_okay <- + iv_names_to_check %in% names(base_model_definition[['initial_values']]) + + if (!all(iv_names_okay)) { + bad_names <- iv_names_to_check[!iv_names_okay] + + msg <- paste( + 'The following driver-specific initial values are not included in', + 'the base model definition:', + paste0('"', bad_names, '"', collapse = ', ') + ) + + stop(msg) + } + + param_names_to_check <- unique(unlist(param_names)) + + param_names_okay <- + param_names_to_check %in% names(base_model_definition[['parameters']]) + + if (!all(param_names_okay)) { + bad_names <- param_names_to_check[!param_names_okay] + + msg <- paste( + 'The following driver-specific parameters are not included in', + 'the base model definition:', + paste0('"', bad_names, '"', collapse = ', ') + ) + + stop(msg) + } + # Each set of drivers must form a valid dynamical system along with the # base model definition valid_definitions <- sapply(data_driver_pairs, function(ddp) { BioCro::validate_dynamical_system_inputs( - base_model_definition[['initial_values']], - base_model_definition[['parameters']], + combine_lists( + base_model_definition[['initial_values']], + ddp[['initial_values']] + ), + combine_lists( + base_model_definition[['parameters']], + ddp[['parameters']] + ), ddp[['drivers']], base_model_definition[['direct_modules']], base_model_definition[['differential_modules']], @@ -141,6 +261,27 @@ check_data_driver_pairs <- function(base_model_definition, data_driver_pairs) { stop(msg) } + # Print driver-specific initial values and parameters, if necessary + if (verbose) { + cat('\nDriver-specific initial values:\n\n') + + if (all(sapply(iv_names, is.null))) { + cat(' None\n') + } else { + iv <- lapply(data_driver_pairs, function(x) {x[['initial_values']]}) + utils::str(iv) + } + + cat('\nDriver-specific parameters:\n\n') + + if (all(sapply(param_names, is.null))) { + cat(' None\n') + } else { + param <- lapply(data_driver_pairs, function(x) {x[['parameters']]}) + utils::str(param) + } + } + return(invisible(NULL)) } diff --git a/man/objective_function.Rd b/man/objective_function.Rd index 21de429..b97461a 100644 --- a/man/objective_function.Rd +++ b/man/objective_function.Rd @@ -55,8 +55,8 @@ \item{data_driver_pairs}{ A list of named elements, where each element is a "data-driver pair." A data-driver pair is a list with three required elements: \code{data}, - \code{drivers}, and \code{weight}. Optionally, it may also have a - \code{data_stdev} element. + \code{drivers}, and \code{weight}. Optionally, it may also have elements + named \code{data_stdev}, \code{initial_values}, and \code{parameters}. The \code{data} element must be a data frame with one column named \code{time}, whose values follow BioCro's definition of @@ -76,6 +76,13 @@ should represent the standard deviation associated with each entry in \code{data}. If \code{data_stdev} is not supplied, all standard deviations will be set to 1. + + The optional \code{initial_values} and \code{parameters} elements must be + named lists of driver-specific initial values and parameters, respectively, + that will overwrite the default values specified in the + \code{base_model_definition}. When driver-specific \code{initial_values} are + provided, they must be provided for each of the data-driver pairs; the same + rule applies for driver-specific \code{parameters}. } \item{independent_args}{ @@ -185,7 +192,10 @@ simulation must be compared to associated sets of observed data. Here, this is handled through the \code{data_driver_pairs}, which allows the user to specify which drivers and data sets should be compared to each - other. + other. Optionally, it is also possible to specify different initial + values or parameters for each set of drivers; for example, the + atmospheric CO2 concentration may need to change for different years, + or soil properties may need to change for different locations. \item \strong{Complicated normalization:} Care must be taken to ensure that certain years or output variables are not over-valued in the error @@ -622,18 +632,21 @@ if (require(BioCro)) { # The data-driver pairs can now be created by associating each data set with # its corresponding weather data. Here we will weight the 2005 data twice as - # heavily as the 2002 data. + # heavily as the 2002 data. Note that we also specify different atmospheric + # CO2 concentrations for each year. data_driver_pairs <- list( ambient_2002 = list( data = process_table(soyface_biomass[['ambient_2002']]), data_stdev = process_table(soyface_biomass[['ambient_2002_std']]), drivers = BioCro::soybean_weather[['2002']], + parameters = list(Catm = with(BioCro::catm_data, {Catm[year == '2002']})), weight = 1 ), ambient_2005 = list( data = process_table(soyface_biomass[['ambient_2005']]), data_stdev = process_table(soyface_biomass[['ambient_2005_std']]), drivers = BioCro::soybean_weather[['2005']], + parameters = list(Catm = with(BioCro::catm_data, {Catm[year == '2005']})), weight = 2 ) ) diff --git a/tests/testthat/test-objective_function.R b/tests/testthat/test-objective_function.R index 19f4969..738abdd 100644 --- a/tests/testthat/test-objective_function.R +++ b/tests/testthat/test-objective_function.R @@ -285,6 +285,158 @@ test_that('Data-driver pairs must have correct elements', { ) }) +test_that('Driver-specific initial values are checked and used', { + expect_error( + objective_function( + model, + within(ddps, {ambient_2002$initial_values = 5}), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'When provided, the driver-specific initial values must be a list of named elements' + ) + + expect_error( + objective_function( + model, + within(ddps, {ambient_2002$initial_values = list(5)}), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'When provided, the driver-specific initial values must be a list of named elements' + ) + + expect_error( + objective_function( + model, + within(ddps, {ambient_2002$initial_values = list(Leaf = 5)}), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'The following driver-specific initial value names were provided in the data-driver pairs:\nambient_2002 : Leaf\nambient_2005 : \nWhen provided, these names must be the same for each set of drivers', + fixed = TRUE + ) + + expect_error( + objective_function( + model, + within(ddps, { + ambient_2002$initial_values = list(extra_iv = 5) + ambient_2005$initial_values = list(extra_iv = 7) + }), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'The following driver-specific initial values are not included in the base model definition: "extra_iv"' + ) + + expect_error( + objective_function( + model, + within(ddps, { + ambient_2002$initial_values = list(Leaf = 0.1) # This shouldn't cause a problem + ambient_2005$initial_values = list(Leaf = -1) # This should trigger an error in the Ci solver + }), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'The model could not be run with the following drivers: +ambient_2005', + fixed = TRUE + ) +}) + +test_that('Driver-specific parameters are checked and used', { + expect_error( + objective_function( + model, + within(ddps, {ambient_2002$parameters = 5}), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'When provided, the driver-specific parameters must be a list of named elements' + ) + + expect_error( + objective_function( + model, + within(ddps, {ambient_2002$parameters = list(5)}), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'When provided, the driver-specific parameters must be a list of named elements' + ) + + expect_error( + objective_function( + model, + within(ddps, {ambient_2002$parameters = list(Catm = 5)}), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'The following driver-specific parameter names were provided in the data-driver pairs:\nambient_2002 : Catm\nambient_2005 : \nWhen provided, these names must be the same for each set of drivers', + fixed = TRUE + ) + + expect_error( + objective_function( + model, + within(ddps, { + ambient_2002$parameters = list(extra_param = 5) + ambient_2005$parameters = list(extra_param = 7) + }), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'The following driver-specific parameters are not included in the base model definition: "extra_param"' + ) + + expect_error( + objective_function( + model, + within(ddps, { + ambient_2002$parameters = list(b0 = 0.1) # This shouldn't cause a problem + ambient_2005$parameters = list(b0 = -1) # This should trigger an `hs < 0` error from the Ball-Berry model code + }), + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = verbose_startup + ), + 'The model could not be run with the following drivers: +ambient_2005', + fixed = TRUE + ) +}) + test_that('Data must have a `time` column', { expect_error( objective_function( diff --git a/vignettes/parameterizing_soybean_biocro.Rmd b/vignettes/parameterizing_soybean_biocro.Rmd index 81ede07..3a3612e 100644 --- a/vignettes/parameterizing_soybean_biocro.Rmd +++ b/vignettes/parameterizing_soybean_biocro.Rmd @@ -163,7 +163,8 @@ process_table <- function(data_table, type) { The `BioCro` R package includes weather data for the years in the `soyface_biomass` data set. So now we are ready to define the data-driver pairs, which includes the weather, the observed biomass, the standard deviation of the -observed biomass, and the weight to assign to each year: +observed biomass, the atmospheric CO2 concentration to use for each year, and +the weight to assign to each year: ```{r data_driver_pairs} # Define the data-driver pairs @@ -172,12 +173,14 @@ data_driver_pairs <- list( data = process_table(soyface_biomass[['ambient_2002']], 'biomass'), data_stdev = process_table(soyface_biomass[['ambient_2002_std']], 'stdev'), drivers = BioCro::soybean_weather[['2002']], + parameters = list(Catm = with(BioCro::catm_data, {Catm[year == '2002']})), weight = 1 ), ambient_2005 = list( data = process_table(soyface_biomass[['ambient_2005']], 'biomass'), data_stdev = process_table(soyface_biomass[['ambient_2005_std']], 'stdev'), drivers = BioCro::soybean_weather[['2005']], + parameters = list(Catm = with(BioCro::catm_data, {Catm[year == '2005']})), weight = 1 ) ) @@ -630,9 +633,11 @@ guess. In this example, `r sum(ind_arg_table[['improved']])` out of (`r round(100 * sum(ind_arg_table[['improved']]) / nrow(ind_arg_table))` percent). We can also compare the error metric to its original value. As shown above, it -is now `r optim_res[['value']]`, which is only -`r round(100 * (optim_res[['value']] - default_error) / optim_res[['value']], 1)` -percent larger than with the default parameter values. +is now `r optim_res[['value']]`, which is +`r round(100 * abs(optim_res[['value']] - default_error) / optim_res[['value']], 1)` +percent +`r if (optim_res[['value']] > default_error) {'larger'} else {'smaller'}` +than with the default parameter values. The optimized parameter values could likely be improved by using a more stringent tolerance for the optimizer, but this would require more time to run. From 44f4c801ba28f2d8fc4cfaa47de87619d8f7e4b9 Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Thu, 21 Aug 2025 11:56:29 -0500 Subject: [PATCH 17/24] Fix verbose mode bug --- NEWS.md | 3 +++ R/objective_function_input_checks.R | 7 ++++++- tests/testthat/test-objective_function.R | 18 ++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index c3c5a63..2394a7d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -53,6 +53,9 @@ be directly added to this file to describe the related changes. parameter lists are alphabetized, equals signs are aligned, and module names are preserved +- Fixed a bug where calling `objective_function` with + `dependent_arg_function = NULL` and `verbose_mode = TRUE` caused an error + # Changes in BioCroValidation Version 0.2.0 (2025-05-23) - Added 2002 and 2005 SoyFACE biomass and standard deviation data. diff --git a/R/objective_function_input_checks.R b/R/objective_function_input_checks.R index 1120ab5..ebaa5ac 100644 --- a/R/objective_function_input_checks.R +++ b/R/objective_function_input_checks.R @@ -316,7 +316,12 @@ check_args_to_vary <- function( utils::str(independent_args) cat('\nThe dependent arguments and their initial values:\n\n') - utils::str(dependent_arg_function(independent_args)) + + if (is.null(dependent_arg_function)) { + cat('\nNone.\n') + } else { + utils::str(dependent_arg_function(independent_args)) + } } # Make sure no drivers were specified diff --git a/tests/testthat/test-objective_function.R b/tests/testthat/test-objective_function.R index 738abdd..69f960a 100644 --- a/tests/testthat/test-objective_function.R +++ b/tests/testthat/test-objective_function.R @@ -91,6 +91,24 @@ test_that('Objective functions can be created and behave as expected', { ) ) + # Make sure there are no errors for one data-driver pair, no dependent + # arguments in verbose mode + sink(tempfile()) + + expect_no_error( + objective_function( + model, + ddps[1], + independent_args, + quantity_weights, + data_definitions = data_definitions, + post_process_function = post_process_function, + verbose_startup = TRUE + ) + ) + + sink() + # Two data-driver pairs, with dependent arguments and L2 regularization obj_fun <- expect_silent( objective_function( From 9c021c0df03d6e63e41b5d2362860116073439db Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Thu, 21 Aug 2025 12:00:40 -0500 Subject: [PATCH 18/24] Make printout consistent with others --- R/objective_function_input_checks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/objective_function_input_checks.R b/R/objective_function_input_checks.R index ebaa5ac..1736c27 100644 --- a/R/objective_function_input_checks.R +++ b/R/objective_function_input_checks.R @@ -318,7 +318,7 @@ check_args_to_vary <- function( cat('\nThe dependent arguments and their initial values:\n\n') if (is.null(dependent_arg_function)) { - cat('\nNone.\n') + cat(' None\n') } else { utils::str(dependent_arg_function(independent_args)) } From f5d0e9036d6c4b6bf09c8944fc6526c9db701db3 Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Thu, 21 Aug 2025 12:22:16 -0500 Subject: [PATCH 19/24] Fix bad tests I was developing these tests using BioCro version 3.2.0-3, but the online checks are using version 3.2.0, which produces different error messages. So I changed some tests so they don't check the content of error messages, which may differ with different BioCro versions. --- tests/testthat/test-objective_function.R | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/testthat/test-objective_function.R b/tests/testthat/test-objective_function.R index 69f960a..cf896ed 100644 --- a/tests/testthat/test-objective_function.R +++ b/tests/testthat/test-objective_function.R @@ -360,22 +360,22 @@ test_that('Driver-specific initial values are checked and used', { 'The following driver-specific initial values are not included in the base model definition: "extra_iv"' ) + # Here we specify a "bad" initial value of Leaf for 2005. This will trigger + # an error, but the particular error may depend on the particular version of + # BioCro being used, so we don't check the error message content. expect_error( objective_function( model, within(ddps, { ambient_2002$initial_values = list(Leaf = 0.1) # This shouldn't cause a problem - ambient_2005$initial_values = list(Leaf = -1) # This should trigger an error in the Ci solver + ambient_2005$initial_values = list(Leaf = -1) # This should trigger an error }), independent_args, quantity_weights, data_definitions = data_definitions, post_process_function = post_process_function, verbose_startup = verbose_startup - ), - 'The model could not be run with the following drivers: -ambient_2005', - fixed = TRUE + ) ) }) @@ -436,22 +436,23 @@ test_that('Driver-specific parameters are checked and used', { 'The following driver-specific parameters are not included in the base model definition: "extra_param"' ) + # Here we specify a "bad" value of the Ball-Berry intercept for 2005. This + # will trigger an error, but the particular error may depend on the + # particular version of BioCro being used, so we don't check the error + # message content. expect_error( objective_function( model, within(ddps, { ambient_2002$parameters = list(b0 = 0.1) # This shouldn't cause a problem - ambient_2005$parameters = list(b0 = -1) # This should trigger an `hs < 0` error from the Ball-Berry model code + ambient_2005$parameters = list(b0 = -1) # This should trigger an error }), independent_args, quantity_weights, data_definitions = data_definitions, post_process_function = post_process_function, verbose_startup = verbose_startup - ), - 'The model could not be run with the following drivers: -ambient_2005', - fixed = TRUE + ) ) }) From e6c9eae91967447800824f0979e8293e8df879e7 Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Fri, 5 Sep 2025 09:01:59 -0500 Subject: [PATCH 20/24] Use `modifyList` --- R/combine_lists.R | 9 --------- R/objective_function_helpers.R | 8 ++++---- R/objective_function_input_checks.R | 8 ++++---- 3 files changed, 8 insertions(+), 17 deletions(-) delete mode 100644 R/combine_lists.R diff --git a/R/combine_lists.R b/R/combine_lists.R deleted file mode 100644 index ff8f5bd..0000000 --- a/R/combine_lists.R +++ /dev/null @@ -1,9 +0,0 @@ -# Helping function for overwriting elements of a base list with elements of a -# second list; here we assume that both lists have names, and that the names of -# new_list is a subset of the names of base_list -combine_lists <- function(base_list, new_list) { - for (element_name in names(new_list)) { - base_list[[element_name]] <- new_list[[element_name]] - } - base_list -} diff --git a/R/objective_function_helpers.R b/R/objective_function_helpers.R index 12d7312..45d6bfc 100644 --- a/R/objective_function_helpers.R +++ b/R/objective_function_helpers.R @@ -46,13 +46,13 @@ get_model_runner <- function( # Build the runner tryCatch({ partial_func <- BioCro::partial_run_biocro( - combine_lists( + utils::modifyList( base_model_definition[['initial_values']], - ddp[['initial_values']] + c(list(), ddp[['initial_values']]) ), - combine_lists( + utils::modifyList( base_model_definition[['parameters']], - ddp[['parameters']] + c(list(), ddp[['parameters']]) ), ddp[['drivers']], base_model_definition[['direct_modules']], diff --git a/R/objective_function_input_checks.R b/R/objective_function_input_checks.R index 1736c27..4d9de7b 100644 --- a/R/objective_function_input_checks.R +++ b/R/objective_function_input_checks.R @@ -235,13 +235,13 @@ check_data_driver_pairs <- function(base_model_definition, data_driver_pairs, ve # base model definition valid_definitions <- sapply(data_driver_pairs, function(ddp) { BioCro::validate_dynamical_system_inputs( - combine_lists( + utils::modifyList( base_model_definition[['initial_values']], - ddp[['initial_values']] + c(list(), ddp[['initial_values']]) ), - combine_lists( + utils::modifyList( base_model_definition[['parameters']], - ddp[['parameters']] + c(list(), ddp[['parameters']]) ), ddp[['drivers']], base_model_definition[['direct_modules']], From eedc56524da412adb74ab87ad06a93d737d3f904 Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Fri, 5 Sep 2025 09:03:38 -0500 Subject: [PATCH 21/24] Update links --- DESCRIPTION | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6ea6709..4564a0e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,5 +23,5 @@ VignetteBuilder: knitr License: MIT + file LICENSE Encoding: UTF-8 LazyData: true -URL: https://github.com/BioCro/BioCroValidation, https://biocro.github.io/BioCroValidation/ +URL: https://github.com/BioCro/BioCroValidation, https://biocro.org/BioCroValidation/ Config/testthat/edition: 3 diff --git a/README.md b/README.md index f6eeee0..42a223e 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ from within R by typing `install.packages('remotes')`. ### Usage The best way to learn about using `BioCroValidation` is to visit the -[BioCroValidation website](https://biocro.github.io/BioCroValidation/index.html) +[BioCroValidation website](https://biocro.org/BioCroValidation/index.html) and click the "Get Started" link in the top menu bar. The website includes documentation for all the functions and data sets included in the package, as well as articles that describe its general features and several important use From ce1af0ca978436df92d3d654d913126aad5a7f93 Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Wed, 11 Mar 2026 15:37:58 -0700 Subject: [PATCH 22/24] Increment version --- DESCRIPTION | 2 +- NEWS.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4564a0e..949a286 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: BioCroValidation -Version: 0.2.0-1 +Version: 0.3.0 Title: Tools for Validating BioCro Models Description: A collection of tools for validating BioCro crop growth models. Authors@R: c( diff --git a/NEWS.md b/NEWS.md index 2394a7d..f10903e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -33,7 +33,7 @@ In the case of a hotfix, a short section headed by the new release number should be directly added to this file to describe the related changes. --> -# UNRELEASED +# Changes in BioCroValidation Version 0.3.0 (2026-03-11) - Fixed typos in the help page for `objective_function`, and in the `add_norm` function (defined in `R/objective_function_helpers.R`) From ea7abffc48c66de346aca465597e0eb91bba61fc Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Wed, 11 Mar 2026 15:44:20 -0700 Subject: [PATCH 23/24] Update DESCRIPTION --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 949a286..7eee250 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,7 +9,7 @@ Authors@R: c( person("BioCroValidation authors", role = "cph") ) Depends: - R (>= 3.6.0) + R (>= 4.1.0) Imports: BioCro (>= 3.2.0) Suggests: @@ -18,7 +18,7 @@ Suggests: lattice, knitr, rmarkdown, - testthat (>= 3.0.0) + testthat (>= 3.2.0) VignetteBuilder: knitr License: MIT + file LICENSE Encoding: UTF-8 From 9b04b1401bfad3b4893cfa309e48bd87047b3d80 Mon Sep 17 00:00:00 2001 From: eloch216 <48919455+eloch216@users.noreply.github.com> Date: Wed, 11 Mar 2026 15:46:28 -0700 Subject: [PATCH 24/24] Update R-CMD-check.yaml --- .github/workflows/R-CMD-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index acfefe6..69208fb 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -25,7 +25,7 @@ jobs: - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} - {os: ubuntu-latest, r: 'oldrel-1'} - - {os: ubuntu-latest, r: '3.6.0'} + - {os: ubuntu-latest, r: '4.1.0'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}