biocro · eloch216 · May 23, 2025 · May 23, 2025 · May 23, 2025 · May 23, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -14,6 +14,7 @@ Imports:
     BioCro (>= 3.2.0)
 Suggests:
     dfoptim,
+    DEoptim,
     lattice,
     knitr,
     rmarkdown,

diff --git a/NEWS.md b/NEWS.md
@@ -38,14 +38,17 @@ be directly added to this file to describe the related changes.
 - Added 2002 and 2005 SoyFACE biomass data.
 
 - Added several new functions: `objective_function`, `update_model`, and
-  `bounds_table`
+  `bounds_table`.
 
-- Added a vignette illustrating how to perform a model parameterization.
+- Added two new vignettes: a "Getting Started" article (`BioCroValidation.Rmd`)
+  and a user guide illustrating how to perform a model parameterization
+  (`parameterizing_soybean_biocro.Rmd`).
 
 # Changes in BioCroValidation Version 0.1.0
 
 - This is the first version of BioCroValidation. At this point, the package is
   in a state of rapid development, and not all changes will be described here.
+
 - We are reserving version `1.0.0` for a more stable and complete future
   release; until then, major changes should only increase the minor version
   number.
diff --git a/R/objective_function.R b/R/objective_function.R
@@ -8,7 +8,9 @@ objective_function <- function(
     quantity_weights,
     data_definitions = list(),
     normalization_method = 'mean_max',
+    normalization_param = NULL,
     stdev_weight_method = 'equal',
+    stdev_weight_param = NULL,
     regularization_method = 'none',
     dependent_arg_function = NULL,
     post_process_function = NULL,
@@ -73,13 +75,15 @@ objective_function <- function(
     long_form_data <- add_norm(
         long_form_data,
         normalization_method,
+        normalization_param,
         length(data_driver_pairs)
     )
 
     # Add variance-based weights
     long_form_data <- add_w_var(
         long_form_data,
-        stdev_weight_method
+        stdev_weight_method,
+        stdev_weight_param
     )
 
     # Print the long form data, if desired. Do this before checking the data,

diff --git a/R/objective_function_helpers.R b/R/objective_function_helpers.R
@@ -174,8 +174,23 @@ add_time_indices <- function(initial_runner_res, long_form_data) {
     long_form_data
 }
 
+# Helping function for getting a parameter value that has a default
+get_param_default <- function(param, default) {
+    if (is.null(param)) {
+        default
+    } else {
+        param
+    }
+}
+
 # Helping function for getting normalization factors
-add_norm <- function(long_form_data, normalization_method, n_ddp) {
+add_norm <- function(
+    long_form_data,
+    normalization_method,
+    normalization_param,
+    n_ddp
+)
+{
     for (i in seq_along(long_form_data)) {
         data_table <- long_form_data[[i]]
 
@@ -191,10 +206,17 @@ add_norm <- function(long_form_data, normalization_method, n_ddp) {
                 nrow(qname_subset) * n_ddp
             } else if (tolower(normalization_method) == 'max') {
                 max(qname_subset[['quantity_value']])^2
+            } else if (tolower(normalization_method) == 'obs') {
+                eps <- get_param_default(normalization_param, 1e-1)
+                data_table[i, 'quantity_value']^2 + eps
             } else if (tolower(normalization_method) == 'mean_max') {
                 npts <- nrow(qname_subset)
                 qmax <- max(qname_subset[['quantity_value']])
                 npts * n_ddp * qmax^2
+            } else if (tolower(normalization_method) == 'mean_obs') {
+                npts <- nrow(qname_subset)
+                eps <- get_param_default(normalization_param, 1e-1)
+                npts * n_ddp * (data_table[i, 'quantity_value']^2 + eps)
             } else {
                 stop('Unsupported normalization_method: ', normalization_method)
             }
@@ -207,7 +229,7 @@ add_norm <- function(long_form_data, normalization_method, n_ddp) {
 }
 
 # Helping function for getting variance-based weights
-add_w_var <- function(long_form_data, stdev_weight_method) {
+add_w_var <- function(long_form_data, stdev_weight_method, stdev_weight_param) {
     for (i in seq_along(long_form_data)) {
         data_table <- long_form_data[[i]]
         data_stdev <- data_table[['quantity_stdev']]
@@ -216,9 +238,11 @@ add_w_var <- function(long_form_data, stdev_weight_method) {
             if (tolower(stdev_weight_method) == 'equal') {
                 1.0
             } else if (tolower(stdev_weight_method) == 'logarithm') {
-                log(1 / (data_stdev + 1e-5))
+                eps <- get_param_default(stdev_weight_param, 1e-5)
+                log(1 / (data_stdev + eps))
             } else if (tolower(stdev_weight_method) == 'inverse') {
-                1 / data_stdev^2
+                eps <- get_param_default(stdev_weight_param, 1e-1)
+                1 / (data_stdev^2 + eps)
             } else {
                 stop('Unsupported stdev_weight_method: ', stdev_weight_method)
             }

diff --git a/README.md b/README.md
@@ -22,6 +22,15 @@ remotes::install_github('BioCro/BioCroValidation')
 Note that this method requires the `remotes` package, which can be installed
 from within R by typing `install.packages('remotes')`.
 
+### Usage
+
+The best way to learn about using `BioCroValidation` is to visit the
+[BioCroValidation website](https://biocro.github.io/BioCroValidation/index.html)
+and click the "Get Started" link in the top menu bar. The website includes
+documentation for all the functions and data sets included in the package, as
+well as articles that describe its general features and several important use
+cases.
+
 ### License
 
 The `BioCroValidation` R package and its documentation are licensed under the

diff --git a/man/bounds_table.Rd b/man/bounds_table.Rd
@@ -56,6 +56,10 @@
   bounds; an error will be thrown if any do lie outside the bounds. A warning
   will also be thrown if any initial guesses lie on the bounds, since this can
   be problematic for some optimizers, such as \code{\link[dfoptim]{nmkb}}.
+
+  Some optimizers, such as \code{\link[DEoptim]{DEoptim}}, do not require an
+  initial guess; in this case, there is no strong need to pass an initial guess
+  to \code{bounds_table}.
 }
 
 \value{

diff --git a/man/objective_function.Rd b/man/objective_function.Rd
@@ -22,6 +22,9 @@
   It is also possible to include "dependent" model arguments, whose values are
   determined from the "independent" model arguments that are varied during the
   parameterization procedure.
+
+  For a detailed example of using \code{objective_function}, see the
+  "Parameterizing Soybean-BioCro" vignette/article.
 }
 
 \usage{
@@ -32,7 +35,9 @@
     quantity_weights,
     data_definitions = list(),
     normalization_method = 'mean_max',
+    normalization_param = NULL,
     stdev_weight_method = 'equal',
+    stdev_weight_param = NULL,
     regularization_method = 'none',
     dependent_arg_function = NULL,
     post_process_function = NULL,
@@ -57,7 +62,8 @@
     \code{time}, whose values follow BioCro's definition of
     \code{\link[BioCro]{time}}; the other columns should represent observed
     values of model outputs. Any \code{NA} values in \code{data} will
-    be ignored when calculating the error metric.
+    be ignored when calculating the error metric, but all non-\code{NA} values
+    of all columns (except \code{time}) will be compared to the model output.
 
     The \code{drivers} element must be a data frame that can be passed to
     \code{\link[BioCro]{run_biocro}} as its \code{drivers} input argument.
@@ -75,8 +81,8 @@
   \item{independent_args}{
     A list of named numeric values. The names will determine the independent
     arguments to be varied during their optimization, and the values specify
-    "test" values of each argument that will be used internally to check that
-    the objective function is properly defined and can be evaluated.
+    "initial" or "test" values of each argument that will be used internally to
+    check that the objective function is properly defined and can be evaluated.
   }
 
   \item{quantity_weights}{
@@ -101,11 +107,25 @@
     error metric; see below for more details.
   }
 
+  \item{normalization_param}{
+    An (optional) parameter value used by some normalization methods. When
+    \code{normalization_param} is \code{NULL}, a default value will be used,
+    which depends on the particular normalization method. Otherwise, the
+    user-specified value will be used. See details below.
+  }
+
   \item{stdev_weight_method}{
     A string indicating the method to be used when calculating the
     variance-based weights used in the error metric; see below for more details.
   }
 
+  \item{stdev_weight_param}{
+    An (optional) parameter value used by some normalization methods. When
+    \code{stdev_weight_param} is \code{NULL}, a default value will be used,
+    which depends on the particular normalization method. Otherwise, the
+    user-specified value will be used. See details below.
+  }
+
   \item{regularization_method}{
     A string indicating the regularization method to be used when calculating
     the regularization penalty term; see below for more details.
@@ -327,18 +347,33 @@
           as
 
           \deqn{w_i^{stdev} =%
-          ln \left( \frac{1}{\sigma + 10^{-5}} \right),}
-          where \eqn{ln} denotes a logarithm with base \eqn{e}. This method
-          was used in the original Soybean-BioCro paper. Note: this method
-          should be used with caution, because \eqn{w_i^{stdev}} is zero
-          for \eqn{\sigma = 1 - 10^{-5} = 0.99999}, and it becomes negative for
-          larger values of \eqn{\sigma}.
+          ln \left( \frac{1}{\sigma_i + \epsilon} \right),}
+          where \eqn{ln} denotes a logarithm with base \eqn{e} and
+          \eqn{\epsilon} is a small number included to prevent numerical errors
+          that would otherwise occur when \eqn{\sigma = 0}. This method was
+          used in the original Soybean-BioCro paper.
+
+          The value of \eqn{\epsilon} is specified by the
+          \code{stdev_weight_param} input argument, which defaults to
+          \code{1e-5} when \code{stdev_weight_param} is \code{NULL} when using
+          this method. With the default value of \eqn{\epsilon},
+          \eqn{w_i^{stdev} \approx 11.512} when \eqn{\sigma = 0}.
+
+          Note: this method should be used with caution, because
+          \eqn{w_i^{stdev}} is zero for \eqn{\sigma_i = 1 - \epsilon}, and it
+          becomes negative for larger values of \eqn{\sigma_i}.
 
     \item \code{'inverse'}: For this method, \eqn{w_i^{stdev}} is calculated as
 
-          \deqn{w_i^{stdev} = \frac{1}{\sigma^2}.}
-          Note: this method should be used with caution, because
-          \eqn{w_i^{stdev}} is infinite when \eqn{\sigma} is zero.
+          \deqn{w_i^{stdev} = \frac{1}{\sigma_i^2 + \epsilon},}
+          where \eqn{\epsilon} is a small number included to prevent numerical
+          errors that would otherwise occur when \eqn{\sigma_i = 0}.
+
+          The value of \eqn{\epsilon} is specified by the
+          \code{stdev_weight_param} input argument, which defaults to
+          \code{1e-1} when \code{stdev_weight_param} is \code{NULL} when using
+          this method. With the default value of \eqn{\epsilon},
+          \eqn{w_i^{stdev} = 10} when \eqn{\sigma_i = 0}.
   }
 
   If any values of \eqn{w_i^{stdev}} are undefined, negative, or infinite, an
@@ -377,17 +412,51 @@
           method. This approach avoids over-representing variable types with
           larger magnitude when determining \eqn{E_{agreement}}.
 
+    \item \code{'obs'}: For this method,
+
+          \deqn{N_i = \left( Y_{obs}^i \right)^2 + \epsilon,}
+          where \eqn{\epsilon} is a small number included to prevent numerical
+          errors that would otherwise occur when \eqn{Y_{obs}^i = 0}. In this
+          case, the equation for \eqn{E_{agreement}} essentially uses relative
+          differences rather than absolute differences, which is achieved by
+          normalizing by the observed values, hence the name. This approach
+          avoids over-representing time points where a particular quantity takes
+          its largest values when determining \eqn{E_{agreement}}.
+
+          The value of \eqn{\epsilon} is specified by the
+          \code{normalization_param} input argument, which defaults to
+          \code{1e-1} when \code{normalization_param} is \code{NULL} when using
+          this method. With the default value of \eqn{\epsilon}, \eqn{N_i = 10}
+          when \eqn{Y_{obs}^i = 0}.
+
     \item \code{'mean_max'}: For this method, the "mean" and "max" methods are
           combined so that
 
           \deqn{N_i = n_{vtype}^{vdrivers}%
           \cdot n_{data} \cdot \left( max_{vtype}^{vdata} \right)^2.}
           This approach avoids over-representing drivers with larger numbers of
           associated observations, and variable types with larger magnitudes.
-          This method is used for parameterizing Soybean-BioCro, and is
-          recommended for most situations.
+          This method is used for parameterizing Soybean-BioCro.
+
+    \item \code{'mean_obs'}: For this method, the "mean" and "obs" methods are
+          combined so that
+
+          \deqn{N_i = n_{vtype}^{vdrivers} \cdot n_{data}%
+          \cdot \left( \left( Y_{obs}^i \right)^2 + \epsilon \right)^2.}
+          This approach avoids over-representing drivers with larger numbers of
+          associated observations, and time points with larger observed values.
+
+          The value of \eqn{\epsilon} is specified by the
+          \code{normalization_param} input argument, which defaults to
+          \code{1e-1} when \code{normalization_param} is \code{NULL} when using
+          this method. With the default value of \eqn{\epsilon},
+          \eqn{N_i = 10 \cdot n_{vtype}^{vdrivers} \cdot n_{data}} when
+          \eqn{Y_{obs}^i = 0}.
   }
 
+  In most situations, it is recommended to use either \code{'mean_max'} or
+  \code{'mean_obs'} depending on user preference or performance.
+
   \strong{Regularization methods}
 
   The following regularization methods are available:
@@ -437,21 +506,22 @@
           \code{post_process_function}.
 
     \item Ensuring that each independent and dependent argument name is either
-          a parameter or initial value of the model. This is accomplished using
-          \code{\link[BioCro]{partial_run_biocro}}. Note that argument names
-          passed to \code{partial_run_biocro} can technically include drivers,
-          but it is unlikely that the value of a driver would be varied during
-          an optimization, so the argument names are not allowed to include
+          a parameter or initial value of the model. Internally, argument names
+          are passed to \code{\link[BioCro]{partial_run_biocro}} via its
+          \code{arg_names} input. Note that argument names passed to
+          \code{partial_run_biocro} can technically include drivers, but it is
+          unlikely that the value of a driver would be varied during an
+          optimization, so the argument names are not allowed to include
           columns in the drivers.
 
     \item Ensuring that the optional \code{dependent_arg_function},
           \code{post_process_function}, and \code{extra_penalty_function}
           functions can be run without causing errors.
 
     \item Ensuring that certain values are finite (such as \eqn{Y_{obs}},
-          \eqn{\sigma}, \eqn{w_i^{stdev}}, and \eqn{N}), and that certain values
-          are not negative (such as \eqn{\sigma}, \eqn{w_i^{stdev}}, and
-          \eqn{N}).
+          \eqn{\sigma_i}, \eqn{w_i^{stdev}}, and \eqn{N_i}), and that certain
+          values are not negative (such as \eqn{\sigma_i}, \eqn{w_i^{stdev}},
+          and \eqn{N_i}).
   }
 
   If any issues are detected, an informative error message will be sent. Note

diff --git a/tests/testthat/test-objective_function.R b/tests/testthat/test-objective_function.R
@@ -473,6 +473,7 @@ test_that('Bad data values and weights are detected', {
             independent_args,
             quantity_weights,
             stdev_weight_method = 'inverse',
+            stdev_weight_param = 0,
             data_definitions = data_definitions,
             post_process_function = post_process_function,
             verbose_startup = verbose_startup