From 382678f81e0cbce7528617773ff5d4934e8c3976 Mon Sep 17 00:00:00 2001
From: eloch216 <48919455+eloch216@users.noreply.github.com>
Date: Fri, 23 May 2025 09:59:31 -0500
Subject: [PATCH 1/3] Add more info about `bounds_table`

---
 DESCRIPTION         | 1 +
 man/bounds_table.Rd | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index 720c39c..78e5225 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -14,6 +14,7 @@ Imports:
     BioCro (>= 3.2.0)
 Suggests:
     dfoptim,
+    DEoptim,
     lattice,
     knitr,
     rmarkdown,
diff --git a/man/bounds_table.Rd b/man/bounds_table.Rd
index b833c59..45cd68a 100644
--- a/man/bounds_table.Rd
+++ b/man/bounds_table.Rd
@@ -56,6 +56,10 @@
   bounds; an error will be thrown if any do lie outside the bounds. A warning
   will also be thrown if any initial guesses lie on the bounds, since this can
   be problematic for some optimizers, such as \code{\link[dfoptim]{nmkb}}.
+
+  Some optimizers, such as \code{\link[DEoptim]{DEoptim}}, do not require an
+  initial guess; in this case, there is no strong need to pass an initial guess
+  to \code{bounds_table}.
 }
 
 \value{

From 43a0e74baaf2a2dc2322842517554b94ac735c7e Mon Sep 17 00:00:00 2001
From: eloch216 <48919455+eloch216@users.noreply.github.com>
Date: Fri, 23 May 2025 09:59:57 -0500
Subject: [PATCH 2/3] Allow flexible `epsilon`

---
 R/objective_function.R                      |   6 +-
 R/objective_function_helpers.R              |  32 +++++-
 man/objective_function.Rd                   | 114 ++++++++++++++++----
 tests/testthat/test-objective_function.R    |   1 +
 vignettes/parameterizing_soybean_biocro.Rmd |  28 +++--
 5 files changed, 139 insertions(+), 42 deletions(-)

diff --git a/R/objective_function.R b/R/objective_function.R
index 1730e29..6c21e00 100644
--- a/R/objective_function.R
+++ b/R/objective_function.R
@@ -8,7 +8,9 @@ objective_function <- function(
     quantity_weights,
     data_definitions = list(),
     normalization_method = 'mean_max',
+    normalization_param = NULL,
     stdev_weight_method = 'equal',
+    stdev_weight_param = NULL,
     regularization_method = 'none',
     dependent_arg_function = NULL,
     post_process_function = NULL,
@@ -73,13 +75,15 @@ objective_function <- function(
     long_form_data <- add_norm(
         long_form_data,
         normalization_method,
+        normalization_param,
         length(data_driver_pairs)
     )
 
     # Add variance-based weights
     long_form_data <- add_w_var(
         long_form_data,
-        stdev_weight_method
+        stdev_weight_method,
+        stdev_weight_param
     )
 
     # Print the long form data, if desired. Do this before checking the data,
diff --git a/R/objective_function_helpers.R b/R/objective_function_helpers.R
index ce32bef..ae5ef09 100644
--- a/R/objective_function_helpers.R
+++ b/R/objective_function_helpers.R
@@ -174,8 +174,23 @@ add_time_indices <- function(initial_runner_res, long_form_data) {
     long_form_data
 }
 
+# Helping function for getting a parameter value that has a default
+get_param_default <- function(param, default) {
+    if (is.null(param)) {
+        default
+    } else {
+        param
+    }
+}
+
 # Helping function for getting normalization factors
-add_norm <- function(long_form_data, normalization_method, n_ddp) {
+add_norm <- function(
+    long_form_data,
+    normalization_method,
+    normalization_param,
+    n_ddp
+)
+{
     for (i in seq_along(long_form_data)) {
         data_table <- long_form_data[[i]]
 
@@ -191,10 +206,17 @@ add_norm <- function(long_form_data, normalization_method, n_ddp) {
                 nrow(qname_subset) * n_ddp
             } else if (tolower(normalization_method) == 'max') {
                 max(qname_subset[['quantity_value']])^2
+            } else if (tolower(normalization_method) == 'obs') {
+                eps <- get_param_default(normalization_param, 1e-1)
+                data_table[i, 'quantity_value']^2 + eps
             } else if (tolower(normalization_method) == 'mean_max') {
                 npts <- nrow(qname_subset)
                 qmax <- max(qname_subset[['quantity_value']])
                 npts * n_ddp * qmax^2
+            } else if (tolower(normalization_method) == 'mean_obs') {
+                npts <- nrow(qname_subset)
+                eps <- get_param_default(normalization_param, 1e-1)
+                npts * n_ddp * (data_table[i, 'quantity_value']^2 + eps)
             } else {
                 stop('Unsupported normalization_method: ', normalization_method)
             }
@@ -207,7 +229,7 @@ add_norm <- function(long_form_data, normalization_method, n_ddp) {
 }
 
 # Helping function for getting variance-based weights
-add_w_var <- function(long_form_data, stdev_weight_method) {
+add_w_var <- function(long_form_data, stdev_weight_method, stdev_weight_param) {
     for (i in seq_along(long_form_data)) {
         data_table <- long_form_data[[i]]
         data_stdev <- data_table[['quantity_stdev']]
@@ -216,9 +238,11 @@ add_w_var <- function(long_form_data, stdev_weight_method) {
             if (tolower(stdev_weight_method) == 'equal') {
                 1.0
             } else if (tolower(stdev_weight_method) == 'logarithm') {
-                log(1 / (data_stdev + 1e-5))
+                eps <- get_param_default(stdev_weight_param, 1e-5)
+                log(1 / (data_stdev + eps))
             } else if (tolower(stdev_weight_method) == 'inverse') {
-                1 / data_stdev^2
+                eps <- get_param_default(stdev_weight_param, 1e-1)
+                1 / (data_stdev^2 + eps)
             } else {
                 stop('Unsupported stdev_weight_method: ', stdev_weight_method)
             }
diff --git a/man/objective_function.Rd b/man/objective_function.Rd
index 4693d9c..2cb1e5a 100644
--- a/man/objective_function.Rd
+++ b/man/objective_function.Rd
@@ -22,6 +22,9 @@
   It is also possible to include "dependent" model arguments, whose values are
   determined from the "independent" model arguments that are varied during the
   parameterization procedure.
+
+  For a detailed example of using \code{objective_function}, see the
+  "Parameterizing Soybean-BioCro" vignette/article.
 }
 
 \usage{
@@ -32,7 +35,9 @@
     quantity_weights,
     data_definitions = list(),
     normalization_method = 'mean_max',
+    normalization_param = NULL,
     stdev_weight_method = 'equal',
+    stdev_weight_param = NULL,
     regularization_method = 'none',
     dependent_arg_function = NULL,
     post_process_function = NULL,
@@ -57,7 +62,8 @@
     \code{time}, whose values follow BioCro's definition of
     \code{\link[BioCro]{time}}; the other columns should represent observed
     values of model outputs. Any \code{NA} values in \code{data} will
-    be ignored when calculating the error metric.
+    be ignored when calculating the error metric, but all non-\code{NA} values
+    of all columns (except \code{time}) will be compared to the model output.
 
     The \code{drivers} element must be a data frame that can be passed to
     \code{\link[BioCro]{run_biocro}} as its \code{drivers} input argument.
@@ -75,8 +81,8 @@
   \item{independent_args}{
     A list of named numeric values. The names will determine the independent
     arguments to be varied during their optimization, and the values specify
-    "test" values of each argument that will be used internally to check that
-    the objective function is properly defined and can be evaluated.
+    "initial" or "test" values of each argument that will be used internally to
+    check that the objective function is properly defined and can be evaluated.
   }
 
   \item{quantity_weights}{
@@ -101,11 +107,25 @@
     error metric; see below for more details.
   }
 
+  \item{normalization_param}{
+    An (optional) parameter value used by some normalization methods. When
+    \code{normalization_param} is \code{NULL}, a default value will be used,
+    which depends on the particular normalization method. Otherwise, the
+    user-specified value will be used. See details below.
+  }
+
   \item{stdev_weight_method}{
     A string indicating the method to be used when calculating the
     variance-based weights used in the error metric; see below for more details.
   }
 
+  \item{stdev_weight_param}{
+    An (optional) parameter value used by some normalization methods. When
+    \code{stdev_weight_param} is \code{NULL}, a default value will be used,
+    which depends on the particular normalization method. Otherwise, the
+    user-specified value will be used. See details below.
+  }
+
   \item{regularization_method}{
     A string indicating the regularization method to be used when calculating
     the regularization penalty term; see below for more details.
@@ -327,18 +347,33 @@
           as
 
           \deqn{w_i^{stdev} =%
-          ln \left( \frac{1}{\sigma + 10^{-5}} \right),}
-          where \eqn{ln} denotes a logarithm with base \eqn{e}. This method
-          was used in the original Soybean-BioCro paper. Note: this method
-          should be used with caution, because \eqn{w_i^{stdev}} is zero
-          for \eqn{\sigma = 1 - 10^{-5} = 0.99999}, and it becomes negative for
-          larger values of \eqn{\sigma}.
+          ln \left( \frac{1}{\sigma_i + \epsilon} \right),}
+          where \eqn{ln} denotes a logarithm with base \eqn{e} and
+          \eqn{\epsilon} is a small number included to prevent numerical errors
+          that would otherwise occur when \eqn{\sigma = 0}. This method was
+          used in the original Soybean-BioCro paper.
+
+          The value of \eqn{\epsilon} is specified by the
+          \code{stdev_weight_param} input argument, which defaults to
+          \code{1e-5} when \code{stdev_weight_param} is \code{NULL} when using
+          this method. With the default value of \eqn{\epsilon},
+          \eqn{w_i^{stdev} \approx 11.512} when \eqn{\sigma = 0}.
+
+          Note: this method should be used with caution, because
+          \eqn{w_i^{stdev}} is zero for \eqn{\sigma_i = 1 - \epsilon}, and it
+          becomes negative for larger values of \eqn{\sigma_i}.
 
     \item \code{'inverse'}: For this method, \eqn{w_i^{stdev}} is calculated as
 
-          \deqn{w_i^{stdev} = \frac{1}{\sigma^2}.}
-          Note: this method should be used with caution, because
-          \eqn{w_i^{stdev}} is infinite when \eqn{\sigma} is zero.
+          \deqn{w_i^{stdev} = \frac{1}{\sigma_i^2 + \epsilon},}
+          where \eqn{\epsilon} is a small number included to prevent numerical
+          errors that would otherwise occur when \eqn{\sigma_i = 0}.
+
+          The value of \eqn{\epsilon} is specified by the
+          \code{stdev_weight_param} input argument, which defaults to
+          \code{1e-1} when \code{stdev_weight_param} is \code{NULL} when using
+          this method. With the default value of \eqn{\epsilon},
+          \eqn{w_i^{stdev} = 10} when \eqn{\sigma_i = 0}.
   }
 
   If any values of \eqn{w_i^{stdev}} are undefined, negative, or infinite, an
@@ -377,6 +412,23 @@
           method. This approach avoids over-representing variable types with
           larger magnitude when determining \eqn{E_{agreement}}.
 
+    \item \code{'obs'}: For this method,
+
+          \deqn{N_i = \left( Y_{obs}^i \right)^2 + \epsilon,}
+          where \eqn{\epsilon} is a small number included to prevent numerical
+          errors that would otherwise occur when \eqn{Y_{obs}^i = 0}. In this
+          case, the equation for \eqn{E_{agreement}} essentially uses relative
+          differences rather than absolute differences, which is achieved by
+          normalizing by the observed values, hence the name. This approach
+          avoids over-representing time points where a particular quantity takes
+          its largest values when determining \eqn{E_{agreement}}.
+
+          The value of \eqn{\epsilon} is specified by the
+          \code{normalization_param} input argument, which defaults to
+          \code{1e-1} when \code{normalization_param} is \code{NULL} when using
+          this method. With the default value of \eqn{\epsilon}, \eqn{N_i = 10}
+          when \eqn{Y_{obs}^i = 0}.
+
     \item \code{'mean_max'}: For this method, the "mean" and "max" methods are
           combined so that
 
@@ -384,10 +436,27 @@
           \cdot n_{data} \cdot \left( max_{vtype}^{vdata} \right)^2.}
           This approach avoids over-representing drivers with larger numbers of
           associated observations, and variable types with larger magnitudes.
-          This method is used for parameterizing Soybean-BioCro, and is
-          recommended for most situations.
+          This method is used for parameterizing Soybean-BioCro.
+
+    \item \code{'mean_obs'}: For this method, the "mean" and "obs" methods are
+          combined so that
+
+          \deqn{N_i = n_{vtype}^{vdrivers} \cdot n_{data}%
+          \cdot \left( \left( Y_{obs}^i \right)^2 + \epsilon \right)^2.}
+          This approach avoids over-representing drivers with larger numbers of
+          associated observations, and time points with larger observed values.
+
+          The value of \eqn{\epsilon} is specified by the
+          \code{normalization_param} input argument, which defaults to
+          \code{1e-1} when \code{normalization_param} is \code{NULL} when using
+          this method. With the default value of \eqn{\epsilon},
+          \eqn{N_i = 10 \cdot n_{vtype}^{vdrivers} \cdot n_{data}} when
+          \eqn{Y_{obs}^i = 0}.
   }
 
+  In most situations, it is recommended to use either \code{'mean_max'} or
+  \code{'mean_obs'} depending on user preference or performance.
+
   \strong{Regularization methods}
 
   The following regularization methods are available:
@@ -437,11 +506,12 @@
           \code{post_process_function}.
 
     \item Ensuring that each independent and dependent argument name is either
-          a parameter or initial value of the model. This is accomplished using
-          \code{\link[BioCro]{partial_run_biocro}}. Note that argument names
-          passed to \code{partial_run_biocro} can technically include drivers,
-          but it is unlikely that the value of a driver would be varied during
-          an optimization, so the argument names are not allowed to include
+          a parameter or initial value of the model. Internally, argument names
+          are passed to \code{\link[BioCro]{partial_run_biocro}} via its
+          \code{arg_names} input. Note that argument names passed to
+          \code{partial_run_biocro} can technically include drivers, but it is
+          unlikely that the value of a driver would be varied during an
+          optimization, so the argument names are not allowed to include
           columns in the drivers.
 
     \item Ensuring that the optional \code{dependent_arg_function},
@@ -449,9 +519,9 @@
           functions can be run without causing errors.
 
     \item Ensuring that certain values are finite (such as \eqn{Y_{obs}},
-          \eqn{\sigma}, \eqn{w_i^{stdev}}, and \eqn{N}), and that certain values
-          are not negative (such as \eqn{\sigma}, \eqn{w_i^{stdev}}, and
-          \eqn{N}).
+          \eqn{\sigma_i}, \eqn{w_i^{stdev}}, and \eqn{N_i}), and that certain
+          values are not negative (such as \eqn{\sigma_i}, \eqn{w_i^{stdev}},
+          and \eqn{N_i}).
   }
 
   If any issues are detected, an informative error message will be sent. Note
diff --git a/tests/testthat/test-objective_function.R b/tests/testthat/test-objective_function.R
index 6fee66a..dcfccf1 100644
--- a/tests/testthat/test-objective_function.R
+++ b/tests/testthat/test-objective_function.R
@@ -473,6 +473,7 @@ test_that('Bad data values and weights are detected', {
             independent_args,
             quantity_weights,
             stdev_weight_method = 'inverse',
+            stdev_weight_param = 0,
             data_definitions = data_definitions,
             post_process_function = post_process_function,
             verbose_startup = verbose_startup
diff --git a/vignettes/parameterizing_soybean_biocro.Rmd b/vignettes/parameterizing_soybean_biocro.Rmd
index 3db1dea..651894a 100644
--- a/vignettes/parameterizing_soybean_biocro.Rmd
+++ b/vignettes/parameterizing_soybean_biocro.Rmd
@@ -100,10 +100,10 @@ added to the observed values.
 
 In previous parameterizations, a standard deviation for the root mass was not
 explicitly estimated; instead, the standard-deviation-based weight factor was
-implicitly set to 1. Because the `'logarithm'` method was used, a weight factor
-of 1 implies a standard deviation of \(1 / e - 10^{-5} \approx 0.3678694\). See
-the documentation page (`?objective_function`) for more information about this
-weighting method.
+implicitly set to 1. Because the `'logarithm'` method with
+\(\epsilon = 10^{-5}\) was used, a weight factor of 1 implies a standard
+deviation of \(1 / e - 10^{-5} \approx 0.3678694\). See the documentation page
+(`?objective_function`) for more information about this weighting method.
 
 Finally, the data set includes some values that are not needed for the
 parameterization. This includes the leaf litter accumulated between each
@@ -129,14 +129,12 @@ process_table <- function(data_table, type) {
     sqrt(data_table$Rep_Mg_per_ha^2 + data_table$Seed_Mg_per_ha^2)
   }
 
-  # Define new `Root_Mg_per_ha` column
-  if (type == 'biomass') {
-    # Initialize all values to NA
-    data_table$Root_Mg_per_ha <- NA
+  # Define new `Root_Mg_per_ha` column, which has just one non-NA value
+  row_to_use <- 5                 # Choose row to use
+  data_table$Root_Mg_per_ha <- NA # Initialize all values to NA
 
+  if (type == 'biomass') {
     # Estimate a mass at one time point
-    row_to_use <- 5
-
     col_to_add <- c(
       'Leaf_Mg_per_ha',
       'Stem_Mg_per_ha',
@@ -146,9 +144,8 @@ process_table <- function(data_table, type) {
     data_table[row_to_use, 'Root_Mg_per_ha'] <-
       0.17 * sum(data_table[row_to_use, col_to_add])
   } else {
-    # We can set a value for each time point; any time points where the root
-    # mass is NA will be ignored
-    data_table$Root_Mg_per_ha <- 1 / exp(1) - 1e-5
+    # Estimate standard deviation at one time point
+    data_table[row_to_use, 'Root_Mg_per_ha'] <- 1 / exp(1) - 1e-5
   }
 
   # Remove columns by setting them to NULL
@@ -397,8 +394,8 @@ more details to discuss:
   for more details.
 
 - Soybean-BioCro has always used the `'logarithm'` method for determining
-  weights from standard deviations; see Equation 17 of
-  @matthews_soybean_biocro_2021 for more details.
+  weights from standard deviations with \(\epsilon = 10^{-5}\); see Equation 17
+  of @matthews_soybean_biocro_2021 for more details.
 
 - Soybean-BioCro has not used any regularization.
 
@@ -416,6 +413,7 @@ obj_fun <- objective_function(
   data_definitions       = data_definitions,
   normalization_method   = 'mean_max',
   stdev_weight_method    = 'logarithm',
+  stdev_weight_param     = 1e-5,
   regularization_method  = 'none',
   dependent_arg_function = dependent_arg_function,
   post_process_function  = post_process_function,

From 4b3e3490cc9fa3aa4672a2824272dcaed613d951 Mon Sep 17 00:00:00 2001
From: eloch216 <48919455+eloch216@users.noreply.github.com>
Date: Fri, 23 May 2025 15:46:09 -0500
Subject: [PATCH 3/3] Add "Getting Started" vignette

---
 NEWS.md                        |  7 ++-
 README.md                      |  9 ++++
 vignettes/BioCroValidation.Rmd | 92 ++++++++++++++++++++++++++++++++++
 3 files changed, 106 insertions(+), 2 deletions(-)
 create mode 100644 vignettes/BioCroValidation.Rmd

diff --git a/NEWS.md b/NEWS.md
index ab5e3fa..350c164 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -38,14 +38,17 @@ be directly added to this file to describe the related changes.
 - Added 2002 and 2005 SoyFACE biomass data.
 
 - Added several new functions: `objective_function`, `update_model`, and
-  `bounds_table`
+  `bounds_table`.
 
-- Added a vignette illustrating how to perform a model parameterization.
+- Added two new vignettes: a "Getting Started" article (`BioCroValidation.Rmd`)
+  and a user guide illustrating how to perform a model parameterization
+  (`parameterizing_soybean_biocro.Rmd`).
 
 # Changes in BioCroValidation Version 0.1.0
 
 - This is the first version of BioCroValidation. At this point, the package is
   in a state of rapid development, and not all changes will be described here.
+
 - We are reserving version `1.0.0` for a more stable and complete future
   release; until then, major changes should only increase the minor version
   number.
diff --git a/README.md b/README.md
index 2b2c91c..f6eeee0 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,15 @@ remotes::install_github('BioCro/BioCroValidation')
 Note that this method requires the `remotes` package, which can be installed
 from within R by typing `install.packages('remotes')`.
 
+### Usage
+
+The best way to learn about using `BioCroValidation` is to visit the
+[BioCroValidation website](https://biocro.github.io/BioCroValidation/index.html)
+and click the "Get Started" link in the top menu bar. The website includes
+documentation for all the functions and data sets included in the package, as
+well as articles that describe its general features and several important use
+cases.
+
 ### License
 
 The `BioCroValidation` R package and its documentation are licensed under the
diff --git a/vignettes/BioCroValidation.Rmd b/vignettes/BioCroValidation.Rmd
new file mode 100644
index 0000000..4982053
--- /dev/null
+++ b/vignettes/BioCroValidation.Rmd
@@ -0,0 +1,92 @@
+---
+title: "Getting Started With BioCroValidation"
+output:
+  rmarkdown::html_vignette:
+    toc: true
+    number_sections: true
+vignette: >
+  %\VignetteIndexEntry{Getting Started With BioCroValidation}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  fig.width = 6,
+  fig.height = 5,
+  fig.align = "center"
+)
+```
+
+# Overview
+
+**BioCroValidation** is an R package that provides a suite of tools for
+validating BioCro crop growth models.
+
+Validation is a key part of using and developing BioCro models. The goal of this
+package is to provide some convenient "helping" functions to aid with various
+aspects of model validation, especially parameterization.
+
+The central tool in the package is the `objective_function` function. As its
+name implies, it can be used to create an objective function that calculates the
+value of an error metric value given a set of model parameter values. In turn,
+the objective function itself can be passed to an optimizer.
+
+While it is possible for each BioCro user to write their own customized
+objective function, creating one can be a very complex process because there are
+many potential aspects to consider:
+
+- **Mathematical / Statistical Approach:** This refers to choices like "Do I
+  want to normalize the error terms by each observed value, or by the largest
+  observed value for each measured quantity?" or "Do I want to use L1 or L2
+  regularization?"
+
+- **Implementation of Mathematical / Statistical Approach:** Once an approach
+  has been identified, code must be written to properly implement it.
+
+- **Error Checks:** A wide variety of strange conditions can occur during
+  parameterization, and the objective function must be ready to handle them. For
+  example, how should the objective function respond when a simulation does not
+  run to completion, or when an optimizer passes `NA` as a parameter value?
+
+- **Technical Details:** Parameterization can take a long time to perform, so it
+  is important for the objective function code to be as efficient and fast as
+  possible.
+
+The goal of `objective_function` is to allow users to make the key choices about
+their mathematical approach using clear code statements like
+`regularization_method = 'L2'`, while the implementation details, error checks,
+and other technical details are handled internally. This will result in clear
+scripts that are also more reliable.
+
+Besides `objective_function`, the package also includes a few other functions
+with a similar goal of clarifying code and hiding implementation details, such
+as `bounds_table` and `update_model`.
+
+# Installing BioCroValidation
+
+The easiest way to install `BioCroValidation` is to type the following from
+within an R terminal:
+
+```{r, eval = FALSE}
+remotes::install_github('biocro/BioCroValidation')
+```
+
+Note that this method requires the `remotes` package, which can be installed
+from within R by typing `install.packages('remotes')`.
+
+# Learning More
+
+The `BioCroValidation` package includes extensive documentation. The best place
+to start is the
+[Parameterizing Soybean-BioCro](parameterizing_soybean_biocro.html)
+article, which illustrates the full process of defining an objective function,
+running an optimization, examining the results, and saving the new model
+definition.
+
+Another key resource is the help page for `objective_function`, which can be
+accessed online or by typing `?objective_function` in an R terminal. This
+document explains all the available options for normalization approaches,
+regularization approaches, and other aspects of defining an objective function.