USEPA · michaeldumelle · Jul 3, 2025 · May 13, 2025 · Jun 21, 2025 · Jun 26, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: spmodel
 Title: Spatial Statistical Modeling and Prediction
-Version: 0.10.0
+Version: 0.11.0
 Authors@R: c(
     person(given = "Michael",
            family = "Dumelle",

diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,19 @@
+# spmodel 0.11.0
+
+## Major Updates
+
+* Added support for Block Prediction (i.e., Block Kriging) to estimate averages (and their uncertainties) over a geographic region.
+* Added a vignette to the [`spmodel` website](https://usepa.github.io/spmodel/) titled "Block Prediction (i.e., Block Kriging) in spmodel".
+* Added a `fc_borders` data set which contains borders for the Four Corners states in the United States.
+
+## Bug Fixes
+
+* Fixed a bug that incorrectly calculated the log determinant of the fixed effects in the restricted log likelihood.
+* Fixed a bug that caused an error when `covmatrix(..., cov_type = "pred.pred")` was called on `object` with a non-`NULL` `newdata` element.
+
 # spmodel 0.10.0
 
-## Minor updates
+## Minor Updates
 
 * Added a robust semivariogram option to `esv()`; see the `robust`argument to `esv()` ([#28](https://github.com/USEPA/spmodel/issues/28)).
 * Added `"none"` and `"ie"` spatial covariance types (via `spcov_type` or `spcov_initial`) to `spautor()`, `spgautor()`, and `spautorRF()` ([#27](https://github.com/USEPA/spmodel/issues/27)).

diff --git a/R/covmatrix.R b/R/covmatrix.R
@@ -65,7 +65,7 @@ covmatrix.splm <- function(object, newdata, cov_type, ...) {
 
   if (cov_type == "pred.pred") {
     if (inherits(object$newdata, "sf")) {
-      object$obdata <- sf_to_df(newdata)
+      object$obdata <- sf_to_df(object$newdata)
     } else {
       object$obdata <- newdata
     }

diff --git a/R/data.R b/R/data.R
@@ -211,3 +211,23 @@
 #' @source
 #' The data source is the \code{elect80} data set in the \code{spData} R package.
 "texas"
+
+
+#' Four Corners State Borders
+#'
+#' @description State borders for the four corners states in the United States:
+#'   Arizona, Colorado, New Mexico, Utah.
+#'
+#' @format An \code{sf} object with 4 rows and 4 columns:
+#'
+#' \itemize{
+#'   \item NAME: State name.
+#'   \item STUSPS: State postal code.
+#'   \item GEO.ID: State GEO.ID from the United States Census Bureau.
+#'   \item geometry: \code{POINT} geometry representing coordinates in a NAD83
+#'     projection (EPSG: 5070). Distances between points are in meters.
+#' }
+#' @source
+#' The data source is the United States Census Bureau TIGER/Line Shapefiles.
+"fc_borders"
+# start with us_borders <- tigris::states(cb = TRUE, year = 2024)
diff --git a/R/dispersion_optim2orig.R b/R/dispersion_optim2orig.R
@@ -10,6 +10,9 @@ dispersion_optim2orig <- function(dispersion_orig2optim, par) {
   } else {
     fill_optim_par_val <- par[length(par)] # dispersion is the last element
     fill_orig_val <- exp(fill_optim_par_val)
+    # cap lower and upper dispersion values for numeric stability
+    fill_orig_val <- pmax(1e-8, fill_orig_val)
+    fill_orig_val <- pmin(1e8, fill_orig_val)
     names(fill_orig_val) <- "dispersion"
     new_par <- par[-length(par)] # remove dispersion parameter
   }

diff --git a/R/dispersion_orig2optim.R b/R/dispersion_orig2optim.R
@@ -5,6 +5,7 @@ dispersion_orig2optim <- function(dispersion_initial) {
   names(dispersion_orig2optim_is_known) <- "dispersion_log"
 
   # return dispersion parameter vector
+  # can consider lower limit like -10, 10 for numerical stability
   dispersion_orig2optim_val <- ifelse(dispersion_orig2optim_val > 50 & !dispersion_orig2optim_is_known, 50, dispersion_orig2optim_val)
   dispersion_orig2optim_val <- ifelse(dispersion_orig2optim_val < -50 & !dispersion_orig2optim_is_known, -50, dispersion_orig2optim_val)
 

diff --git a/R/get_local_list.R b/R/get_local_list.R
@@ -156,3 +156,44 @@ get_local_list_prediction <- function(local) {
 
   local
 }
+
+
+
+
+get_local_list_prediction_block <- function(local) {
+
+  # set local neighborhood size
+  # method can be "all" (for all data), "distance" (for local distance neighborhoods)
+  # or "covariance" (for local covariance neighborhoods)
+
+  if (is.logical(local)) {
+    if (local) {
+      local <- list(method = "covariance", size = 1000)
+    } else {
+      local <- list(method = "all")
+    }
+  }
+
+  names_local <- names(local)
+
+  # errors
+  if ("method" %in% names_local) {
+    if (!local$method %in% c("all", "covariance", "distance")) {
+      stop("Invalid local method. Local method must be \"all\", \"covariance\", or \"distance\".", call. = FALSE)
+    }
+  }
+
+
+  if (!"method" %in% names_local) {
+    # local$method <- "all"
+    local$method <- "covariance"
+  }
+
+  if (local$method %in% c("distance", "covariance") && !"size" %in% names_local) {
+    local$size <- 1000
+  }
+
+  # no parallel required
+
+  local
+}
diff --git a/R/laploglik_products.R b/R/laploglik_products.R
@@ -229,6 +229,7 @@ get_w_and_H_spglm <- function(data_object, dispersion, SigInv_list, SigInv_X, co
       # Next, compute H
       D <- get_D(family, w, y, size, dispersion)
       H <- D - Ptheta # not PD but -H is
+      # can consider changing tol here for numeric stability
       solveHg <- solve(H, g)
       wnew <- w - solveHg
       # mH_upchol <- chol(Matrix::forceSymmetric(-H))
@@ -342,6 +343,7 @@ get_w_and_H_spgautor <- function(data_object, dispersion, SigInv, SigInv_X, cov_
     # Next, compute H
     D <- get_D(family, w, y, size, dispersion)
     H <- D - Ptheta # not PD but -H is
+    # can consider changing tol here for numeric stability
     solveHg <- solve(H, g)
     wnew <- w - solveHg
     # mH_upchol <- chol(Matrix::forceSymmetric(-H))

diff --git a/R/predict.R b/R/predict.R
@@ -69,10 +69,25 @@
 #'   If \code{local} is \code{TRUE}, defaults for \code{local} are chosen such
 #'   that \code{local} is transformed into
 #'   \code{list(size = 100, method = "covariance", parallel = FALSE)}.
+#'
+#'   If \code{block} is \code{TRUE}, \code{local} accepts \code{method} and \code{size},
+#'   and \code{method} takes values of \code{"all"}, \code{"covariance"},
+#'   and \code{"distance"}, similar as when \code{block} is \code{FALSE}.
+#'   The default \code{method} is \code{"distance"} with size \code{1000}. This default \code{size} is
+#'   much larger than when \code{block} is \code{FALSE}. This is because when \code{block} is \code{TRUE},
+#'   covariances and explanatory variables are averaged before prediction, which greatly
+#'   reduces computational burden, only requiring the Cholesky decomposition
+#'   of one observed covariance matrix. Because the computational burden is reduced dramatically when \code{block} is \code{TRUE},
+#'   parallel processing is not needed and hence, \code{parallel} and \code{ncores} are ignored if specified in \code{local}.
 #' @param terms If \code{type} is \code{"terms"}, the type of terms to be returned,
 #'   specified via either numeric position or name. The default is all terms are included.
 #' @param na.action Missing (\code{NA}) values in \code{newdata} will return an error and should
 #'   be removed before proceeding.
+#' @param block A logical indicating whether a block prediction over the entire region
+#'   in \code{newdata} should be returned. When \code{block} is \code{TRUE},
+#'   \code{newdata} should be a dense grid of prediction locations that span
+#'   the entire region. The default is \code{FALSE}, which
+#'   returns point predictions for each location on \code{newdata}.
 #' @param ... Other arguments. Only used for models fit using \code{splmRF()}
 #'   or \code{spautorRF()} where \code{...} indicates other
 #'   arguments to \code{ranger::predict.ranger()}.
@@ -122,7 +137,8 @@
 #' predict(spmod, sulfate_preds, interval = "prediction")
 #' augment(spmod, newdata = sulfate_preds, interval = "prediction")
 predict.splm <- function(object, newdata, se.fit = FALSE, scale = NULL, df = Inf, interval = c("none", "confidence", "prediction"),
-                         level = 0.95, type = c("response", "terms"), local, terms = NULL, na.action = na.fail, ...) {
+                         level = 0.95, type = c("response", "terms"), block = FALSE, local, terms = NULL, na.action = na.fail, ...) {
+
 
   # match interval argument so the three display
   interval <- match.arg(interval)
@@ -133,6 +149,11 @@ predict.splm <- function(object, newdata, se.fit = FALSE, scale = NULL, df = Inf
     local <- NULL
   }
 
+  if (block) {
+    object <- predict_block_splm(object, newdata, se.fit, scale, df, interval, level, type, local, terms, na.action, ...)
+    return(object)
+  }
+
   # check scale is numeric (if specified)
   if (!is.null(scale) && !is.numeric(scale)) {
     stop("scale must be numeric.", call. = FALSE)