USEPA · michaeldumelle · Jan 26, 2026 · Nov 20, 2025 · Nov 20, 2025 · Nov 21, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: spmodel
 Title: Spatial Statistical Modeling and Prediction
-Version: 0.11.1
+Version: 0.12.0
 Authors@R: c(
     person(given = "Michael",
            family = "Dumelle",

diff --git a/NAMESPACE b/NAMESPACE
@@ -188,6 +188,7 @@ S3method(model.matrix,spautor)
 S3method(model.matrix,spgautor)
 S3method(model.matrix,spglm)
 S3method(model.matrix,splm)
+S3method(plot,eacf)
 S3method(plot,esv)
 S3method(plot,spautor)
 S3method(plot,spgautor)
@@ -359,6 +360,7 @@ export(augment)
 export(covmatrix)
 export(dispersion_initial)
 export(dispersion_params)
+export(eacf)
 export(esv)
 export(glance)
 export(glances)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,20 @@
+# spmodel 0.12.0
+
+## Major Updates
+
+* Added `eacf()` to compute the empirical autocovariance function.
+
+## Minor Updates
+
+* Changed the default `size` argument to the local argument in `predict(..., block = TRUE)` and `augment(..., block = TRUE)` from 1000 to 4000. This enhances the block prediction (i.e., Kriging) approximation's accuracy but can slightly increase computational complexity.
+* Enhanced efficiency of block prediction (i.e., Kriging) for large `newdata` objects.
+* Added a warning message that clarifies `xcoord` and `ycoord` are ignored when `data` is an `sf` object.
+* Minor documentation updates.
+
+## Bug Fixes
+
+* Fixed a bug in `predict(object, newdata)` and `augment(object, newdata)` that could cause `NA` values when the spatial covariance was `"matern"` and a location in `newdata` was the exact same as a location in `data` (the `data` argument used to fit `object`).
+
 # spmodel 0.11.1
 
 ## Minor Updates

diff --git a/R/eacf.R b/R/eacf.R
@@ -0,0 +1,204 @@
+#' Compute the empirical autocovariance
+#'
+#' @description Compute the empirical autocovariance (i.e., empirical covariance)
+#'   for varying bin sizes and cutoff values.
+#'
+#' @param formula A formula describing the fixed effect structure.
+#' @param data A data frame or \code{sf} object containing the variables in \code{formula}
+#'   and geographic information.
+#' @param xcoord Name of the variable in \code{data} representing the x-coordinate.
+#'   Can be quoted or unquoted. Not required if \code{data} is an \code{sf} object.
+#' @param ycoord Name of the variable in \code{data} representing the y-coordinate.
+#'   Can be quoted or unquoted. Not required if \code{data} is an \code{sf} object.
+#' @param cloud A logical indicating whether the empirical autocovariance should
+#'   be summarized by distance class or not. When \code{cloud = FALSE} (the default), pairwise autocovariances
+#'   are binned and averaged within distance classes. When \code{cloud} = TRUE,
+#'   all pairwise autocovariances and distances are returned (this is known as
+#'   the "cloud" autocovariance).
+#' @param bins The number of equally spaced bins. The default is 15. Ignored if
+#'   \code{cloud = TRUE}.
+#' @param cutoff The maximum distance considered.
+#'   The default is half the diagonal of the bounding box from the coordinates.
+#' @param dist_matrix A distance matrix to be used instead of providing coordinate names.
+#' @param partition_factor An optional formula specifying the partition factor.
+#'   If specified, autocovariances are only computed for observations sharing the
+#'   same level of the partition factor.
+#'
+#' @details The empirical autocovariance (i.e., empirical covariance) is a tool used to visualize and model
+#'   spatial dependence by estimating the semivariance of a process at varying distances.
+#'   For a constant-mean process, the
+#'   autocovariance at distance \eqn{h} is denoted \eqn{Cov(h)} and defined as
+#'   \eqn{Cov(z1, z2)}. Under second-order stationarity,
+#'   \eqn{Cov(h) = Cov(0) - \gamma(h)}, where \eqn{gamma(h)} is the semivariance function at distance \code{h}. Typically the residuals from an ordinary
+#'   least squares fit defined by \code{formula} are second-order stationary with
+#'   mean zero. These residuals are used to compute the empirical autocovariance
+#'   At a distance \code{h}, the empirical autocovariance is
+#'   \eqn{1/N(h) \sum (r1 \times r2)}, where \eqn{N(h)} is the number of (unique)
+#'   pairs in the set of observations whose distance separation is \code{h} and
+#'   \code{r1} and \code{r2} are residuals corresponding to observations whose
+#'   distance separation is \code{h}. In spmodel, these distance bins actually
+#'   contain observations whose distance separation is \code{h +- c},
+#'   where \code{c} is a constant determined implicitly by \code{bins}. Typically,
+#'   only observations whose distance separation is below some cutoff are used
+#'   to compute the empirical semivariogram (this cutoff is determined by \code{cutoff}).
+#'
+#' @name eacf
+#'
+#' @return If \code{cloud = FALSE}, a tibble (data.frame) with distance bins
+#'   (\code{bins}), the average distance (\code{dist}), the average autocovariance (\code{acov}), and the
+#'   number of (unique) pairs (\code{np}). If \code{cloud = TRUE}, a tibble
+#'   (data.frame) with distance (\code{dist}) and autocovariance (\code{acov})
+#'   for each unique pair.
+#'
+#' @export
+#'
+#' @examples
+#' eacf(sulfate ~ 1, sulfate)
+#' plot(eacf(sulfate ~ 1, sulfate))
+eacf <- function(formula, data, xcoord, ycoord, cloud = FALSE, bins = 15, cutoff, dist_matrix, partition_factor) {
+
+  # filter out missing response values
+  na_index <- is.na(data[[all.vars(formula)[1]]])
+  data <- data[!na_index, , drop = FALSE]
+  # finding model frame
+  data_model_frame <- model.frame(formula, data, drop.unused.levels = TRUE, na.action = na.pass)
+  # model matrix with potential NA
+  ob_predictors <- complete.cases(model.matrix(formula, data_model_frame))
+  if (any(!ob_predictors)) {
+    stop("Cannot have NA values in predictors.", call. = FALSE)
+  }
+
+  # covert sp to sf
+  attr_sp <- attr(class(data), "package")
+  if (!is.null(attr_sp) && length(attr_sp) == 1 && attr_sp == "sp") {
+    stop("sf objects must be used instead of sp objects. To convert your sp object into an sf object, run sf::st_as_sf().", call. = FALSE)
+  }
+
+  ## convert sf to data frame (point geometry) (1d objects obsolete)
+  ### see if data has sf class
+  if (inherits(data, "sf")) {
+    data <- suppressWarnings(sf::st_centroid(data))
+    data <- sf_to_df(data)
+    ### name xcoord ".xcoord" to be used later
+    xcoord <- ".xcoord"
+    ### name ycoord ".ycoord" to be used later
+    ycoord <- ".ycoord"
+  }
+
+  # compute spatial distances
+  if (missing(dist_matrix)) {
+    # non standard evaluation for the x and y coordinates
+    xcoord <- substitute(xcoord)
+    ycoord <- substitute(ycoord)
+
+    if (missing(xcoord)) {
+      stop("The xcoord argument must be specified.", call. = FALSE)
+    }
+
+    if (!missing(xcoord)) {
+      if (!as.character(xcoord) %in% colnames(data)) {
+        stop("The xcoord argument must match the name of a variable in data.", call. = FALSE)
+      }
+    }
+
+    if (missing(ycoord)) {
+      dist_matrix <- spdist(data, xcoord)
+    } else {
+      if (!as.character(ycoord) %in% colnames(data)) {
+        stop("The ycoord argument must match the name of a variable in data.", call. = FALSE)
+      }
+      dist_matrix <- spdist(data, xcoord, ycoord)
+    }
+  }
+
+
+  dist_matrix <- as.matrix(dist_matrix)
+  dist_matrix <- dist_matrix[upper.tri(dist_matrix)]
+
+  if (any(dist_matrix == 0)) {
+    warning("Zero distances observed between at least one pair. Ignoring pairs. If using splm(), consider a different estimation method.", call. = FALSE)
+  }
+
+  if (missing(partition_factor)) {
+    partition_factor <- NULL
+  }
+
+  if (!is.null(partition_factor)) {
+    # partition_matrix_val <- triu(partition_matrix(partition_factor, data = data), k = 1)
+    partition_matrix_val <- as.matrix(partition_matrix(partition_factor, data = data))
+    partition_matrix_val <- partition_matrix_val[upper.tri(partition_matrix_val)]
+    dist_matrix <- dist_matrix * partition_matrix_val
+  }
+
+  if (missing(cutoff)) {
+    cutoff <- NULL
+  }
+  if (is.null(cutoff)) {
+    cutoff <- max(dist_matrix) / 2
+  }
+
+  dist_vector <- dist_matrix
+  if (any(dist_vector == 0)) {
+    dist_index <- dist_vector > 0 & dist_vector <= cutoff
+  } else {
+    dist_index <- dist_vector <= cutoff
+  }
+
+  dist_vector <- dist_vector[dist_index]
+
+  # compute squared differences in the residuals
+  lmod <- lm(formula = formula, data = data)
+  residuals <- residuals(lmod)
+  residual_matrix <- outer(residuals, residuals) # ybar is the fitted mean
+  residual_matrix <- residual_matrix[upper.tri(residual_matrix)]
+  if (!is.null(partition_factor)) {
+    residual_matrix <- residual_matrix * partition_matrix_val
+  }
+
+  residual_vector <- residual_matrix
+  residual_vector <- residual_vector[dist_index]
+  residual_vector2 <- residual_vector # just to align with esv() names
+
+
+  if (cloud) {
+    eacf_out <- get_eacf_cloud(residual_vector2, dist_vector)
+  } else {
+    eacf_out <- get_eacf(residual_vector2, dist_vector, bins, cutoff)
+  }
+
+
+
+
+  # remove NA
+  # eacf_out <- na.omit(eacf_out)
+  eacf_out <- structure(eacf_out, class = c("eacf", class(eacf_out)), call = match.call(), cloud = cloud)
+  eacf_out
+}
+
+#' @rdname eacf
+#' @method plot eacf
+#' @param x An object from \code{eacf()}.
+#' @param ... Other arguments passed to other methods.
+#' @export
+plot.eacf <- function(x, ...) {
+
+  cal <- attr(x, "call")
+  if (!is.na(m.f <- match("formula", names(cal)))) {
+    cal <- cal[c(1, m.f)]
+    names(cal)[2L] <- ""
+  }
+  cc <- deparse(cal, 80)
+  nc <- nchar(cc[1L], "c")
+  abbr <- length(cc) > 1 || nc > 75
+  sub.caption <- if (abbr) {
+    paste(substr(cc[1L], 1L, min(75L, nc)), "...")
+  } else {
+    cc[1L]
+  }
+
+  dotlist <- list(...)
+  dotlist <- get_eacf_dotlist_defaults(x, dotlist, cloud = attr(x, "cloud"))
+
+  do.call("plot", c(list(x = x$dist, y = x$acov), dotlist))
+  title(sub = sub.caption)
+}
diff --git a/R/esv.R b/R/esv.R
@@ -170,7 +170,7 @@ esv <- function(formula, data, xcoord, ycoord, cloud = FALSE, robust = FALSE, bi
   residual_vector2 <- residual_vector^2
 
   if (cloud) {
-    esv_out <- get_esv_cloud(residual_vector2, dist_vector, formula)
+    esv_out <- get_esv_cloud(residual_vector2, dist_vector)
   } else {
     if (robust) {
       residual_vector12 <- sqrt(residual_vector)

diff --git a/R/get_data_object.R b/R/get_data_object.R
@@ -23,6 +23,9 @@ get_data_object_splm <- function(formula, data, spcov_initial, xcoord, ycoord, e
     data_sf <- suppressWarnings(sf::st_centroid(data))
     # store as data frame
     data <- sf_to_df(data_sf)
+    if (!missing(xcoord) || !missing(ycoord)) {
+      warning("data is an sf object. Ignoring xcoord and ycoord arguments.", call. = FALSE)
+    }
     ## name xcoord ".xcoord" to be used later
     xcoord <- ".xcoord"
     ## name ycoord ".ycoord" to be used later

diff --git a/R/get_data_object_glm.R b/R/get_data_object_glm.R
@@ -24,6 +24,9 @@ get_data_object_spglm <- function(formula, family, data, spcov_initial, xcoord,
     data_sf <- suppressWarnings(sf::st_centroid(data))
     # store as data frame
     data <- sf_to_df(data_sf)
+    if (!missing(xcoord) || !missing(ycoord)) {
+      warning("data is an sf object. Ignoring xcoord and ycoord arguments.", call. = FALSE)
+    }
     ## name xcoord ".xcoord" to be used later
     xcoord <- ".xcoord"
     ## name ycoord ".ycoord" to be used later

diff --git a/R/get_eacf.R b/R/get_eacf.R
@@ -0,0 +1,41 @@
+get_eacf <- function(residual_vector2, dist_vector, bins, cutoff, formula) {
+
+  # compute semivariogram classes
+  dist_classes <- cut(dist_vector, breaks = seq(0, cutoff, length.out = bins + 1))
+
+  # compute squared differences within each class
+  acov <- tapply(residual_vector2, dist_classes, function(x) mean(x))
+
+  # compute pairs within each class
+  np <- tapply(residual_vector2, dist_classes, length)
+
+  # set as zero if necessary
+  np <- ifelse(is.na(np), 0, np)
+
+  # compute average distance within each class
+  dist <- tapply(dist_vector, dist_classes, mean)
+
+  # return output
+  eacf_out <- tibble::tibble(
+    bins = factor(levels(dist_classes), levels = levels(dist_classes)),
+    dist = as.numeric(dist),
+    acov = as.numeric(acov),
+    np = as.numeric(np)
+  )
+
+  # set row names to NULL
+  # row.names(eacf_out) <- NULL
+
+  eacf_out
+}
+
+
+get_eacf_cloud <- function(residual_vector2, dist_vector) {
+
+  eacf_out <- tibble::tibble(dist = dist_vector, acov = residual_vector2)
+
+  # set row names to NULL
+  # row.names(eacf_out) <- NULL
+
+  eacf_out
+}
diff --git a/R/get_eacf_dotlist.R b/R/get_eacf_dotlist.R
@@ -0,0 +1,68 @@
+#' Get empirical autocovariance dotlist
+#'
+#' @param ... Additional arguments to \code{eacf()}
+#' @param max_halfdist
+#'
+#' @return An eacf dotlist
+#'
+#' @noRd
+get_eacf_dotlist <- function(..., max_halfdist) {
+  # storing dotlist and setting defaults for eacf
+  dotlist <- list(...)
+
+  if (!("bins" %in% names(dotlist))) {
+    dotlist$bins <- 15
+  }
+
+  if (!("cutoff" %in% names(dotlist))) {
+    dotlist$cutoff <- max_halfdist
+  }
+
+  # make dotlist eacf
+  dotlist_eacf <- list(bins = dotlist$bins, cutoff = dotlist$cutoff)
+}
+
+
+get_eacf_dotlist_defaults <- function(x, dotlist, cloud) {
+
+  names_dotlist <- names(dotlist)
+
+  # set defaults
+  if (!"main" %in% names_dotlist) {
+    dotlist$main <- "Empirical Autocovariance"
+    if (cloud) dotlist$main <- paste0(dotlist$main, " (Cloud)")
+  }
+
+  if (!"xlab" %in% names_dotlist) {
+    dotlist$xlab <- "Distance"
+  }
+
+  if (!"ylab" %in% names_dotlist) {
+    dotlist$ylab <- "Autocovariance"
+  }
+
+  if (!cloud && !"pch" %in% names_dotlist) {
+    dotlist$pch <- 19
+  }
+
+  if (!cloud && !"cex" %in% names_dotlist) {
+    dotlist$cex <- (x$np - min(x$np)) / (max(x$np) - min(x$np)) * 2 + 1
+  }
+
+  if (!"ylim" %in% names_dotlist) {
+
+    # include zero if not in limits
+
+    ## all greater than zero (positive)
+    if (all(x$acov > 0)) {
+      dotlist$ylim <- c(0, 1.1 * max(x$acov))
+    }
+
+    ## all less than zero (negative)
+    if (all(x$acov < 0)) {
+      dotlist$ylim <- c(1.1 * min(x$acov), 0)
+    }
+  }
+
+  dotlist
+}
diff --git a/R/get_esv.R b/R/get_esv.R
@@ -62,7 +62,7 @@ get_esv_robust <- function(residual_vector12, dist_vector, bins, cutoff, formula
   esv_out
 }
 
-get_esv_cloud <- function(residual_vector2, dist_vector, formula) {
+get_esv_cloud <- function(residual_vector2, dist_vector) {
 
   esv_out <- tibble::tibble(dist = dist_vector, gamma = residual_vector2 / 2)
-Original file line number
+Diff line change
@@ Expand Up @@
       esv_out
     }
-    get_esv_cloud <- function(residual_vector2, dist_vector, formula) {
+    get_esv_cloud <- function(residual_vector2, dist_vector) {
       esv_out <- tibble::tibble(dist = dist_vector, gamma = residual_vector2 / 2)
@@ Expand Down @@